1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/export.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30#include <linux/perf_event.h>
31#include <linux/audit.h>
32#include <linux/khugepaged.h>
33#include <linux/uprobes.h>
34#include <linux/rbtree_augmented.h>
35
36#include <asm/uaccess.h>
37#include <asm/cacheflush.h>
38#include <asm/tlb.h>
39#include <asm/mmu_context.h>
40
41#include "internal.h"
42
/*
 * Fallback used when the architecture does not provide its own
 * arch_mmap_check(): the arguments are ignored and the check yields 0.
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

/*
 * Fallback used when the architecture does not provide its own
 * arch_rebalance_pgtables(): the address is handed back unchanged.
 */
#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)		(addr)
#endif

/* Forward declaration so that mmap_region() can call it on its error path. */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/*
 * Page protection table, indexed by
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED) (see vm_get_page_prot()).
 * The first eight slots hold the __Pxxx values, the last eight the __Sxxx
 * values.
 * NOTE(review): presumably __Pxxx are the private and __Sxxx the shared
 * protections supplied by the architecture - confirm against arch headers.
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
74
75pgprot_t vm_get_page_prot(unsigned long vm_flags)
76{
77 return __pgprot(pgprot_val(protection_map[vm_flags &
78 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
79 pgprot_val(arch_vm_get_page_prot(vm_flags)));
80}
81EXPORT_SYMBOL(vm_get_page_prot);
82
/* Overcommit policy consulted by __vm_enough_memory(); starts as OVERCOMMIT_GUESS. */
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
/* Percentage of non-hugetlb RAM that __vm_enough_memory() treats as committable. */
int sysctl_overcommit_ratio __read_mostly = 50;
/* Bound on mm->map_count that do_mmap_pgoff() checks before creating a mapping. */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;

/*
 * Per-cpu counter of committed pages; read through
 * percpu_counter_read_positive() by vm_memory_committed() and
 * __vm_enough_memory().
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
91
92
93
94
95
96
97
98
99
100unsigned long vm_memory_committed(void)
101{
102 return percpu_counter_read_positive(&vm_committed_as);
103}
104EXPORT_SYMBOL_GPL(vm_memory_committed);
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
 * Charge @pages to vm_committed_as and decide whether that commitment is
 * acceptable under the current sysctl_overcommit_memory policy.
 *
 * @mm:            address space being charged; may be NULL, in which case
 *                 the per-process adjustment at the end is skipped
 * @pages:         number of pages to account
 * @cap_sys_admin: non-zero skips the 1/32 reduction applied to other callers
 *
 * Returns 0 with the charge left in place, or -ENOMEM after the charge has
 * been undone with vm_unacct_memory().
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	/* Charge first; every failure path below undoes this at "error". */
	vm_acct_memory(pages);

	/* OVERCOMMIT_ALWAYS: accept unconditionally. */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		/* Heuristic estimate: start from free plus file-backed pages. */
		free = global_page_state(NR_FREE_PAGES);
		free += global_page_state(NR_FILE_PAGES);

		/*
		 * Shmem pages are taken back out of the estimate.
		 * NOTE(review): presumably because they are counted in
		 * NR_FILE_PAGES yet cannot simply be dropped - confirm.
		 */
		free -= global_page_state(NR_SHMEM);

		free += nr_swap_pages;

		/* Slab pages accounted as reclaimable count as available too. */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/* Set totalreserve_pages aside; fail if nothing remains beyond it. */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/* Callers without cap_sys_admin give up a further 1/32 (about 3%). */
		if (!cap_sys_admin)
			free -= free / 32;

		/* Unsigned comparison: pages is converted to unsigned long here. */
		if (free > pages)
			return 0;

		goto error;
	}

	/*
	 * Any other policy value: the limit is sysctl_overcommit_ratio percent
	 * of RAM not used by hugetlb pages, plus all of swap.
	 */
	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;

	/* Callers without cap_sys_admin give up 1/32 (about 3%) of the RAM share. */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* With an mm, additionally hold back 1/32 of that mm's total_vm. */
	if (mm)
		allowed -= mm->total_vm / 32;

	/* The counter already includes the charge made at the top. */
	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
197
198
199
200
201static void __remove_shared_vm_struct(struct vm_area_struct *vma,
202 struct file *file, struct address_space *mapping)
203{
204 if (vma->vm_flags & VM_DENYWRITE)
205 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
206 if (vma->vm_flags & VM_SHARED)
207 mapping->i_mmap_writable--;
208
209 flush_dcache_mmap_lock(mapping);
210 if (unlikely(vma->vm_flags & VM_NONLINEAR))
211 list_del_init(&vma->shared.nonlinear);
212 else
213 vma_interval_tree_remove(vma, &mapping->i_mmap);
214 flush_dcache_mmap_unlock(mapping);
215}
216
217
218
219
220
221void unlink_file_vma(struct vm_area_struct *vma)
222{
223 struct file *file = vma->vm_file;
224
225 if (file) {
226 struct address_space *mapping = file->f_mapping;
227 mutex_lock(&mapping->i_mmap_mutex);
228 __remove_shared_vm_struct(vma, file, mapping);
229 mutex_unlock(&mapping->i_mmap_mutex);
230 }
231}
232
233
234
235
236static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
237{
238 struct vm_area_struct *next = vma->vm_next;
239
240 might_sleep();
241 if (vma->vm_ops && vma->vm_ops->close)
242 vma->vm_ops->close(vma);
243 if (vma->vm_file)
244 fput(vma->vm_file);
245 mpol_put(vma_policy(vma));
246 kmem_cache_free(vm_area_cachep, vma);
247 return next;
248}
249
/* Forward declaration: the brk syscall below grows the heap through do_brk(). */
static unsigned long do_brk(unsigned long addr, unsigned long len);

/*
 * brk - move the end of the calling process's heap (mm->brk).
 *
 * Runs with mm->mmap_sem held for writing. Returns the resulting value of
 * mm->brk: the requested @brk when the change was applied, otherwise the
 * unchanged previous value. No negative error code is returned.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	/*
	 * With CONFIG_COMPAT_BRK the lower bound is mm->start_brk only when
	 * current->brk_randomized is set; otherwise it is mm->end_data.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	/* Never let the break drop below the lower bound. */
	if (brk < min_brk)
		goto out;

	/*
	 * Enforce RLIMIT_DATA (unless it is RLIM_INFINITY) against the new
	 * heap size plus the size of the data segment.
	 */
	rlim = rlimit(RLIMIT_DATA);
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	/* Same page-aligned end: nothing to map or unmap, just record brk. */
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Shrinking: unmap the tail; brk is updated only if do_munmap() returns 0. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/*
	 * Growing: refuse if an existing mapping intersects the range from the
	 * old end up to one page past the new end.
	 */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* do_brk() must hand back exactly oldbrk for the extension to count. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
314
315static long vma_compute_subtree_gap(struct vm_area_struct *vma)
316{
317 unsigned long max, subtree_gap;
318 max = vma->vm_start;
319 if (vma->vm_prev)
320 max -= vma->vm_prev->vm_end;
321 if (vma->vm_rb.rb_left) {
322 subtree_gap = rb_entry(vma->vm_rb.rb_left,
323 struct vm_area_struct, vm_rb)->rb_subtree_gap;
324 if (subtree_gap > max)
325 max = subtree_gap;
326 }
327 if (vma->vm_rb.rb_right) {
328 subtree_gap = rb_entry(vma->vm_rb.rb_right,
329 struct vm_area_struct, vm_rb)->rb_subtree_gap;
330 if (subtree_gap > max)
331 max = subtree_gap;
332 }
333 return max;
334}
335
336#ifdef CONFIG_DEBUG_VM_RB
/*
 * Debug walk over the vma rbtree rooted at @root.
 *
 * Walking forward it checks, for every vma, that vm_start is not below the
 * previous vma's vm_start or vm_end, that vm_start does not exceed vm_end,
 * and that the cached rb_subtree_gap equals a fresh
 * vma_compute_subtree_gap(). It then walks backward from the last node and
 * checks that the same number of nodes is seen. Each violation is printed.
 *
 * Returns the number of nodes, or -1 if any check failed.
 */
static int browse_rb(struct rb_root *root)
{
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			printk("vm_end %lx < vm_start %lx\n",
				vma->vm_end, vma->vm_start);
			bug = 1;
		}
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			printk("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		i++;
		/* Remember the last node and its bounds for the next round. */
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	/* Count again in the opposite direction, starting at the last node. */
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		printk("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}
379
380static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
381{
382 struct rb_node *nd;
383
384 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
385 struct vm_area_struct *vma;
386 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
387 BUG_ON(vma != ignore &&
388 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
389 }
390}
391
/*
 * Debug consistency check of @mm's vma bookkeeping.
 *
 * Walks the mm->mmap list, verifying every anon_vma_chain entry of each vma
 * (under that vma's anon_vma lock), and checks that the list length matches
 * mm->map_count, that the last vm_end matches mm->highest_vm_end, and that
 * browse_rb() reports the same count for the rbtree. Any mismatch is
 * printed and the function then BUGs.
 */
void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;
	while (vma) {
		struct anon_vma_chain *avc;
		vma_lock_anon_vma(vma);
		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
			anon_vma_interval_tree_verify(avc);
		vma_unlock_anon_vma(vma);
		/* After the loop this holds the vm_end of the last vma in the list. */
		highest_address = vma->vm_end;
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		printk("mm->highest_vm_end %lx, found %lx\n",
		       mm->highest_vm_end, highest_address);
		bug = 1;
	}
	/* browse_rb() returns -1 on its own failures, which also mismatches here. */
	i = browse_rb(&mm->mm_rb);
	if (i != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	BUG_ON(bug);
}
424#else
/* Without CONFIG_DEBUG_VM_RB the validation hooks compile to nothing. */
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
427#endif
428
/*
 * Instantiate the augmented-rbtree callback set "vma_gap_callbacks" (and
 * the vma_gap_callbacks_propagate() helper used by vma_gap_update()) for
 * struct vm_area_struct: the augmented value is the unsigned long field
 * rb_subtree_gap, recomputed with vma_compute_subtree_gap().
 */
RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
431
432
433
434
435
436
/*
 * Refresh the cached gap information after @vma's vm_start, or the vm_end
 * of the vma before it, has changed.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * Run the propagate callback generated by RB_DECLARE_CALLBACKS,
	 * starting at @vma with a NULL stop node.
	 * NOTE(review): presumably this walks towards the root and stops
	 * early once a node's value is unchanged - confirm in
	 * rbtree_augmented.h.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}
445
/*
 * Insert an already linked @vma node into @root using the gap-tracking
 * augmented callbacks.
 */
static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* Debug builds: every node's cached gap must be correct beforehand. */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}
454
/*
 * Remove @vma from the rbtree at @root using the gap-tracking augmented
 * callbacks.
 */
static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Debug builds: all nodes must carry a correct cached gap, except that
	 * the vma being erased is exempt from the check.
	 */
	validate_mm_rb(root, vma);

	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
/*
 * Take every anon_vma_chain entry of @vma out of its anon_vma's interval
 * tree. vma_adjust() calls this, with the anon_vma write-locked, before it
 * changes the vma's vm_start, vm_end or vm_pgoff; the entries are put back
 * by anon_vma_interval_tree_post_update_vma().
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}
493
/*
 * Re-insert every anon_vma_chain entry of @vma into its anon_vma's interval
 * tree. Counterpart of anon_vma_interval_tree_pre_update_vma(), called by
 * vma_adjust() once the vma's range has been updated.
 */
static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
502
503static int find_vma_links(struct mm_struct *mm, unsigned long addr,
504 unsigned long end, struct vm_area_struct **pprev,
505 struct rb_node ***rb_link, struct rb_node **rb_parent)
506{
507 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
508
509 __rb_link = &mm->mm_rb.rb_node;
510 rb_prev = __rb_parent = NULL;
511
512 while (*__rb_link) {
513 struct vm_area_struct *vma_tmp;
514
515 __rb_parent = *__rb_link;
516 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
517
518 if (vma_tmp->vm_end > addr) {
519
520 if (vma_tmp->vm_start < end)
521 return -ENOMEM;
522 __rb_link = &__rb_parent->rb_left;
523 } else {
524 rb_prev = __rb_parent;
525 __rb_link = &__rb_parent->rb_right;
526 }
527 }
528
529 *pprev = NULL;
530 if (rb_prev)
531 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
532 *rb_link = __rb_link;
533 *rb_parent = __rb_parent;
534 return 0;
535}
536
/*
 * Link @vma into mm->mm_rb at the slot described by @rb_link/@rb_parent
 * (as produced by find_vma_links()). vma->vm_next must already be set up,
 * since it is consulted here.
 */
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/*
	 * The gap below the following vma now ends at @vma, so refresh it;
	 * with no following vma, @vma becomes the highest mapping.
	 */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vma->vm_end;

	/*
	 * Order matters: attach the node with a zero rb_subtree_gap, then let
	 * vma_gap_update() compute and propagate the real value, and only then
	 * hand the node to rb_insert_augmented() via vma_rb_insert().
	 * NOTE(review): presumably so that all augmented values are final
	 * before the tree is rebalanced - confirm.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}
560
561static void __vma_link_file(struct vm_area_struct *vma)
562{
563 struct file *file;
564
565 file = vma->vm_file;
566 if (file) {
567 struct address_space *mapping = file->f_mapping;
568
569 if (vma->vm_flags & VM_DENYWRITE)
570 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
571 if (vma->vm_flags & VM_SHARED)
572 mapping->i_mmap_writable++;
573
574 flush_dcache_mmap_lock(mapping);
575 if (unlikely(vma->vm_flags & VM_NONLINEAR))
576 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
577 else
578 vma_interval_tree_insert(vma, &mapping->i_mmap);
579 flush_dcache_mmap_unlock(mapping);
580 }
581}
582
/*
 * Link @vma into @mm after @prev: first through __vma_link_list(), then
 * into the rbtree. The order matters because __vma_link_rb() reads
 * vma->vm_next.
 * NOTE(review): __vma_link_list() is defined elsewhere; presumably it
 * maintains the vm_next/vm_prev list - confirm.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}
591
592static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
593 struct vm_area_struct *prev, struct rb_node **rb_link,
594 struct rb_node *rb_parent)
595{
596 struct address_space *mapping = NULL;
597
598 if (vma->vm_file)
599 mapping = vma->vm_file->f_mapping;
600
601 if (mapping)
602 mutex_lock(&mapping->i_mmap_mutex);
603
604 __vma_link(mm, vma, prev, rb_link, rb_parent);
605 __vma_link_file(vma);
606
607 if (mapping)
608 mutex_unlock(&mapping->i_mmap_mutex);
609
610 mm->map_count++;
611 validate_mm(mm);
612}
613
614
615
616
617
618static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
619{
620 struct vm_area_struct *prev;
621 struct rb_node **rb_link, *rb_parent;
622
623 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
624 &prev, &rb_link, &rb_parent))
625 BUG();
626 __vma_link(mm, vma, prev, rb_link, rb_parent);
627 mm->map_count++;
628}
629
630static inline void
631__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
632 struct vm_area_struct *prev)
633{
634 struct vm_area_struct *next;
635
636 vma_rb_erase(vma, &mm->mm_rb);
637 prev->vm_next = next = vma->vm_next;
638 if (next)
639 next->vm_prev = prev;
640 if (mm->mmap_cache == vma)
641 mm->mmap_cache = prev;
642}
643
644
645
646
647
648
649
650
/*
 * Change the range of @vma to [@start, @end) with page offset @pgoff.
 *
 * With @insert NULL, the following vma may be affected: it is removed when
 * the new end covers it entirely (and the one after it too, if the new end
 * reaches beyond), or its start is shifted when the boundary between the
 * two vmas moves. With @insert non-NULL, @insert is linked into the mm (and
 * into the file mapping) as part of the same locked update.
 *
 * Returns 0 on success, or -ENOMEM if anon_vma_clone() fails, which happens
 * before anything has been modified.
 */
int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct rb_root *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma grows over all of next. remove_next is 2 when
			 * the requested end reaches past next, in which case
			 * the vma after next is handled by a second pass
			 * through "again"; end is clamped to next's end for
			 * this pass.
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			exporter = next;
			importer = vma;
		} else if (end > next->vm_start) {
			/*
			 * vma grows over the beginning of next: next's start
			 * moves up by adjust_next pages.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			exporter = next;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks and nothing is being inserted: next's
			 * start moves down (negative adjust_next) to take over
			 * the pages given up.
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			exporter = vma;
			importer = next;
		}

		/*
		 * The vma that gains pages (importer) must have an anon_vma
		 * if the vma losing them (exporter) has one, so clone the
		 * chain and adopt the exporter's anon_vma.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			if (anon_vma_clone(importer, exporter))
				return -ENOMEM;
			importer->anon_vma = exporter->anon_vma;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		/* root stays NULL for nonlinear vmas: no interval tree update. */
		if (!(vma->vm_flags & VM_NONLINEAR)) {
			root = &mapping->i_mmap;
			uprobe_munmap(vma, vma->vm_start, vma->vm_end);

			if (adjust_next)
				uprobe_munmap(next, next->vm_start,
							next->vm_end);
		}

		mutex_lock(&mapping->i_mmap_mutex);
		if (insert) {
			/*
			 * Link the inserted vma into the file mapping now,
			 * under the same i_mmap_mutex hold as the range
			 * change below.
			 */
			__vma_link_file(insert);
		}
	}

	vma_adjust_trans_huge(vma, start, end, adjust_next);

	/* Lock vma's anon_vma, or next's when only next has one and it moves. */
	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_BUG_ON(adjust_next && next->anon_vma &&
			  anon_vma != next->anon_vma);
		anon_vma_lock_write(anon_vma);
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	/* Take the affected vmas out of the file interval tree while they change. */
	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	/* Apply the new range; remember which ends moved for the gap updates. */
	if (start != vma->vm_start) {
		vma->vm_start = start;
		start_changed = true;
	}
	if (end != vma->vm_end) {
		vma->vm_end = end;
		end_changed = true;
	}
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * next is now fully covered by vma: unlink it from the mm and
		 * from the file mapping. It is freed further down, after the
		 * locks have been dropped.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * Link the inserted vma into the mm only now that vma's range
		 * has been changed to make room for it.
		 */
		__insert_vm_struct(mm, insert);
	} else {
		/* Plain resize: refresh the cached gaps that depend on the moved ends. */
		if (start_changed)
			vma_gap_update(vma);
		if (end_changed) {
			if (!next)
				mm->highest_vm_end = end;
			else if (!adjust_next)
				vma_gap_update(next);
		}
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock(anon_vma);
	}
	if (mapping)
		mutex_unlock(&mapping->i_mmap_mutex);

	if (root) {
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		/* Release everything the removed vma held, then free it. */
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * Reload next: either loop to swallow the vma after the one
		 * just removed (remove_next == 2), or fix up the gap of the
		 * new successor / the highest end of the mm.
		 */
		next = vma->vm_next;
		if (remove_next == 2)
			goto again;
		else if (next)
			vma_gap_update(next);
		else
			mm->highest_vm_end = end;
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}
845
846
847
848
849
850static inline int is_mergeable_vma(struct vm_area_struct *vma,
851 struct file *file, unsigned long vm_flags)
852{
853 if (vma->vm_flags ^ vm_flags)
854 return 0;
855 if (vma->vm_file != file)
856 return 0;
857 if (vma->vm_ops && vma->vm_ops->close)
858 return 0;
859 return 1;
860}
861
862static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
863 struct anon_vma *anon_vma2,
864 struct vm_area_struct *vma)
865{
866
867
868
869
870 if ((!anon_vma1 || !anon_vma2) && (!vma ||
871 list_is_singular(&vma->anon_vma_chain)))
872 return 1;
873 return anon_vma1 == anon_vma2;
874}
875
876
877
878
879
880
881
882
883
884
885
886
887static int
888can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
889 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
890{
891 if (is_mergeable_vma(vma, file, vm_flags) &&
892 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
893 if (vma->vm_pgoff == vm_pgoff)
894 return 1;
895 }
896 return 0;
897}
898
899
900
901
902
903
904
905
906static int
907can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
908 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
909{
910 if (is_mergeable_vma(vma, file, vm_flags) &&
911 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
912 pgoff_t vm_pglen;
913 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
914 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
915 return 1;
916 }
917 return 0;
918}
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
/*
 * Try to satisfy a mapping request for [@addr, @end) with the given
 * attributes by extending a neighbouring vma instead of creating a new one.
 *
 * @prev is the vma preceding the range (or NULL). Returns the vma that now
 * covers the range, or NULL if no merge was possible or vma_adjust()
 * failed.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
		     	struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/* Mappings carrying any VM_SPECIAL flag are never merged. */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	/*
	 * area is the vma right after prev. If it ends exactly at @end, the
	 * successor considered for merging is the vma after it.
	 */
	area = next;
	if (next && next->vm_end == end)
		next = next->vm_next;

	/* Can the range be appended to prev? */
	if (prev && prev->vm_end == addr &&
  			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * Yes. If next is adjacent and compatible as well (including
		 * prev's and next's anon_vmas with each other), extend prev
		 * all the way over next; otherwise extend prev to @end only.
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {

			err = vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else
			err = vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev);
		return prev;
	}

	/* Otherwise, can the range be prepended to next? */
	if (next && end == next->vm_start &&
 			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		/*
		 * If the range starts inside prev, shrink prev down to @addr
		 * (vma_adjust() then moves the boundary with its successor);
		 * otherwise stretch area to cover [@addr, next->vm_end).
		 */
		if (prev && addr < prev->vm_end)
			err = vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else
			err = vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area);
		return area;
	}

	return NULL;
}
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1038{
1039 return a->vm_end == b->vm_start &&
1040 mpol_equal(vma_policy(a), vma_policy(b)) &&
1041 a->vm_file == b->vm_file &&
1042 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
1043 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1044}
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1069{
1070 if (anon_vma_compatible(a, b)) {
1071 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
1072
1073 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1074 return anon_vma;
1075 }
1076 return NULL;
1077}
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1088{
1089 struct anon_vma *anon_vma;
1090 struct vm_area_struct *near;
1091
1092 near = vma->vm_next;
1093 if (!near)
1094 goto try_prev;
1095
1096 anon_vma = reusable_anon_vma(near, vma, near);
1097 if (anon_vma)
1098 return anon_vma;
1099try_prev:
1100 near = vma->vm_prev;
1101 if (!near)
1102 goto none;
1103
1104 anon_vma = reusable_anon_vma(near, near, vma);
1105 if (anon_vma)
1106 return anon_vma;
1107none:
1108
1109
1110
1111
1112
1113
1114
1115
1116 return NULL;
1117}
1118
1119#ifdef CONFIG_PROC_FS
1120void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1121 struct file *file, long pages)
1122{
1123 const unsigned long stack_flags
1124 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1125
1126 mm->total_vm += pages;
1127
1128 if (file) {
1129 mm->shared_vm += pages;
1130 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1131 mm->exec_vm += pages;
1132 } else if (flags & stack_flags)
1133 mm->stack_vm += pages;
1134}
1135#endif
1136
1137
1138
1139
1140
1141static inline unsigned long round_hint_to_min(unsigned long hint)
1142{
1143 hint &= PAGE_MASK;
1144 if (((void *)hint != NULL) &&
1145 (hint < mmap_min_addr))
1146 return PAGE_ALIGN(mmap_min_addr);
1147 return hint;
1148}
1149
1150
1151
1152
1153
/*
 * Validate an mmap request for the current process, translate @prot and
 * @flags into vm_flags, pick the address and hand over to mmap_region().
 *
 * @file:  file to map, or NULL for an anonymous mapping
 * @addr:  requested address or hint
 * @len:   length in bytes (rounded up to whole pages here)
 * @prot:  PROT_* bits
 * @flags: MAP_* bits
 * @pgoff: offset into @file, in pages
 *
 * Returns the mapped address, or a negative errno value cast to
 * unsigned long.
 * NOTE(review): no locking is taken here; presumably the caller holds
 * mm->mmap_sem for writing - confirm against callers.
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct inode *inode;
	vm_flags_t vm_flags;

	/*
	 * A READ_IMPLIES_EXEC personality turns PROT_READ into
	 * PROT_READ|PROT_EXEC, except for files on a MNT_NOEXEC mount.
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	if (!len)
		return -EINVAL;

	/* Without MAP_FIXED the address is only a hint and may be adjusted. */
	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* A length that wraps to zero when page-aligned is rejected. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* The page offset of the end of the mapping must not wrap. */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
               return -EOVERFLOW;

	/* Refuse once the mm already exceeds the mapping count limit. */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/*
	 * Pick the address. A result that is not page-aligned is an error
	 * code and is returned unchanged.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/*
	 * Start from the requested protection and flag bits plus the mm's
	 * default flags, with all VM_MAY* bits set; the file checks below
	 * clear the ones that do not apply.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	/*
	 * VM_LOCKED may come from MAP_LOCKED or from mm->def_flags: check the
	 * resulting locked page count against RLIMIT_MEMLOCK unless the caller
	 * has CAP_IPC_LOCK.
	 */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_path.dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* A writable shared mapping needs a file opened for writing. */
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * An append-only inode must not be mapped shared
			 * through a file opened for writing.
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* Refuse if locks_verify_locked() reports a conflict on the inode. */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			/* Not opened for writing: can never become writable or shared. */
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through: the MAP_PRIVATE checks apply as well */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			/* On a MNT_NOEXEC mount: no exec now and none later. */
			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op || !file->f_op->mmap)
				return -ENODEV;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* Anonymous shared mapping: the caller's pgoff is ignored. */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/* Anonymous private mapping: pgoff is derived from the address. */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
1284
/*
 * mmap_pgoff - mmap entry point taking the file offset in pages.
 *
 * Resolves @fd to a file (or sets up an anonymous hugetlb file for
 * MAP_ANONYMOUS|MAP_HUGETLB), performs the mapping through vm_mmap_pgoff()
 * and drops the file reference taken here.
 *
 * Returns the mapped address or a negative errno value: -EBADF for an
 * invalid @fd, -EINVAL for MAP_HUGETLB without MAP_ANONYMOUS, or whatever
 * hugetlb_file_setup() / vm_mmap_pgoff() report.
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	struct file *file = NULL;
	unsigned long retval = -EBADF;

	if (!(flags & MAP_ANONYMOUS)) {
		audit_mmap_fd(fd, flags);
		/* MAP_HUGETLB is only accepted together with MAP_ANONYMOUS. */
		if (unlikely(flags & MAP_HUGETLB))
			return -EINVAL;
		file = fget(fd);
		if (!file)
			goto out;
	} else if (flags & MAP_HUGETLB) {
		struct user_struct *user = NULL;
		/*
		 * Back the anonymous huge page mapping with an internal
		 * hugetlb file, created with VM_NORESERVE; the page size
		 * selector is taken from the MAP_HUGE_* bits of @flags.
		 * NOTE(review): "user" is only passed by address here and not
		 * used afterwards - confirm no accounting is expected.
		 */
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
				VM_NORESERVE,
				&user, HUGETLB_ANONHUGE_INODE,
				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	/* These two flags are stripped from the caller-supplied value. */
	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
	if (file)
		fput(file);
out:
	return retval;
}
1323
1324#ifdef __ARCH_WANT_SYS_OLD_MMAP
1325struct mmap_arg_struct {
1326 unsigned long addr;
1327 unsigned long len;
1328 unsigned long prot;
1329 unsigned long flags;
1330 unsigned long fd;
1331 unsigned long offset;
1332};
1333
1334SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1335{
1336 struct mmap_arg_struct a;
1337
1338 if (copy_from_user(&a, arg, sizeof(a)))
1339 return -EFAULT;
1340 if (a.offset & ~PAGE_MASK)
1341 return -EINVAL;
1342
1343 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1344 a.offset >> PAGE_SHIFT);
1345}
1346#endif
1347
1348
1349
1350
1351
1352
1353
1354int vma_wants_writenotify(struct vm_area_struct *vma)
1355{
1356 vm_flags_t vm_flags = vma->vm_flags;
1357
1358
1359 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1360 return 0;
1361
1362
1363 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1364 return 1;
1365
1366
1367 if (pgprot_val(vma->vm_page_prot) !=
1368 pgprot_val(vm_get_page_prot(vm_flags)))
1369 return 0;
1370
1371
1372 if (vm_flags & VM_PFNMAP)
1373 return 0;
1374
1375
1376 return vma->vm_file && vma->vm_file->f_mapping &&
1377 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1378}
1379
1380
1381
1382
1383
1384static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1385{
1386
1387
1388
1389
1390 if (file && is_file_hugepages(file))
1391 return 0;
1392
1393 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1394}
1395
/*
 * Create the mapping [@addr, @addr + @len) in the current mm.
 *
 * Any existing mappings in the range are unmapped first. The range is then
 * either merged into a neighbouring vma or given a freshly allocated one,
 * which is set up through the file's ->mmap() (or shmem_zero_setup() for
 * anonymous shared memory) and linked into the mm.
 *
 * Returns the mapped address (which ->mmap() may have changed), or a
 * negative errno value cast to unsigned long.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long flags,
			  vm_flags_t vm_flags, unsigned long pgoff)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int correct_wcount = 0;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;
	struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;

	/*
	 * Clear the range: as long as find_vma_links() reports an overlap,
	 * unmap and look again. On success prev/rb_link/rb_parent describe
	 * the insertion point.
	 */
	error = -ENOMEM;
munmap_back:
	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check that the mm may grow by this many pages. */
	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	/*
	 * MAP_NORESERVE becomes VM_NORESERVE unless the overcommit policy is
	 * OVERCOMMIT_NEVER; for hugetlb files it is honoured regardless.
	 */
	if ((flags & MAP_NORESERVE)) {

		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;


		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}

	/*
	 * Accountable mappings are charged up front; "charged" remembers the
	 * amount so the error paths can give it back.
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/* Try to extend a neighbouring vma instead of allocating a new one. */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
	if (vma)
		goto out;

	/* No merge possible: allocate and fill in a zeroed vma. */
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;
	INIT_LIST_HEAD(&vma->anon_vma_chain);

	/* Error code for the VM_GROWSDOWN/VM_GROWSUP rejections below. */
	error = -EINVAL;

	if (file) {
		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
			goto free_vma;
		if (vm_flags & VM_DENYWRITE) {
			/*
			 * Deny write access for the duration of the setup;
			 * correct_wcount marks that this must be undone again.
			 */
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			correct_wcount = 1;
		}
		vma->vm_file = get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/*
		 * ->mmap() is not expected to move the vma: warn once if it
		 * did. prev, rb_link and rb_parent were computed for the
		 * original addr and are NOT recomputed here, so vma_link()
		 * below would be working from a stale insertion point.
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		/* Pick up whatever ->mmap() changed in the vma. */
		addr = vma->vm_start;
		pgoff = vma->vm_pgoff;
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
			goto free_vma;
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	if (vma_wants_writenotify(vma)) {
		pgprot_t pprot = vma->vm_page_prot;

		/*
		 * Use the protection computed without VM_SHARED.
		 * NOTE(review): presumably this yields a non-writable
		 * protection so the first write faults - confirm.
		 * If the previous protection was already "noncached" (applying
		 * pgprot_noncached() to it changes nothing), keep that
		 * attribute on the new protection.
		 */
		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* From here on use the file the vma actually ended up with. */
	file = vma->vm_file;

	/* The vma is linked: undo the temporary write denial taken above. */
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		/* Count the pages as locked only if mlock_vma_pages_range() returns 0. */
		if (!mlock_vma_pages_range(vma, addr, addr + len))
			mm->locked_vm += (len >> PAGE_SHIFT);
	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
		make_pages_present(addr, addr + len);

	if (file)
		uprobe_mmap(vma);

	return addr;

unmap_and_free_vma:
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);

	/* Undo whatever ->mmap() may already have mapped in the range. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	/*
	 * NOTE(review): clearing "charged" skips vm_unacct_memory() below;
	 * presumably the unmap path has already given the charge back -
	 * confirm.
	 */
	charged = 0;
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}
1559
/*
 * Search current->mm for the lowest-addressed gap that can hold
 * info->length bytes, padded by info->align_mask, between info->low_limit
 * and info->high_limit.
 *
 * Every VMA is treated as owning the gap that ends at its vm_start and
 * starts at its predecessor's vm_end (or at 0 when there is no
 * predecessor).  The rbtree is visited left subtree, node, right subtree;
 * a child is only entered when its rb_subtree_gap is at least the padded
 * length.
 *
 * Returns the aligned start address, or -ENOMEM (converted to unsigned
 * long) when no gap qualifies.
 */
unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Pad the request by the alignment mask; fail if the sum wraps. */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/* From here on high_limit is the highest acceptable gap start ... */
	if (info->high_limit < length)
		return -ENOMEM;
	high_limit = info->high_limit - length;

	/* ... and low_limit is the lowest acceptable gap end. */
	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/*
	 * With an empty tree, or a root whose rb_subtree_gap is too small,
	 * only the area above the highest VMA is left to try.
	 */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		goto check_highest;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		goto check_highest;

	while (true) {
		/* Descend left first, but only if this gap ends high enough. */
		gap_end = vma->vm_start;
		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
check_current:
		/* A gap starting above high_limit ends the search with failure. */
		if (gap_start > high_limit)
			return -ENOMEM;
		if (gap_end >= low_limit && gap_end - gap_start >= length)
			goto found;

		/* The gap before this node did not fit; try the right subtree. */
		if (vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

		/*
		 * Climb until we arrive at a parent from its left child; that
		 * parent's own gap has not been examined yet.
		 */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				goto check_highest;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_left) {
				/*
				 * vm_prev is dereferenced without a NULL check
				 * here, unlike above.  NOTE(review): presumably
				 * safe because a node with a left subtree has
				 * an in-order predecessor -- confirm.
				 */
				gap_start = vma->vm_prev->vm_end;
				gap_end = vma->vm_start;
				goto check_current;
			}
		}
	}

check_highest:
	/* Last candidate: everything above mm->highest_vm_end. */
	gap_start = mm->highest_vm_end;
	gap_end = ULONG_MAX;	/* only consulted by the VM_BUG_ON below */
	if (gap_start > high_limit)
		return -ENOMEM;

found:
	/* Clip the chosen gap to the caller's original low limit. */
	if (gap_start < info->low_limit)
		gap_start = info->low_limit;

	/* Round up so that gap_start matches align_offset under align_mask. */
	gap_start += (info->align_offset - gap_start) & info->align_mask;

	VM_BUG_ON(gap_start + info->length > info->high_limit);
	VM_BUG_ON(gap_start + info->length > gap_end);
	return gap_start;
}
1661
/*
 * Search current->mm for the highest-addressed gap that can hold
 * info->length bytes, padded by info->align_mask, between info->low_limit
 * and info->high_limit.
 *
 * This mirrors unmapped_area(): the area above mm->highest_vm_end is
 * tried first, then the rbtree is visited right subtree, node, left
 * subtree, entering a child only when its rb_subtree_gap is at least the
 * padded length.
 *
 * Returns the aligned start address of the mapping, or -ENOMEM (converted
 * to unsigned long) when no gap qualifies.
 */
unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Pad the request by the alignment mask; fail if the sum wraps. */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * gap_end starts out as the caller's high limit so that the
	 * found_highest path can use it directly.  high_limit becomes the
	 * highest acceptable gap start.
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	/* low_limit becomes the lowest acceptable gap end. */
	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Try the area above the highest VMA before touching the tree. */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit)
		goto found_highest;

	/* Empty tree or a root whose rb_subtree_gap is too small: give up. */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Descend right first, but only if this gap starts low enough. */
		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* A gap ending below low_limit ends the search with failure. */
		gap_end = vma->vm_start;
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit && gap_end - gap_start >= length)
			goto found;

		/* The gap before this node did not fit; try the left subtree. */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/*
		 * Climb until we arrive at a parent from its right child; that
		 * parent's own gap has not been examined yet.
		 */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vma->vm_prev->vm_end : 0;
				goto check_current;
			}
		}
	}

found:
	/* Clip the chosen gap to the caller's original high limit. */
	if (gap_end > info->high_limit)
		gap_end = info->high_limit;

found_highest:
	/* Place the mapping at the top of the gap, then round down to alignment. */
	gap_end -= info->length;
	gap_end -= (gap_end - info->align_offset) & info->align_mask;

	VM_BUG_ON(gap_end < info->low_limit);
	VM_BUG_ON(gap_end < gap_start);
	return gap_end;
}
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771#ifndef HAVE_ARCH_UNMAPPED_AREA
1772unsigned long
1773arch_get_unmapped_area(struct file *filp, unsigned long addr,
1774 unsigned long len, unsigned long pgoff, unsigned long flags)
1775{
1776 struct mm_struct *mm = current->mm;
1777 struct vm_area_struct *vma;
1778 struct vm_unmapped_area_info info;
1779
1780 if (len > TASK_SIZE)
1781 return -ENOMEM;
1782
1783 if (flags & MAP_FIXED)
1784 return addr;
1785
1786 if (addr) {
1787 addr = PAGE_ALIGN(addr);
1788 vma = find_vma(mm, addr);
1789 if (TASK_SIZE - len >= addr &&
1790 (!vma || addr + len <= vma->vm_start))
1791 return addr;
1792 }
1793
1794 info.flags = 0;
1795 info.length = len;
1796 info.low_limit = TASK_UNMAPPED_BASE;
1797 info.high_limit = TASK_SIZE;
1798 info.align_mask = 0;
1799 return vm_unmapped_area(&info);
1800}
1801#endif
1802
1803void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1804{
1805
1806
1807
1808 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
1809 mm->free_area_cache = addr;
1810}
1811
1812
1813
1814
1815
1816#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1817unsigned long
1818arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1819 const unsigned long len, const unsigned long pgoff,
1820 const unsigned long flags)
1821{
1822 struct vm_area_struct *vma;
1823 struct mm_struct *mm = current->mm;
1824 unsigned long addr = addr0;
1825 struct vm_unmapped_area_info info;
1826
1827
1828 if (len > TASK_SIZE)
1829 return -ENOMEM;
1830
1831 if (flags & MAP_FIXED)
1832 return addr;
1833
1834
1835 if (addr) {
1836 addr = PAGE_ALIGN(addr);
1837 vma = find_vma(mm, addr);
1838 if (TASK_SIZE - len >= addr &&
1839 (!vma || addr + len <= vma->vm_start))
1840 return addr;
1841 }
1842
1843 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1844 info.length = len;
1845 info.low_limit = PAGE_SIZE;
1846 info.high_limit = mm->mmap_base;
1847 info.align_mask = 0;
1848 addr = vm_unmapped_area(&info);
1849
1850
1851
1852
1853
1854
1855
1856 if (addr & ~PAGE_MASK) {
1857 VM_BUG_ON(addr != -ENOMEM);
1858 info.flags = 0;
1859 info.low_limit = TASK_UNMAPPED_BASE;
1860 info.high_limit = TASK_SIZE;
1861 addr = vm_unmapped_area(&info);
1862 }
1863
1864 return addr;
1865}
1866#endif
1867
1868void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1869{
1870
1871
1872
1873 if (addr > mm->free_area_cache)
1874 mm->free_area_cache = addr;
1875
1876
1877 if (mm->free_area_cache > mm->mmap_base)
1878 mm->free_area_cache = mm->mmap_base;
1879}
1880
1881unsigned long
1882get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1883 unsigned long pgoff, unsigned long flags)
1884{
1885 unsigned long (*get_area)(struct file *, unsigned long,
1886 unsigned long, unsigned long, unsigned long);
1887
1888 unsigned long error = arch_mmap_check(addr, len, flags);
1889 if (error)
1890 return error;
1891
1892
1893 if (len > TASK_SIZE)
1894 return -ENOMEM;
1895
1896 get_area = current->mm->get_unmapped_area;
1897 if (file && file->f_op && file->f_op->get_unmapped_area)
1898 get_area = file->f_op->get_unmapped_area;
1899 addr = get_area(file, addr, len, pgoff, flags);
1900 if (IS_ERR_VALUE(addr))
1901 return addr;
1902
1903 if (addr > TASK_SIZE - len)
1904 return -ENOMEM;
1905 if (addr & ~PAGE_MASK)
1906 return -EINVAL;
1907
1908 addr = arch_rebalance_pgtables(addr, len);
1909 error = security_mmap_addr(addr);
1910 return error ? error : addr;
1911}
1912
1913EXPORT_SYMBOL(get_unmapped_area);
1914
1915
1916struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1917{
1918 struct vm_area_struct *vma = NULL;
1919
1920 if (WARN_ON_ONCE(!mm))
1921 return NULL;
1922
1923
1924
1925 vma = mm->mmap_cache;
1926 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1927 struct rb_node *rb_node;
1928
1929 rb_node = mm->mm_rb.rb_node;
1930 vma = NULL;
1931
1932 while (rb_node) {
1933 struct vm_area_struct *vma_tmp;
1934
1935 vma_tmp = rb_entry(rb_node,
1936 struct vm_area_struct, vm_rb);
1937
1938 if (vma_tmp->vm_end > addr) {
1939 vma = vma_tmp;
1940 if (vma_tmp->vm_start <= addr)
1941 break;
1942 rb_node = rb_node->rb_left;
1943 } else
1944 rb_node = rb_node->rb_right;
1945 }
1946 if (vma)
1947 mm->mmap_cache = vma;
1948 }
1949 return vma;
1950}
1951
1952EXPORT_SYMBOL(find_vma);
1953
1954
1955
1956
1957struct vm_area_struct *
1958find_vma_prev(struct mm_struct *mm, unsigned long addr,
1959 struct vm_area_struct **pprev)
1960{
1961 struct vm_area_struct *vma;
1962
1963 vma = find_vma(mm, addr);
1964 if (vma) {
1965 *pprev = vma->vm_prev;
1966 } else {
1967 struct rb_node *rb_node = mm->mm_rb.rb_node;
1968 *pprev = NULL;
1969 while (rb_node) {
1970 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1971 rb_node = rb_node->rb_right;
1972 }
1973 }
1974 return vma;
1975}
1976
1977
1978
1979
1980
1981
1982static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1983{
1984 struct mm_struct *mm = vma->vm_mm;
1985 struct rlimit *rlim = current->signal->rlim;
1986 unsigned long new_start;
1987
1988
1989 if (!may_expand_vm(mm, grow))
1990 return -ENOMEM;
1991
1992
1993 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
1994 return -ENOMEM;
1995
1996
1997 if (vma->vm_flags & VM_LOCKED) {
1998 unsigned long locked;
1999 unsigned long limit;
2000 locked = mm->locked_vm + grow;
2001 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2002 limit >>= PAGE_SHIFT;
2003 if (locked > limit && !capable(CAP_IPC_LOCK))
2004 return -ENOMEM;
2005 }
2006
2007
2008 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2009 vma->vm_end - size;
2010 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2011 return -EFAULT;
2012
2013
2014
2015
2016
2017 if (security_vm_enough_memory_mm(mm, grow))
2018 return -ENOMEM;
2019
2020
2021 if (vma->vm_flags & VM_LOCKED)
2022 mm->locked_vm += grow;
2023 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2024 return 0;
2025}
2026
2027#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2028
2029
2030
2031
2032int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2033{
2034 int error;
2035
2036 if (!(vma->vm_flags & VM_GROWSUP))
2037 return -EFAULT;
2038
2039
2040
2041
2042
2043 if (unlikely(anon_vma_prepare(vma)))
2044 return -ENOMEM;
2045 vma_lock_anon_vma(vma);
2046
2047
2048
2049
2050
2051
2052
2053 if (address < PAGE_ALIGN(address+4))
2054 address = PAGE_ALIGN(address+4);
2055 else {
2056 vma_unlock_anon_vma(vma);
2057 return -ENOMEM;
2058 }
2059 error = 0;
2060
2061
2062 if (address > vma->vm_end) {
2063 unsigned long size, grow;
2064
2065 size = address - vma->vm_start;
2066 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2067
2068 error = -ENOMEM;
2069 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2070 error = acct_stack_growth(vma, size, grow);
2071 if (!error) {
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083 spin_lock(&vma->vm_mm->page_table_lock);
2084 anon_vma_interval_tree_pre_update_vma(vma);
2085 vma->vm_end = address;
2086 anon_vma_interval_tree_post_update_vma(vma);
2087 if (vma->vm_next)
2088 vma_gap_update(vma->vm_next);
2089 else
2090 vma->vm_mm->highest_vm_end = address;
2091 spin_unlock(&vma->vm_mm->page_table_lock);
2092
2093 perf_event_mmap(vma);
2094 }
2095 }
2096 }
2097 vma_unlock_anon_vma(vma);
2098 khugepaged_enter_vma_merge(vma);
2099 validate_mm(vma->vm_mm);
2100 return error;
2101}
2102#endif
2103
2104
2105
2106
2107int expand_downwards(struct vm_area_struct *vma,
2108 unsigned long address)
2109{
2110 int error;
2111
2112
2113
2114
2115
2116 if (unlikely(anon_vma_prepare(vma)))
2117 return -ENOMEM;
2118
2119 address &= PAGE_MASK;
2120 error = security_mmap_addr(address);
2121 if (error)
2122 return error;
2123
2124 vma_lock_anon_vma(vma);
2125
2126
2127
2128
2129
2130
2131
2132
2133 if (address < vma->vm_start) {
2134 unsigned long size, grow;
2135
2136 size = vma->vm_end - address;
2137 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2138
2139 error = -ENOMEM;
2140 if (grow <= vma->vm_pgoff) {
2141 error = acct_stack_growth(vma, size, grow);
2142 if (!error) {
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154 spin_lock(&vma->vm_mm->page_table_lock);
2155 anon_vma_interval_tree_pre_update_vma(vma);
2156 vma->vm_start = address;
2157 vma->vm_pgoff -= grow;
2158 anon_vma_interval_tree_post_update_vma(vma);
2159 vma_gap_update(vma);
2160 spin_unlock(&vma->vm_mm->page_table_lock);
2161
2162 perf_event_mmap(vma);
2163 }
2164 }
2165 }
2166 vma_unlock_anon_vma(vma);
2167 khugepaged_enter_vma_merge(vma);
2168 validate_mm(vma->vm_mm);
2169 return error;
2170}
2171
2172#ifdef CONFIG_STACK_GROWSUP
/* CONFIG_STACK_GROWSUP variant: stack growth is delegated to expand_upwards(). */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int rc = expand_upwards(vma, address);

	return rc;
}
2177
2178struct vm_area_struct *
2179find_extend_vma(struct mm_struct *mm, unsigned long addr)
2180{
2181 struct vm_area_struct *vma, *prev;
2182
2183 addr &= PAGE_MASK;
2184 vma = find_vma_prev(mm, addr, &prev);
2185 if (vma && (vma->vm_start <= addr))
2186 return vma;
2187 if (!prev || expand_stack(prev, addr))
2188 return NULL;
2189 if (prev->vm_flags & VM_LOCKED) {
2190 mlock_vma_pages_range(prev, addr, prev->vm_end);
2191 }
2192 return prev;
2193}
2194#else
/* Variant used without CONFIG_STACK_GROWSUP: stack growth is delegated to expand_downwards(). */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int rc = expand_downwards(vma, address);

	return rc;
}
2199
2200struct vm_area_struct *
2201find_extend_vma(struct mm_struct * mm, unsigned long addr)
2202{
2203 struct vm_area_struct * vma;
2204 unsigned long start;
2205
2206 addr &= PAGE_MASK;
2207 vma = find_vma(mm,addr);
2208 if (!vma)
2209 return NULL;
2210 if (vma->vm_start <= addr)
2211 return vma;
2212 if (!(vma->vm_flags & VM_GROWSDOWN))
2213 return NULL;
2214 start = vma->vm_start;
2215 if (expand_stack(vma, addr))
2216 return NULL;
2217 if (vma->vm_flags & VM_LOCKED) {
2218 mlock_vma_pages_range(vma, addr, start);
2219 }
2220 return vma;
2221}
2222#endif
2223
2224
2225
2226
2227
2228
2229
2230static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2231{
2232 unsigned long nr_accounted = 0;
2233
2234
2235 update_hiwater_vm(mm);
2236 do {
2237 long nrpages = vma_pages(vma);
2238
2239 if (vma->vm_flags & VM_ACCOUNT)
2240 nr_accounted += nrpages;
2241 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2242 vma = remove_vma(vma);
2243 } while (vma);
2244 vm_unacct_memory(nr_accounted);
2245 validate_mm(mm);
2246}
2247
2248
2249
2250
2251
2252
2253static void unmap_region(struct mm_struct *mm,
2254 struct vm_area_struct *vma, struct vm_area_struct *prev,
2255 unsigned long start, unsigned long end)
2256{
2257 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2258 struct mmu_gather tlb;
2259
2260 lru_add_drain();
2261 tlb_gather_mmu(&tlb, mm, 0);
2262 update_hiwater_rss(mm);
2263 unmap_vmas(&tlb, vma, start, end);
2264 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2265 next ? next->vm_start : 0);
2266 tlb_finish_mmu(&tlb, start, end);
2267}
2268
2269
2270
2271
2272
2273static void
2274detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2275 struct vm_area_struct *prev, unsigned long end)
2276{
2277 struct vm_area_struct **insertion_point;
2278 struct vm_area_struct *tail_vma = NULL;
2279 unsigned long addr;
2280
2281 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2282 vma->vm_prev = NULL;
2283 do {
2284 vma_rb_erase(vma, &mm->mm_rb);
2285 mm->map_count--;
2286 tail_vma = vma;
2287 vma = vma->vm_next;
2288 } while (vma && vma->vm_start < end);
2289 *insertion_point = vma;
2290 if (vma) {
2291 vma->vm_prev = prev;
2292 vma_gap_update(vma);
2293 } else
2294 mm->highest_vm_end = prev ? prev->vm_end : 0;
2295 tail_vma->vm_next = NULL;
2296 if (mm->unmap_area == arch_unmap_area)
2297 addr = prev ? prev->vm_end : mm->mmap_base;
2298 else
2299 addr = vma ? vma->vm_start : mm->mmap_base;
2300 mm->unmap_area(mm, addr);
2301 mm->mmap_cache = NULL;
2302}
2303
2304
2305
2306
2307
2308static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2309 unsigned long addr, int new_below)
2310{
2311 struct mempolicy *pol;
2312 struct vm_area_struct *new;
2313 int err = -ENOMEM;
2314
2315 if (is_vm_hugetlb_page(vma) && (addr &
2316 ~(huge_page_mask(hstate_vma(vma)))))
2317 return -EINVAL;
2318
2319 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2320 if (!new)
2321 goto out_err;
2322
2323
2324 *new = *vma;
2325
2326 INIT_LIST_HEAD(&new->anon_vma_chain);
2327
2328 if (new_below)
2329 new->vm_end = addr;
2330 else {
2331 new->vm_start = addr;
2332 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2333 }
2334
2335 pol = mpol_dup(vma_policy(vma));
2336 if (IS_ERR(pol)) {
2337 err = PTR_ERR(pol);
2338 goto out_free_vma;
2339 }
2340 vma_set_policy(new, pol);
2341
2342 if (anon_vma_clone(new, vma))
2343 goto out_free_mpol;
2344
2345 if (new->vm_file)
2346 get_file(new->vm_file);
2347
2348 if (new->vm_ops && new->vm_ops->open)
2349 new->vm_ops->open(new);
2350
2351 if (new_below)
2352 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2353 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2354 else
2355 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2356
2357
2358 if (!err)
2359 return 0;
2360
2361
2362 if (new->vm_ops && new->vm_ops->close)
2363 new->vm_ops->close(new);
2364 if (new->vm_file)
2365 fput(new->vm_file);
2366 unlink_anon_vmas(new);
2367 out_free_mpol:
2368 mpol_put(pol);
2369 out_free_vma:
2370 kmem_cache_free(vm_area_cachep, new);
2371 out_err:
2372 return err;
2373}
2374
2375
2376
2377
2378
2379int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2380 unsigned long addr, int new_below)
2381{
2382 if (mm->map_count >= sysctl_max_map_count)
2383 return -ENOMEM;
2384
2385 return __split_vma(mm, vma, addr, new_below);
2386}
2387
2388
2389
2390
2391
2392
2393int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2394{
2395 unsigned long end;
2396 struct vm_area_struct *vma, *prev, *last;
2397
2398 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2399 return -EINVAL;
2400
2401 if ((len = PAGE_ALIGN(len)) == 0)
2402 return -EINVAL;
2403
2404
2405 vma = find_vma(mm, start);
2406 if (!vma)
2407 return 0;
2408 prev = vma->vm_prev;
2409
2410
2411
2412 end = start + len;
2413 if (vma->vm_start >= end)
2414 return 0;
2415
2416
2417
2418
2419
2420
2421
2422
2423 if (start > vma->vm_start) {
2424 int error;
2425
2426
2427
2428
2429
2430
2431 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2432 return -ENOMEM;
2433
2434 error = __split_vma(mm, vma, start, 0);
2435 if (error)
2436 return error;
2437 prev = vma;
2438 }
2439
2440
2441 last = find_vma(mm, end);
2442 if (last && end > last->vm_start) {
2443 int error = __split_vma(mm, last, end, 1);
2444 if (error)
2445 return error;
2446 }
2447 vma = prev? prev->vm_next: mm->mmap;
2448
2449
2450
2451
2452 if (mm->locked_vm) {
2453 struct vm_area_struct *tmp = vma;
2454 while (tmp && tmp->vm_start < end) {
2455 if (tmp->vm_flags & VM_LOCKED) {
2456 mm->locked_vm -= vma_pages(tmp);
2457 munlock_vma_pages_all(tmp);
2458 }
2459 tmp = tmp->vm_next;
2460 }
2461 }
2462
2463
2464
2465
2466 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2467 unmap_region(mm, vma, prev, start, end);
2468
2469
2470 remove_vma_list(mm, vma);
2471
2472 return 0;
2473}
2474
2475int vm_munmap(unsigned long start, size_t len)
2476{
2477 int ret;
2478 struct mm_struct *mm = current->mm;
2479
2480 down_write(&mm->mmap_sem);
2481 ret = do_munmap(mm, start, len);
2482 up_write(&mm->mmap_sem);
2483 return ret;
2484}
2485EXPORT_SYMBOL(vm_munmap);
2486
2487SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2488{
2489 profile_munmap(addr);
2490 return vm_munmap(addr, len);
2491}
2492
/*
 * Debug aid (CONFIG_DEBUG_VM only): warn if mmap_sem can be taken for
 * reading, which would not be possible were it held for writing.  The
 * read lock acquired by the probe is dropped again.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
2502
2503
2504
2505
2506
2507
2508static unsigned long do_brk(unsigned long addr, unsigned long len)
2509{
2510 struct mm_struct * mm = current->mm;
2511 struct vm_area_struct * vma, * prev;
2512 unsigned long flags;
2513 struct rb_node ** rb_link, * rb_parent;
2514 pgoff_t pgoff = addr >> PAGE_SHIFT;
2515 int error;
2516
2517 len = PAGE_ALIGN(len);
2518 if (!len)
2519 return addr;
2520
2521 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2522
2523 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2524 if (error & ~PAGE_MASK)
2525 return error;
2526
2527
2528
2529
2530 if (mm->def_flags & VM_LOCKED) {
2531 unsigned long locked, lock_limit;
2532 locked = len >> PAGE_SHIFT;
2533 locked += mm->locked_vm;
2534 lock_limit = rlimit(RLIMIT_MEMLOCK);
2535 lock_limit >>= PAGE_SHIFT;
2536 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2537 return -EAGAIN;
2538 }
2539
2540
2541
2542
2543
2544 verify_mm_writelocked(mm);
2545
2546
2547
2548
2549 munmap_back:
2550 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2551 if (do_munmap(mm, addr, len))
2552 return -ENOMEM;
2553 goto munmap_back;
2554 }
2555
2556
2557 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2558 return -ENOMEM;
2559
2560 if (mm->map_count > sysctl_max_map_count)
2561 return -ENOMEM;
2562
2563 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2564 return -ENOMEM;
2565
2566
2567 vma = vma_merge(mm, prev, addr, addr + len, flags,
2568 NULL, NULL, pgoff, NULL);
2569 if (vma)
2570 goto out;
2571
2572
2573
2574
2575 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2576 if (!vma) {
2577 vm_unacct_memory(len >> PAGE_SHIFT);
2578 return -ENOMEM;
2579 }
2580
2581 INIT_LIST_HEAD(&vma->anon_vma_chain);
2582 vma->vm_mm = mm;
2583 vma->vm_start = addr;
2584 vma->vm_end = addr + len;
2585 vma->vm_pgoff = pgoff;
2586 vma->vm_flags = flags;
2587 vma->vm_page_prot = vm_get_page_prot(flags);
2588 vma_link(mm, vma, prev, rb_link, rb_parent);
2589out:
2590 perf_event_mmap(vma);
2591 mm->total_vm += len >> PAGE_SHIFT;
2592 if (flags & VM_LOCKED) {
2593 if (!mlock_vma_pages_range(vma, addr, addr + len))
2594 mm->locked_vm += (len >> PAGE_SHIFT);
2595 }
2596 return addr;
2597}
2598
2599unsigned long vm_brk(unsigned long addr, unsigned long len)
2600{
2601 struct mm_struct *mm = current->mm;
2602 unsigned long ret;
2603
2604 down_write(&mm->mmap_sem);
2605 ret = do_brk(addr, len);
2606 up_write(&mm->mmap_sem);
2607 return ret;
2608}
2609EXPORT_SYMBOL(vm_brk);
2610
2611
2612void exit_mmap(struct mm_struct *mm)
2613{
2614 struct mmu_gather tlb;
2615 struct vm_area_struct *vma;
2616 unsigned long nr_accounted = 0;
2617
2618
2619 mmu_notifier_release(mm);
2620
2621 if (mm->locked_vm) {
2622 vma = mm->mmap;
2623 while (vma) {
2624 if (vma->vm_flags & VM_LOCKED)
2625 munlock_vma_pages_all(vma);
2626 vma = vma->vm_next;
2627 }
2628 }
2629
2630 arch_exit_mmap(mm);
2631
2632 vma = mm->mmap;
2633 if (!vma)
2634 return;
2635
2636 lru_add_drain();
2637 flush_cache_mm(mm);
2638 tlb_gather_mmu(&tlb, mm, 1);
2639
2640
2641 unmap_vmas(&tlb, vma, 0, -1);
2642
2643 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
2644 tlb_finish_mmu(&tlb, 0, -1);
2645
2646
2647
2648
2649
2650 while (vma) {
2651 if (vma->vm_flags & VM_ACCOUNT)
2652 nr_accounted += vma_pages(vma);
2653 vma = remove_vma(vma);
2654 }
2655 vm_unacct_memory(nr_accounted);
2656
2657 WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2658}
2659
2660
2661
2662
2663
2664int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
2665{
2666 struct vm_area_struct *prev;
2667 struct rb_node **rb_link, *rb_parent;
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681 if (!vma->vm_file) {
2682 BUG_ON(vma->anon_vma);
2683 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2684 }
2685 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
2686 &prev, &rb_link, &rb_parent))
2687 return -ENOMEM;
2688 if ((vma->vm_flags & VM_ACCOUNT) &&
2689 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2690 return -ENOMEM;
2691
2692 vma_link(mm, vma, prev, rb_link, rb_parent);
2693 return 0;
2694}
2695
2696
2697
2698
2699
2700struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2701 unsigned long addr, unsigned long len, pgoff_t pgoff,
2702 bool *need_rmap_locks)
2703{
2704 struct vm_area_struct *vma = *vmap;
2705 unsigned long vma_start = vma->vm_start;
2706 struct mm_struct *mm = vma->vm_mm;
2707 struct vm_area_struct *new_vma, *prev;
2708 struct rb_node **rb_link, *rb_parent;
2709 struct mempolicy *pol;
2710 bool faulted_in_anon_vma = true;
2711
2712
2713
2714
2715
2716 if (unlikely(!vma->vm_file && !vma->anon_vma)) {
2717 pgoff = addr >> PAGE_SHIFT;
2718 faulted_in_anon_vma = false;
2719 }
2720
2721 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
2722 return NULL;
2723 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2724 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2725 if (new_vma) {
2726
2727
2728
2729 if (unlikely(vma_start >= new_vma->vm_start &&
2730 vma_start < new_vma->vm_end)) {
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743 VM_BUG_ON(faulted_in_anon_vma);
2744 *vmap = vma = new_vma;
2745 }
2746 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
2747 } else {
2748 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2749 if (new_vma) {
2750 *new_vma = *vma;
2751 new_vma->vm_start = addr;
2752 new_vma->vm_end = addr + len;
2753 new_vma->vm_pgoff = pgoff;
2754 pol = mpol_dup(vma_policy(vma));
2755 if (IS_ERR(pol))
2756 goto out_free_vma;
2757 vma_set_policy(new_vma, pol);
2758 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
2759 if (anon_vma_clone(new_vma, vma))
2760 goto out_free_mempol;
2761 if (new_vma->vm_file)
2762 get_file(new_vma->vm_file);
2763 if (new_vma->vm_ops && new_vma->vm_ops->open)
2764 new_vma->vm_ops->open(new_vma);
2765 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2766 *need_rmap_locks = false;
2767 }
2768 }
2769 return new_vma;
2770
2771 out_free_mempol:
2772 mpol_put(pol);
2773 out_free_vma:
2774 kmem_cache_free(vm_area_cachep, new_vma);
2775 return NULL;
2776}
2777
2778
2779
2780
2781
2782int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2783{
2784 unsigned long cur = mm->total_vm;
2785 unsigned long lim;
2786
2787 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2788
2789 if (cur + npages > lim)
2790 return 0;
2791 return 1;
2792}
2793
2794
2795static int special_mapping_fault(struct vm_area_struct *vma,
2796 struct vm_fault *vmf)
2797{
2798 pgoff_t pgoff;
2799 struct page **pages;
2800
2801
2802
2803
2804
2805
2806
2807 pgoff = vmf->pgoff - vma->vm_pgoff;
2808
2809 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2810 pgoff--;
2811
2812 if (*pages) {
2813 struct page *page = *pages;
2814 get_page(page);
2815 vmf->page = page;
2816 return 0;
2817 }
2818
2819 return VM_FAULT_SIGBUS;
2820}
2821
2822
2823
2824
/*
 * Intentionally empty ->close hook for special mappings.
 * NOTE(review): presumably present only so that these vmas have a close
 * operation installed (which may influence vma merging) -- confirm
 * before removing.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}
2828
2829static const struct vm_operations_struct special_mapping_vmops = {
2830 .close = special_mapping_close,
2831 .fault = special_mapping_fault,
2832};
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843int install_special_mapping(struct mm_struct *mm,
2844 unsigned long addr, unsigned long len,
2845 unsigned long vm_flags, struct page **pages)
2846{
2847 int ret;
2848 struct vm_area_struct *vma;
2849
2850 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2851 if (unlikely(vma == NULL))
2852 return -ENOMEM;
2853
2854 INIT_LIST_HEAD(&vma->anon_vma_chain);
2855 vma->vm_mm = mm;
2856 vma->vm_start = addr;
2857 vma->vm_end = addr + len;
2858
2859 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2860 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2861
2862 vma->vm_ops = &special_mapping_vmops;
2863 vma->vm_private_data = pages;
2864
2865 ret = insert_vm_struct(mm, vma);
2866 if (ret)
2867 goto out;
2868
2869 mm->total_vm += len >> PAGE_SHIFT;
2870
2871 perf_event_mmap(vma);
2872
2873 return 0;
2874
2875out:
2876 kmem_cache_free(vm_area_cachep, vma);
2877 return ret;
2878}
2879
/*
 * Acquired in mm_take_all_locks() and released in mm_drop_all_locks();
 * it is held for the whole time the per-mapping and per-anon_vma locks
 * taken there are held.
 */
static DEFINE_MUTEX(mm_all_locks_mutex);
2881
2882static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2883{
2884 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
2885
2886
2887
2888
2889 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899 if (__test_and_set_bit(0, (unsigned long *)
2900 &anon_vma->root->rb_root.rb_node))
2901 BUG();
2902 }
2903}
2904
2905static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2906{
2907 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2918 BUG();
2919 mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
2920 }
2921}
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955int mm_take_all_locks(struct mm_struct *mm)
2956{
2957 struct vm_area_struct *vma;
2958 struct anon_vma_chain *avc;
2959
2960 BUG_ON(down_read_trylock(&mm->mmap_sem));
2961
2962 mutex_lock(&mm_all_locks_mutex);
2963
2964 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2965 if (signal_pending(current))
2966 goto out_unlock;
2967 if (vma->vm_file && vma->vm_file->f_mapping)
2968 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2969 }
2970
2971 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2972 if (signal_pending(current))
2973 goto out_unlock;
2974 if (vma->anon_vma)
2975 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
2976 vm_lock_anon_vma(mm, avc->anon_vma);
2977 }
2978
2979 return 0;
2980
2981out_unlock:
2982 mm_drop_all_locks(mm);
2983 return -EINTR;
2984}
2985
2986static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2987{
2988 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001 if (!__test_and_clear_bit(0, (unsigned long *)
3002 &anon_vma->root->rb_root.rb_node))
3003 BUG();
3004 anon_vma_unlock(anon_vma);
3005 }
3006}
3007
3008static void vm_unlock_mapping(struct address_space *mapping)
3009{
3010 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3011
3012
3013
3014
3015 mutex_unlock(&mapping->i_mmap_mutex);
3016 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3017 &mapping->flags))
3018 BUG();
3019 }
3020}
3021
3022
3023
3024
3025
3026void mm_drop_all_locks(struct mm_struct *mm)
3027{
3028 struct vm_area_struct *vma;
3029 struct anon_vma_chain *avc;
3030
3031 BUG_ON(down_read_trylock(&mm->mmap_sem));
3032 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3033
3034 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3035 if (vma->anon_vma)
3036 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3037 vm_unlock_anon_vma(avc->anon_vma);
3038 if (vma->vm_file && vma->vm_file->f_mapping)
3039 vm_unlock_mapping(vma->vm_file->f_mapping);
3040 }
3041
3042 mutex_unlock(&mm_all_locks_mutex);
3043}
3044
3045
3046
3047
3048void __init mmap_init(void)
3049{
3050 int ret;
3051
3052 ret = percpu_counter_init(&vm_committed_as, 0);
3053 VM_BUG_ON(ret);
3054}
3055