1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/ima.h>
24#include <linux/hugetlb.h>
25#include <linux/profile.h>
26#include <linux/module.h>
27#include <linux/mount.h>
28#include <linux/mempolicy.h>
29#include <linux/rmap.h>
30#include <linux/mmu_notifier.h>
31#include <linux/perf_event.h>
32
33#include <asm/uaccess.h>
34#include <asm/cacheflush.h>
35#include <asm/tlb.h>
36#include <asm/mmu_context.h>
37
38#include "internal.h"
39
/*
 * Fallback used when no arch_mmap_check has been defined before this point:
 * it evaluates to 0, which get_unmapped_area() treats as "no error".
 */
40#ifndef arch_mmap_check
41#define arch_mmap_check(addr, len, flags) (0)
42#endif
43
/*
 * Fallback used when no arch_rebalance_pgtables has been defined before this
 * point: it yields addr unchanged.
 */
44#ifndef arch_rebalance_pgtables
45#define arch_rebalance_pgtables(addr, len) (addr)
46#endif
47
/*
 * Forward declaration: mmap_region() calls unmap_region() on its
 * f_op->mmap() failure path, before the definition is reached.
 */
48static void unmap_region(struct mm_struct *mm,
49 struct vm_area_struct *vma, struct vm_area_struct *prev,
50 unsigned long start, unsigned long end);
51
52
53
54
55
/*
 * DEBUG_MM_RB is left undefined here, so the "#ifdef DEBUG_MM_RB" section
 * further down selects the empty validate_mm() macro instead of the
 * list/rbtree consistency checker.
 */
56#undef DEBUG_MM_RB
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*
 * Page protection table indexed by vm_flags masked with
 * (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED); see vm_get_page_prot().
 * The first eight entries are the __Pxxx values and the last eight the
 * __Sxxx values (presumably "private" and "shared" -- the definitions live
 * in the architecture headers, not in this file).
 */
73pgprot_t protection_map[16] = {
74 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
75 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
76};
77
78pgprot_t vm_get_page_prot(unsigned long vm_flags)
79{
80 return __pgprot(pgprot_val(protection_map[vm_flags &
81 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
82 pgprot_val(arch_vm_get_page_prot(vm_flags)));
83}
84EXPORT_SYMBOL(vm_get_page_prot);
85
/* Overcommit policy consulted by __vm_enough_memory() and mmap_region(). */
86int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
/* Percentage of (RAM - hugetlb pages) that the strict policy lets be committed. */
87int sysctl_overcommit_ratio = 50;
/* do_mmap_pgoff() refuses new mappings once mm->map_count exceeds this. */
88int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
/* Committed-pages counter; read in the strict branch of __vm_enough_memory(). */
89struct percpu_counter vm_committed_as;
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
108{
109 unsigned long free, allowed;
110
111 vm_acct_memory(pages);
112
113
114
115
116 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
117 return 0;
118
119 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
120 unsigned long n;
121
122 free = global_page_state(NR_FILE_PAGES);
123 free += nr_swap_pages;
124
125
126
127
128
129
130
131 free += global_page_state(NR_SLAB_RECLAIMABLE);
132
133
134
135
136 if (!cap_sys_admin)
137 free -= free / 32;
138
139 if (free > pages)
140 return 0;
141
142
143
144
145
146 n = nr_free_pages();
147
148
149
150
151 if (n <= totalreserve_pages)
152 goto error;
153 else
154 n -= totalreserve_pages;
155
156
157
158
159 if (!cap_sys_admin)
160 n -= n / 32;
161 free += n;
162
163 if (free > pages)
164 return 0;
165
166 goto error;
167 }
168
169 allowed = (totalram_pages - hugetlb_total_pages())
170 * sysctl_overcommit_ratio / 100;
171
172
173
174 if (!cap_sys_admin)
175 allowed -= allowed / 32;
176 allowed += total_swap_pages;
177
178
179
180 if (mm)
181 allowed -= mm->total_vm / 32;
182
183 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
184 return 0;
185error:
186 vm_unacct_memory(pages);
187
188 return -ENOMEM;
189}
190
191
192
193
194static void __remove_shared_vm_struct(struct vm_area_struct *vma,
195 struct file *file, struct address_space *mapping)
196{
197 if (vma->vm_flags & VM_DENYWRITE)
198 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
199 if (vma->vm_flags & VM_SHARED)
200 mapping->i_mmap_writable--;
201
202 flush_dcache_mmap_lock(mapping);
203 if (unlikely(vma->vm_flags & VM_NONLINEAR))
204 list_del_init(&vma->shared.vm_set.list);
205 else
206 vma_prio_tree_remove(vma, &mapping->i_mmap);
207 flush_dcache_mmap_unlock(mapping);
208}
209
210
211
212
213
214void unlink_file_vma(struct vm_area_struct *vma)
215{
216 struct file *file = vma->vm_file;
217
218 if (file) {
219 struct address_space *mapping = file->f_mapping;
220 spin_lock(&mapping->i_mmap_lock);
221 __remove_shared_vm_struct(vma, file, mapping);
222 spin_unlock(&mapping->i_mmap_lock);
223 }
224}
225
226
227
228
229static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
230{
231 struct vm_area_struct *next = vma->vm_next;
232
233 might_sleep();
234 if (vma->vm_ops && vma->vm_ops->close)
235 vma->vm_ops->close(vma);
236 if (vma->vm_file) {
237 fput(vma->vm_file);
238 if (vma->vm_flags & VM_EXECUTABLE)
239 removed_exe_file_vma(vma->vm_mm);
240 }
241 mpol_put(vma_policy(vma));
242 kmem_cache_free(vm_area_cachep, vma);
243 return next;
244}
245
246SYSCALL_DEFINE1(brk, unsigned long, brk)
247{
248 unsigned long rlim, retval;
249 unsigned long newbrk, oldbrk;
250 struct mm_struct *mm = current->mm;
251 unsigned long min_brk;
252
253 down_write(&mm->mmap_sem);
254
255#ifdef CONFIG_COMPAT_BRK
256 min_brk = mm->end_code;
257#else
258 min_brk = mm->start_brk;
259#endif
260 if (brk < min_brk)
261 goto out;
262
263
264
265
266
267
268
269 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
270 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
271 (mm->end_data - mm->start_data) > rlim)
272 goto out;
273
274 newbrk = PAGE_ALIGN(brk);
275 oldbrk = PAGE_ALIGN(mm->brk);
276 if (oldbrk == newbrk)
277 goto set_brk;
278
279
280 if (brk <= mm->brk) {
281 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
282 goto set_brk;
283 goto out;
284 }
285
286
287 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
288 goto out;
289
290
291 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
292 goto out;
293set_brk:
294 mm->brk = brk;
295out:
296 retval = mm->brk;
297 up_write(&mm->mmap_sem);
298 return retval;
299}
300
301#ifdef DEBUG_MM_RB
302static int browse_rb(struct rb_root *root)
303{
304 int i = 0, j;
305 struct rb_node *nd, *pn = NULL;
306 unsigned long prev = 0, pend = 0;
307
308 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
309 struct vm_area_struct *vma;
310 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
311 if (vma->vm_start < prev)
312 printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
313 if (vma->vm_start < pend)
314 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
315 if (vma->vm_start > vma->vm_end)
316 printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
317 i++;
318 pn = nd;
319 prev = vma->vm_start;
320 pend = vma->vm_end;
321 }
322 j = 0;
323 for (nd = pn; nd; nd = rb_prev(nd)) {
324 j++;
325 }
326 if (i != j)
327 printk("backwards %d, forwards %d\n", j, i), i = 0;
328 return i;
329}
330
331void validate_mm(struct mm_struct *mm)
332{
333 int bug = 0;
334 int i = 0;
335 struct vm_area_struct *tmp = mm->mmap;
336 while (tmp) {
337 tmp = tmp->vm_next;
338 i++;
339 }
340 if (i != mm->map_count)
341 printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
342 i = browse_rb(&mm->mm_rb);
343 if (i != mm->map_count)
344 printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
345 BUG_ON(bug);
346}
347#else
348#define validate_mm(mm) do { } while (0)
349#endif
350
351static struct vm_area_struct *
352find_vma_prepare(struct mm_struct *mm, unsigned long addr,
353 struct vm_area_struct **pprev, struct rb_node ***rb_link,
354 struct rb_node ** rb_parent)
355{
356 struct vm_area_struct * vma;
357 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
358
359 __rb_link = &mm->mm_rb.rb_node;
360 rb_prev = __rb_parent = NULL;
361 vma = NULL;
362
363 while (*__rb_link) {
364 struct vm_area_struct *vma_tmp;
365
366 __rb_parent = *__rb_link;
367 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
368
369 if (vma_tmp->vm_end > addr) {
370 vma = vma_tmp;
371 if (vma_tmp->vm_start <= addr)
372 break;
373 __rb_link = &__rb_parent->rb_left;
374 } else {
375 rb_prev = __rb_parent;
376 __rb_link = &__rb_parent->rb_right;
377 }
378 }
379
380 *pprev = NULL;
381 if (rb_prev)
382 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
383 *rb_link = __rb_link;
384 *rb_parent = __rb_parent;
385 return vma;
386}
387
388static inline void
389__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
390 struct vm_area_struct *prev, struct rb_node *rb_parent)
391{
392 struct vm_area_struct *next;
393
394 vma->vm_prev = prev;
395 if (prev) {
396 next = prev->vm_next;
397 prev->vm_next = vma;
398 } else {
399 mm->mmap = vma;
400 if (rb_parent)
401 next = rb_entry(rb_parent,
402 struct vm_area_struct, vm_rb);
403 else
404 next = NULL;
405 }
406 vma->vm_next = next;
407 if (next)
408 next->vm_prev = vma;
409}
410
/*
 * Insert @vma into mm->mm_rb: link its node at the slot @rb_link below
 * @rb_parent (the values produced by find_vma_prepare()), then let
 * rb_insert_color() fix up the tree.
 */
411void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
412 struct rb_node **rb_link, struct rb_node *rb_parent)
413{
414 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
415 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
416}
417
418static void __vma_link_file(struct vm_area_struct *vma)
419{
420 struct file *file;
421
422 file = vma->vm_file;
423 if (file) {
424 struct address_space *mapping = file->f_mapping;
425
426 if (vma->vm_flags & VM_DENYWRITE)
427 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
428 if (vma->vm_flags & VM_SHARED)
429 mapping->i_mmap_writable++;
430
431 flush_dcache_mmap_lock(mapping);
432 if (unlikely(vma->vm_flags & VM_NONLINEAR))
433 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
434 else
435 vma_prio_tree_insert(vma, &mapping->i_mmap);
436 flush_dcache_mmap_unlock(mapping);
437 }
438}
439
/*
 * Link @vma into the mm: first the vma list, then the rbtree, then its
 * anon_vma.  Does not touch file linkage or mm->map_count; vma_link() and
 * __insert_vm_struct() handle those around this call.
 */
440static void
441__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
442 struct vm_area_struct *prev, struct rb_node **rb_link,
443 struct rb_node *rb_parent)
444{
445 __vma_link_list(mm, vma, prev, rb_parent);
446 __vma_link_rb(mm, vma, rb_link, rb_parent);
447 __anon_vma_link(vma);
448}
449
450static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
451 struct vm_area_struct *prev, struct rb_node **rb_link,
452 struct rb_node *rb_parent)
453{
454 struct address_space *mapping = NULL;
455
456 if (vma->vm_file)
457 mapping = vma->vm_file->f_mapping;
458
459 if (mapping) {
460 spin_lock(&mapping->i_mmap_lock);
461 vma->vm_truncate_count = mapping->truncate_count;
462 }
463 anon_vma_lock(vma);
464
465 __vma_link(mm, vma, prev, rb_link, rb_parent);
466 __vma_link_file(vma);
467
468 anon_vma_unlock(vma);
469 if (mapping)
470 spin_unlock(&mapping->i_mmap_lock);
471
472 mm->map_count++;
473 validate_mm(mm);
474}
475
476
477
478
479
480
481static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
482{
483 struct vm_area_struct *__vma, *prev;
484 struct rb_node **rb_link, *rb_parent;
485
486 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
487 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
488 __vma_link(mm, vma, prev, rb_link, rb_parent);
489 mm->map_count++;
490}
491
492static inline void
493__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
494 struct vm_area_struct *prev)
495{
496 struct vm_area_struct *next = vma->vm_next;
497
498 prev->vm_next = next;
499 if (next)
500 next->vm_prev = prev;
501 rb_erase(&vma->vm_rb, &mm->mm_rb);
502 if (mm->mmap_cache == vma)
503 mm->mmap_cache = prev;
504}
505
506
507
508
509
510
511
512
/*
 * vma_adjust - change a vma's range and file offset in place.
 * @vma:    vma to resize
 * @start:  new vm_start
 * @end:    new vm_end
 * @pgoff:  new vm_pgoff
 * @insert: optional already-initialised vma to link into the mm once @vma
 *          has been resized (NULL if none)
 *
 * When @insert is NULL the vma following @vma is adjusted to match: it is
 * removed if the new range swallows it (possibly the one after it as well),
 * or its start is moved if the boundary between the two shifts.  The file's
 * i_mmap_lock and the relevant anon_vma lock are held while the fields,
 * prio tree, list and rbtree are updated.
 */
513void vma_adjust(struct vm_area_struct *vma, unsigned long start,
514 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
515{
516 struct mm_struct *mm = vma->vm_mm;
517 struct vm_area_struct *next = vma->vm_next;
518 struct vm_area_struct *importer = NULL;
519 struct address_space *mapping = NULL;
520 struct prio_tree_root *root = NULL;
521 struct file *file = vma->vm_file;
522 struct anon_vma *anon_vma = NULL;
 /* signed page count by which next->vm_start / vm_pgoff will be shifted */
523 long adjust_next = 0;
 /* 0: keep next, 1: remove next, 2: remove next and the vma after it */
524 int remove_next = 0;
525
526 if (next && !insert) {
527 if (end >= next->vm_end) {
528
529
530
531
 /*
  * vma grows over all of next.  If end lies beyond next->vm_end,
  * remove_next becomes 2 and the "goto again" near the bottom
  * re-enters here with the following vma as next.  end is
  * clamped to next->vm_end for this pass.
  */
532again: remove_next = 1 + (end > next->vm_end);
533 end = next->vm_end;
534 anon_vma = next->anon_vma;
535 importer = vma;
536 } else if (end > next->vm_start) {
537
538
539
540
 /* vma grows into the front of next: next's start moves up. */
541 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
542 anon_vma = next->anon_vma;
543 importer = vma;
544 } else if (end < vma->vm_end) {
545
546
547
548
549
 /*
  * vma shrinks at its end: next's start moves down to take over
  * the released range, so next is the one that may need to
  * adopt an anon_vma.
  */
550 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
551 anon_vma = next->anon_vma;
552 importer = next;
553 }
554 }
555
556 if (file) {
557 mapping = file->f_mapping;
 /* Only linear vmas live in the i_mmap prio tree. */
558 if (!(vma->vm_flags & VM_NONLINEAR))
559 root = &mapping->i_mmap;
560 spin_lock(&mapping->i_mmap_lock);
561 if (importer &&
562 vma->vm_truncate_count != next->vm_truncate_count) {
563
564
565
566
 /*
  * The two vmas disagree about vm_truncate_count, so the one
  * taking over pages has its count reset to 0.
  */
567 importer->vm_truncate_count = 0;
568 }
569 if (insert) {
570 insert->vm_truncate_count = vma->vm_truncate_count;
571
572
573
574
575
576
 /*
  * Put insert into the address_space now, while i_mmap_lock is
  * held; it joins the mm list/rbtree further down via
  * __insert_vm_struct().
  */
577 __vma_link_file(insert);
578 }
579 }
580
581
582
583
584
 /*
  * Prefer vma's own anon_vma for locking when vma has one and anything
  * about its anon pages may change (insert, importer, or a moved start);
  * otherwise the next->anon_vma chosen above (if any) is used.
  */
585 if (vma->anon_vma && (insert || importer || start != vma->vm_start))
586 anon_vma = vma->anon_vma;
587 if (anon_vma) {
588 spin_lock(&anon_vma->lock);
589
590
591
592
593
 /* An importer without an anon_vma adopts this one. */
594 if (importer && !importer->anon_vma) {
595 importer->anon_vma = anon_vma;
596 __anon_vma_link(importer);
597 }
598 }
599
 /*
  * Take the affected vmas out of the prio tree before changing their
  * range; they are re-inserted below once the fields are updated.
  */
600 if (root) {
601 flush_dcache_mmap_lock(mapping);
602 vma_prio_tree_remove(vma, root);
603 if (adjust_next)
604 vma_prio_tree_remove(next, root);
605 }
606
607 vma->vm_start = start;
608 vma->vm_end = end;
609 vma->vm_pgoff = pgoff;
610 if (adjust_next) {
611 next->vm_start += adjust_next << PAGE_SHIFT;
612 next->vm_pgoff += adjust_next;
613 }
614
615 if (root) {
616 if (adjust_next)
617 vma_prio_tree_insert(next, root);
618 vma_prio_tree_insert(vma, root);
619 flush_dcache_mmap_unlock(mapping);
620 }
621
622 if (remove_next) {
623
624
625
626
 /*
  * Unlink next from the mm, the file mapping and the anon_vma
  * while the locks are still held; it is freed after unlocking.
  */
627 __vma_unlink(mm, next, vma);
628 if (file)
629 __remove_shared_vm_struct(next, file, mapping);
630 if (next->anon_vma)
631 __anon_vma_merge(vma, next);
632 } else if (insert) {
633
634
635
636
637
 /* vma has been resized, so insert can now take its place in the mm. */
638 __insert_vm_struct(mm, insert);
639 }
640
641 if (anon_vma)
642 spin_unlock(&anon_vma->lock);
643 if (mapping)
644 spin_unlock(&mapping->i_mmap_lock);
645
 /* Locks dropped: release what the removed vma was holding and free it. */
646 if (remove_next) {
647 if (file) {
648 fput(file);
649 if (next->vm_flags & VM_EXECUTABLE)
650 removed_exe_file_vma(mm);
651 }
652 mm->map_count--;
653 mpol_put(vma_policy(next));
654 kmem_cache_free(vm_area_cachep, next);
655
656
657
658
659
 /*
  * remove_next == 2: the original end reached past the vma just
  * freed, so repeat the removal for the new successor.
  */
660 if (remove_next == 2) {
661 next = vma->vm_next;
662 goto again;
663 }
664 }
665
666 validate_mm(mm);
667}
668
669
670
671
672
673static inline int is_mergeable_vma(struct vm_area_struct *vma,
674 struct file *file, unsigned long vm_flags)
675{
676
677 if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
678 return 0;
679 if (vma->vm_file != file)
680 return 0;
681 if (vma->vm_ops && vma->vm_ops->close)
682 return 0;
683 return 1;
684}
685
/*
 * Two anon_vmas are compatible when at least one is unset or both are the
 * same object.  Returns 1 if compatible, 0 otherwise.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
691
692
693
694
695
696
697
698
699
700
701
702
703static int
704can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
705 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
706{
707 if (is_mergeable_vma(vma, file, vm_flags) &&
708 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
709 if (vma->vm_pgoff == vm_pgoff)
710 return 1;
711 }
712 return 0;
713}
714
715
716
717
718
719
720
721
722static int
723can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
724 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
725{
726 if (is_mergeable_vma(vma, file, vm_flags) &&
727 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
728 pgoff_t vm_pglen;
729 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
730 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
731 return 1;
732 }
733 return 0;
734}
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
/*
 * vma_merge - try to absorb the range [addr, end) into a neighbouring vma.
 * @mm:       address space
 * @prev:     vma preceding the range, or NULL
 * @addr:     start of the range
 * @end:      end of the range
 * @vm_flags: flags the range has (or will have)
 * @anon_vma: anon_vma of the range, or NULL
 * @file:     backing file, or NULL
 * @pgoff:    page offset of @addr
 * @policy:   memory policy of the range
 *
 * Returns the vma that now covers the range if a merge was done (via
 * vma_adjust()), or NULL if no merge was possible.
 */
765struct vm_area_struct *vma_merge(struct mm_struct *mm,
766 struct vm_area_struct *prev, unsigned long addr,
767 unsigned long end, unsigned long vm_flags,
768 struct anon_vma *anon_vma, struct file *file,
769 pgoff_t pgoff, struct mempolicy *policy)
770{
 /* length of the range in pages */
771 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
772 struct vm_area_struct *area, *next;
773
774
775
776
777
 /* Ranges carrying any VM_SPECIAL flag are never merged. */
778 if (vm_flags & VM_SPECIAL)
779 return NULL;
780
781 if (prev)
782 next = prev->vm_next;
783 else
784 next = mm->mmap;
 /* area: the vma right after prev (the first vma when prev is NULL). */
785 area = next;
 /*
  * If that vma ends exactly at end, the vma after it is the candidate
  * for merging on the high side.
  */
786 if (next && next->vm_end == end)
787 next = next->vm_next;
788
789
790
791
 /* First choice: extend prev forwards over the range. */
792 if (prev && prev->vm_end == addr &&
793 mpol_equal(vma_policy(prev), policy) &&
794 can_vma_merge_after(prev, vm_flags,
795 anon_vma, file, pgoff)) {
796
797
798
 /*
  * If next is also adjacent and compatible (including prev's and
  * next's anon_vmas with each other), extend prev all the way to
  * next->vm_end; otherwise extend it only to end.
  */
799 if (next && end == next->vm_start &&
800 mpol_equal(policy, vma_policy(next)) &&
801 can_vma_merge_before(next, vm_flags,
802 anon_vma, file, pgoff+pglen) &&
803 is_mergeable_anon_vma(prev->anon_vma,
804 next->anon_vma)) {
805
806 vma_adjust(prev, prev->vm_start,
807 next->vm_end, prev->vm_pgoff, NULL);
808 } else
809 vma_adjust(prev, prev->vm_start,
810 end, prev->vm_pgoff, NULL);
811 return prev;
812 }
813
814
815
816
 /* Second choice: merge the range with next on its high side. */
817 if (next && end == next->vm_start &&
818 mpol_equal(policy, vma_policy(next)) &&
819 can_vma_merge_before(next, vm_flags,
820 anon_vma, file, pgoff+pglen)) {
 /*
  * The range is the tail of prev: shrink prev to end at addr
  * (vma_adjust() then moves next's start down).  Otherwise set
  * area to run from addr to next->vm_end.
  */
821 if (prev && addr < prev->vm_end)
822 vma_adjust(prev, prev->vm_start,
823 addr, prev->vm_pgoff, NULL);
824 else
825 vma_adjust(area, addr, next->vm_end,
826 next->vm_pgoff - pglen, NULL);
827 return area;
828 }
829
830 return NULL;
831}
832
833
834
835
836
837
838
839
840
/*
 * find_mergeable_anon_vma - look for a neighbour's anon_vma that @vma could
 * share.
 *
 * Checks the following vma and then the preceding one.  A neighbour
 * qualifies if it has an anon_vma, is directly adjacent, has an equal
 * memory policy, and passes can_vma_merge_before()/after() once the
 * READ/WRITE/EXEC bits are taken from the neighbour instead of @vma.
 * Returns that neighbour's anon_vma, or NULL if neither qualifies.
 */
841struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
842{
843 struct vm_area_struct *near;
844 unsigned long vm_flags;
845
846 near = vma->vm_next;
847 if (!near)
848 goto try_prev;
849
850
851
852
853
854
855
 /*
  * Compare using vma's flags but the neighbour's READ/WRITE/EXEC bits,
  * so a difference in those three bits alone does not rule it out.
  */
856 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
857 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
858
859 if (near->anon_vma && vma->vm_end == near->vm_start &&
860 mpol_equal(vma_policy(vma), vma_policy(near)) &&
861 can_vma_merge_before(near, vm_flags,
862 NULL, vma->vm_file, vma->vm_pgoff +
863 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
864 return near->anon_vma;
865try_prev:
866
867
868
869
870
871
872
 /*
  * NOTE: the call inside BUG_ON() is not only a sanity check -- it is
  * what stores vma's predecessor in near, so it must stay evaluated.
  */
873 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
874 if (!near)
875 goto none;
876
877 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
878 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
879
880 if (near->anon_vma && near->vm_end == vma->vm_start &&
881 mpol_equal(vma_policy(near), vma_policy(vma)) &&
882 can_vma_merge_after(near, vm_flags,
883 NULL, vma->vm_file, vma->vm_pgoff))
884 return near->anon_vma;
885none:
886
887
888
889
890
891
892
893
 /* Neither neighbour qualifies. */
894 return NULL;
895}
896
897#ifdef CONFIG_PROC_FS
898void vm_stat_account(struct mm_struct *mm, unsigned long flags,
899 struct file *file, long pages)
900{
901 const unsigned long stack_flags
902 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
903
904 if (file) {
905 mm->shared_vm += pages;
906 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
907 mm->exec_vm += pages;
908 } else if (flags & stack_flags)
909 mm->stack_vm += pages;
910 if (flags & (VM_RESERVED|VM_IO))
911 mm->reserved_vm += pages;
912}
913#endif
914
915
916
917
918
/*
 * do_mmap_pgoff - validate an mmap request and hand it to mmap_region().
 * @file:  file to map, or NULL for an anonymous mapping
 * @addr:  address hint (or fixed address with MAP_FIXED)
 * @len:   length in bytes; rounded up to a page multiple
 * @prot:  PROT_* bits
 * @flags: MAP_* bits
 * @pgoff: offset into @file in pages
 *
 * Returns the mapped address on success or a negative errno cast to
 * unsigned long on failure.
 */
919unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
920 unsigned long len, unsigned long prot,
921 unsigned long flags, unsigned long pgoff)
922{
923 struct mm_struct * mm = current->mm;
924 struct inode *inode;
925 unsigned int vm_flags;
926 int error;
 /* the protection as originally requested, passed to the security hook */
927 unsigned long reqprot = prot;
928
929
930
931
932
933
934
 /*
  * With the READ_IMPLIES_EXEC personality, PROT_READ also grants
  * PROT_EXEC, except for files on a MNT_NOEXEC mount.
  */
935 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
936 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
937 prot |= PROT_EXEC;
938
939 if (!len)
940 return -EINVAL;
941
942 if (!(flags & MAP_FIXED))
943 addr = round_hint_to_min(addr);
944
945
 /* PAGE_ALIGN() wrapping to 0 means len was too large. */
946 len = PAGE_ALIGN(len);
947 if (!len)
948 return -ENOMEM;
949
950
 /* Reject a file offset that would wrap. */
951 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
952 return -EOVERFLOW;
953
954
 /* Too many mappings already? */
955 if (mm->map_count > sysctl_max_map_count)
956 return -ENOMEM;
957
958
959
960
 /*
  * Pick the address.  A result that is not page aligned is an error
  * code from get_unmapped_area() and is returned unchanged.
  */
961 addr = get_unmapped_area(file, addr, len, pgoff, flags);
962 if (addr & ~PAGE_MASK)
963 return addr;
964
965
966
967
968
 /*
  * Translate prot/flags to vm_flags; the VM_MAY* bits start out all set
  * and are trimmed by the file checks below.
  */
969 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
970 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
971
972 if (flags & MAP_LOCKED)
973 if (!can_do_mlock())
974 return -EPERM;
975
976
 /*
  * A locked mapping must fit within RLIMIT_MEMLOCK unless the caller
  * has CAP_IPC_LOCK.
  */
977 if (vm_flags & VM_LOCKED) {
978 unsigned long locked, lock_limit;
979 locked = len >> PAGE_SHIFT;
980 locked += mm->locked_vm;
981 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
982 lock_limit >>= PAGE_SHIFT;
983 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
984 return -EAGAIN;
985 }
986
987 inode = file ? file->f_path.dentry->d_inode : NULL;
988
989 if (file) {
990 switch (flags & MAP_TYPE) {
991 case MAP_SHARED:
 /* A writable shared mapping needs the file opened for writing. */
992 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
993 return -EACCES;
994
995
996
997
998
 /* No shared mapping of an append-only inode through a writable fd. */
999 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1000 return -EACCES;
1001
1002
1003
1004
 /* Fail if locks_verify_locked() reports a conflict. */
1005 if (locks_verify_locked(inode))
1006 return -EAGAIN;
1007
1008 vm_flags |= VM_SHARED | VM_MAYSHARE;
 /* A read-only fd yields a mapping that can never become writable. */
1009 if (!(file->f_mode & FMODE_WRITE))
1010 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1011
1012
 /* No break: MAP_SHARED continues into the MAP_PRIVATE checks. */
1013 case MAP_PRIVATE:
1014 if (!(file->f_mode & FMODE_READ))
1015 return -EACCES;
 /* noexec mount: refuse executable mappings and drop VM_MAYEXEC. */
1016 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1017 if (vm_flags & VM_EXEC)
1018 return -EPERM;
1019 vm_flags &= ~VM_MAYEXEC;
1020 }
1021
1022 if (!file->f_op || !file->f_op->mmap)
1023 return -ENODEV;
1024 break;
1025
1026 default:
1027 return -EINVAL;
1028 }
1029 } else {
1030 switch (flags & MAP_TYPE) {
1031 case MAP_SHARED:
1032
1033
1034
 /* Anonymous shared mapping: the offset is forced to 0. */
1035 pgoff = 0;
1036 vm_flags |= VM_SHARED | VM_MAYSHARE;
1037 break;
1038 case MAP_PRIVATE:
1039
1040
1041
 /* Anonymous private mapping: the offset is derived from the address. */
1042 pgoff = addr >> PAGE_SHIFT;
1043 break;
1044 default:
1045 return -EINVAL;
1046 }
1047 }
1048
 /* Security and IMA hooks get the final say before the mapping is made. */
1049 error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
1050 if (error)
1051 return error;
1052 error = ima_file_mmap(file, prot);
1053 if (error)
1054 return error;
1055
1056 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1057}
1058EXPORT_SYMBOL(do_mmap_pgoff);
1059
1060
1061
1062
1063
1064
1065
1066int vma_wants_writenotify(struct vm_area_struct *vma)
1067{
1068 unsigned int vm_flags = vma->vm_flags;
1069
1070
1071 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1072 return 0;
1073
1074
1075 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1076 return 1;
1077
1078
1079 if (pgprot_val(vma->vm_page_prot) !=
1080 pgprot_val(vm_get_page_prot(vm_flags)))
1081 return 0;
1082
1083
1084 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1085 return 0;
1086
1087
1088 return vma->vm_file && vma->vm_file->f_mapping &&
1089 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1090}
1091
1092
1093
1094
1095
1096static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1097{
1098
1099
1100
1101
1102 if (file && is_file_hugepages(file))
1103 return 0;
1104
1105 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1106}
1107
/*
 * mmap_region - create the mapping [addr, addr + len) in current->mm.
 * @file:     backing file, or NULL
 * @addr:     page-aligned start address
 * @len:      length in bytes
 * @flags:    MAP_* flags of the request
 * @vm_flags: VM_* flags computed by the caller
 * @pgoff:    page offset into @file
 *
 * Anything already mapped in the range is unmapped first.  The range is
 * then merged into a neighbouring vma if possible, otherwise a new vma is
 * allocated, set up and linked.  Returns the start address of the mapping
 * (a driver's ->mmap() may have changed it) or a negative errno cast to
 * unsigned long.
 */
1108unsigned long mmap_region(struct file *file, unsigned long addr,
1109 unsigned long len, unsigned long flags,
1110 unsigned int vm_flags, unsigned long pgoff)
1111{
1112 struct mm_struct *mm = current->mm;
1113 struct vm_area_struct *vma, *prev;
 /* set once deny_write_access() succeeded and must be undone */
1114 int correct_wcount = 0;
1115 int error;
1116 struct rb_node **rb_link, *rb_parent;
 /* pages charged via security_vm_enough_memory(), 0 if none */
1117 unsigned long charged = 0;
1118 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1119
1120
 /* Clear out old mappings in the range, retrying until none overlap. */
1121 error = -ENOMEM;
1122munmap_back:
1123 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1124 if (vma && vma->vm_start < addr + len) {
1125 if (do_munmap(mm, addr, len))
1126 return -ENOMEM;
1127 goto munmap_back;
1128 }
1129
1130
 /* Check whether the mm may grow by this many pages. */
1131 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1132 return -ENOMEM;
1133
1134
1135
1136
1137
 /*
  * MAP_NORESERVE is honoured unless overcommit is OVERCOMMIT_NEVER;
  * for hugetlb files it is honoured regardless.
  */
1138 if ((flags & MAP_NORESERVE)) {
1139
1140 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1141 vm_flags |= VM_NORESERVE;
1142
1143
1144 if (file && is_file_hugepages(file))
1145 vm_flags |= VM_NORESERVE;
1146 }
1147
1148
1149
1150
 /* Private writable mappings are charged against the commit limit. */
1151 if (accountable_mapping(file, vm_flags)) {
1152 charged = len >> PAGE_SHIFT;
1153 if (security_vm_enough_memory(charged))
1154 return -ENOMEM;
1155 vm_flags |= VM_ACCOUNT;
1156 }
1157
1158
1159
1160
 /* Try to extend an adjacent vma instead of creating a new one. */
1161 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1162 if (vma)
1163 goto out;
1164
1165
1166
1167
1168
1169
 /* No merge possible: allocate a zeroed vma and fill it in. */
1170 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1171 if (!vma) {
1172 error = -ENOMEM;
1173 goto unacct_error;
1174 }
1175
1176 vma->vm_mm = mm;
1177 vma->vm_start = addr;
1178 vma->vm_end = addr + len;
1179 vma->vm_flags = vm_flags;
1180 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1181 vma->vm_pgoff = pgoff;
1182
1183 if (file) {
1184 error = -EINVAL;
 /* File mappings may not be growable. */
1185 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1186 goto free_vma;
1187 if (vm_flags & VM_DENYWRITE) {
1188 error = deny_write_access(file);
1189 if (error)
1190 goto free_vma;
1191 correct_wcount = 1;
1192 }
1193 vma->vm_file = file;
1194 get_file(file);
1195 error = file->f_op->mmap(file, vma);
1196 if (error)
1197 goto unmap_and_free_vma;
1198 if (vm_flags & VM_EXECUTABLE)
1199 added_exe_file_vma(mm);
1200
1201
1202
1203
1204
1205
 /*
  * Re-read these from the vma: they are refreshed here after the
  * ->mmap() call, which was handed the vma and may have changed them.
  */
1206 addr = vma->vm_start;
1207 pgoff = vma->vm_pgoff;
1208 vm_flags = vma->vm_flags;
1209 } else if (vm_flags & VM_SHARED) {
 /* Anonymous shared mapping. */
1210 error = shmem_zero_setup(vma);
1211 if (error)
1212 goto free_vma;
1213 }
1214
 /*
  * If writes must be noticed, use the page protection that the same
  * flags would give without VM_SHARED.
  */
1215 if (vma_wants_writenotify(vma))
1216 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1217
1218 vma_link(mm, vma, prev, rb_link, rb_parent);
 /* Use the vma's file from here on (shmem_zero_setup() may have set one -- TODO confirm). */
1219 file = vma->vm_file;
1220
1221
 /*
  * The vma is linked (and __vma_link_file() has taken its own
  * VM_DENYWRITE count), so undo the temporary deny_write_access().
  */
1222 if (correct_wcount)
1223 atomic_inc(&inode->i_writecount);
1224out:
1225 perf_event_mmap(vma);
1226
1227 mm->total_vm += len >> PAGE_SHIFT;
1228 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1229 if (vm_flags & VM_LOCKED) {
1230
1231
1232
 /*
  * A negative result from mlock_vma_pages_range() is returned as
  * the error; otherwise nr_pages is subtracted from the locked
  * page accounting.
  */
1233 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
1234 if (nr_pages < 0)
1235 return nr_pages;
1236 mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
1237 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1238 make_pages_present(addr, addr + len);
1239 return addr;
1240
 /* ->mmap() failed: drop the write denial and the file reference. */
1241unmap_and_free_vma:
1242 if (correct_wcount)
1243 atomic_inc(&inode->i_writecount);
1244 vma->vm_file = NULL;
1245 fput(file);
1246
1247
 /* Undo whatever the driver may already have mapped. */
1248 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
 /* charged is zeroed so the vm_unacct_memory() below is skipped on this path. */
1249 charged = 0;
1250free_vma:
1251 kmem_cache_free(vm_area_cachep, vma);
1252unacct_error:
1253 if (charged)
1254 vm_unacct_memory(charged);
1255 return error;
1256}
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269#ifndef HAVE_ARCH_UNMAPPED_AREA
/*
 * arch_get_unmapped_area - generic bottom-up search for a free range.
 *
 * Returns @addr unchanged for MAP_FIXED.  Otherwise honours the @addr hint
 * if that range is free, and failing that scans the vma list upwards from
 * mm->free_area_cache (or from TASK_UNMAPPED_BASE) for a gap of at least
 * @len bytes.  Returns the chosen address or -ENOMEM.
 */
1270unsigned long
1271arch_get_unmapped_area(struct file *filp, unsigned long addr,
1272 unsigned long len, unsigned long pgoff, unsigned long flags)
1273{
1274 struct mm_struct *mm = current->mm;
1275 struct vm_area_struct *vma;
1276 unsigned long start_addr;
1277
1278 if (len > TASK_SIZE)
1279 return -ENOMEM;
1280
1281 if (flags & MAP_FIXED)
1282 return addr;
1283
 /* Use the hint if the range fits below TASK_SIZE and is unoccupied. */
1284 if (addr) {
1285 addr = PAGE_ALIGN(addr);
1286 vma = find_vma(mm, addr);
1287 if (TASK_SIZE - len >= addr &&
1288 (!vma || addr + len <= vma->vm_start))
1289 return addr;
1290 }
 /*
  * A request larger than the recorded hole size starts at
  * free_area_cache.  Otherwise restart from TASK_UNMAPPED_BASE and
  * reset the recorded hole size, which the scan below recomputes.
  */
1291 if (len > mm->cached_hole_size) {
1292 start_addr = addr = mm->free_area_cache;
1293 } else {
1294 start_addr = addr = TASK_UNMAPPED_BASE;
1295 mm->cached_hole_size = 0;
1296 }
1297
1298full_search:
1299 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1300
 /* Ran out of address space above addr. */
1301 if (TASK_SIZE - len < addr) {
1302
1303
1304
1305
 /*
  * If the scan did not start at TASK_UNMAPPED_BASE, redo it
  * once from there before giving up.
  */
1306 if (start_addr != TASK_UNMAPPED_BASE) {
1307 addr = TASK_UNMAPPED_BASE;
1308 start_addr = addr;
1309 mm->cached_hole_size = 0;
1310 goto full_search;
1311 }
1312 return -ENOMEM;
1313 }
1314 if (!vma || addr + len <= vma->vm_start) {
1315
1316
1317
 /* Found a gap: remember where the next search should start. */
1318 mm->free_area_cache = addr + len;
1319 return addr;
1320 }
 /* Gap too small: record it if it is the largest one skipped so far. */
1321 if (addr + mm->cached_hole_size < vma->vm_start)
1322 mm->cached_hole_size = vma->vm_start - addr;
1323 addr = vma->vm_end;
1324 }
1325}
1326#endif
1327
1328void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1329{
1330
1331
1332
1333 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1334 mm->free_area_cache = addr;
1335 mm->cached_hole_size = ~0UL;
1336 }
1337}
1338
1339
1340
1341
1342
1343#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
/*
 * arch_get_unmapped_area_topdown - generic top-down search for a free range.
 *
 * Returns the address unchanged for MAP_FIXED.  Otherwise honours the hint
 * if that range is free, then tries the range just below
 * mm->free_area_cache, then walks downwards from mm->mmap_base.  If all of
 * that fails it falls back to the bottom-up arch_get_unmapped_area().
 * Returns the chosen address or -ENOMEM.
 */
1344unsigned long
1345arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1346 const unsigned long len, const unsigned long pgoff,
1347 const unsigned long flags)
1348{
1349 struct vm_area_struct *vma;
1350 struct mm_struct *mm = current->mm;
1351 unsigned long addr = addr0;
1352
1353
1354 if (len > TASK_SIZE)
1355 return -ENOMEM;
1356
1357 if (flags & MAP_FIXED)
1358 return addr;
1359
1360
 /* Use the hint if the range fits below TASK_SIZE and is unoccupied. */
1361 if (addr) {
1362 addr = PAGE_ALIGN(addr);
1363 vma = find_vma(mm, addr);
1364 if (TASK_SIZE - len >= addr &&
1365 (!vma || addr + len <= vma->vm_start))
1366 return addr;
1367 }
1368
1369
 /*
  * The request fits the recorded hole size: restart the search from
  * mmap_base and reset the recorded size.
  */
1370 if (len <= mm->cached_hole_size) {
1371 mm->cached_hole_size = 0;
1372 mm->free_area_cache = mm->mmap_base;
1373 }
1374
1375
1376 addr = mm->free_area_cache;
1377
1378
 /* Fast path: the len bytes directly below free_area_cache. */
1379 if (addr > len) {
1380 vma = find_vma(mm, addr-len);
1381 if (!vma || addr <= vma->vm_start)
1382
1383 return (mm->free_area_cache = addr-len);
1384 }
1385
1386 if (mm->mmap_base < len)
1387 goto bottomup;
1388
1389 addr = mm->mmap_base-len;
1390
1391 do {
1392
1393
1394
1395
1396
 /*
  * find_vma() returns the first vma ending above addr; the range
  * is free if there is none or it starts at or above addr+len.
  */
1397 vma = find_vma(mm, addr);
1398 if (!vma || addr+len <= vma->vm_start)
1399
1400 return (mm->free_area_cache = addr);
1401
1402
 /* Record the gap below this vma if it is the largest skipped so far. */
1403 if (addr + mm->cached_hole_size < vma->vm_start)
1404 mm->cached_hole_size = vma->vm_start - addr;
1405
1406
 /* Next candidate: directly below this vma. */
1407 addr = vma->vm_start-len;
1408 } while (len < vma->vm_start);
1409
1410bottomup:
1411
1412
1413
1414
1415
1416
 /*
  * Top-down search failed.  Point the cache at TASK_UNMAPPED_BASE for
  * the bottom-up allocator, run it with the original hint...
  */
1417 mm->cached_hole_size = ~0UL;
1418 mm->free_area_cache = TASK_UNMAPPED_BASE;
1419 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1420
1421
1422
 /* ...and restore the top-down starting point for later calls. */
1423 mm->free_area_cache = mm->mmap_base;
1424 mm->cached_hole_size = ~0UL;
1425
1426 return addr;
1427}
1428#endif
1429
1430void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1431{
1432
1433
1434
1435 if (addr > mm->free_area_cache)
1436 mm->free_area_cache = addr;
1437
1438
1439 if (mm->free_area_cache > mm->mmap_base)
1440 mm->free_area_cache = mm->mmap_base;
1441}
1442
1443unsigned long
1444get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1445 unsigned long pgoff, unsigned long flags)
1446{
1447 unsigned long (*get_area)(struct file *, unsigned long,
1448 unsigned long, unsigned long, unsigned long);
1449
1450 unsigned long error = arch_mmap_check(addr, len, flags);
1451 if (error)
1452 return error;
1453
1454
1455 if (len > TASK_SIZE)
1456 return -ENOMEM;
1457
1458 get_area = current->mm->get_unmapped_area;
1459 if (file && file->f_op && file->f_op->get_unmapped_area)
1460 get_area = file->f_op->get_unmapped_area;
1461 addr = get_area(file, addr, len, pgoff, flags);
1462 if (IS_ERR_VALUE(addr))
1463 return addr;
1464
1465 if (addr > TASK_SIZE - len)
1466 return -ENOMEM;
1467 if (addr & ~PAGE_MASK)
1468 return -EINVAL;
1469
1470 return arch_rebalance_pgtables(addr, len);
1471}
1472
1473EXPORT_SYMBOL(get_unmapped_area);
1474
1475
1476struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1477{
1478 struct vm_area_struct *vma = NULL;
1479
1480 if (mm) {
1481
1482
1483 vma = mm->mmap_cache;
1484 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1485 struct rb_node * rb_node;
1486
1487 rb_node = mm->mm_rb.rb_node;
1488 vma = NULL;
1489
1490 while (rb_node) {
1491 struct vm_area_struct * vma_tmp;
1492
1493 vma_tmp = rb_entry(rb_node,
1494 struct vm_area_struct, vm_rb);
1495
1496 if (vma_tmp->vm_end > addr) {
1497 vma = vma_tmp;
1498 if (vma_tmp->vm_start <= addr)
1499 break;
1500 rb_node = rb_node->rb_left;
1501 } else
1502 rb_node = rb_node->rb_right;
1503 }
1504 if (vma)
1505 mm->mmap_cache = vma;
1506 }
1507 }
1508 return vma;
1509}
1510
1511EXPORT_SYMBOL(find_vma);
1512
1513
1514struct vm_area_struct *
1515find_vma_prev(struct mm_struct *mm, unsigned long addr,
1516 struct vm_area_struct **pprev)
1517{
1518 struct vm_area_struct *vma = NULL, *prev = NULL;
1519 struct rb_node *rb_node;
1520 if (!mm)
1521 goto out;
1522
1523
1524 vma = mm->mmap;
1525
1526
1527 rb_node = mm->mm_rb.rb_node;
1528
1529 while (rb_node) {
1530 struct vm_area_struct *vma_tmp;
1531 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1532
1533 if (addr < vma_tmp->vm_end) {
1534 rb_node = rb_node->rb_left;
1535 } else {
1536 prev = vma_tmp;
1537 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1538 break;
1539 rb_node = rb_node->rb_right;
1540 }
1541 }
1542
1543out:
1544 *pprev = prev;
1545 return prev ? prev->vm_next : vma;
1546}
1547
1548
1549
1550
1551
1552
1553static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1554{
1555 struct mm_struct *mm = vma->vm_mm;
1556 struct rlimit *rlim = current->signal->rlim;
1557 unsigned long new_start;
1558
1559
1560 if (!may_expand_vm(mm, grow))
1561 return -ENOMEM;
1562
1563
1564 if (size > rlim[RLIMIT_STACK].rlim_cur)
1565 return -ENOMEM;
1566
1567
1568 if (vma->vm_flags & VM_LOCKED) {
1569 unsigned long locked;
1570 unsigned long limit;
1571 locked = mm->locked_vm + grow;
1572 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1573 if (locked > limit && !capable(CAP_IPC_LOCK))
1574 return -ENOMEM;
1575 }
1576
1577
1578 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1579 vma->vm_end - size;
1580 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1581 return -EFAULT;
1582
1583
1584
1585
1586
1587 if (security_vm_enough_memory_mm(mm, grow))
1588 return -ENOMEM;
1589
1590
1591 mm->total_vm += grow;
1592 if (vma->vm_flags & VM_LOCKED)
1593 mm->locked_vm += grow;
1594 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1595 return 0;
1596}
1597
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * Grow a VM_GROWSUP VMA so that it covers @address.
 *
 * The new end is PAGE_ALIGN(@address + 4).  If that computation does not
 * yield a value above @address (wrap-around at the top of the address
 * space) the request is refused with -ENOMEM.  vm_end is only changed
 * while the anon_vma lock is held, and only if it is still below the new
 * end once the lock has been taken.
 *
 * Returns 0 on success (including "already large enough"), -EFAULT if the
 * VMA is not VM_GROWSUP, -ENOMEM on allocation/limit failure, or whatever
 * acct_stack_growth() reports.
 */
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long new_end;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* Make sure an anon_vma exists before taking its lock. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;
	anon_vma_lock(vma);

	/* Reject addresses whose aligned successor wraps around. */
	new_end = PAGE_ALIGN(address+4);
	if (!(address < new_end)) {
		error = -ENOMEM;
		goto out_unlock;
	}
	address = new_end;

	/* Re-check under the lock: vm_end may already be high enough. */
	if (address > vma->vm_end) {
		unsigned long size = address - vma->vm_start;
		unsigned long grow = (address - vma->vm_end) >> PAGE_SHIFT;

		/* Refuse if the page offset of the new end would overflow. */
		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error)
				vma->vm_end = address;
		} else {
			error = -ENOMEM;
		}
	}

out_unlock:
	anon_vma_unlock(vma);
	return error;
}
#endif
1650
1651
1652
1653
1654static int expand_downwards(struct vm_area_struct *vma,
1655 unsigned long address)
1656{
1657 int error;
1658
1659
1660
1661
1662
1663 if (unlikely(anon_vma_prepare(vma)))
1664 return -ENOMEM;
1665
1666 address &= PAGE_MASK;
1667 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1668 if (error)
1669 return error;
1670
1671 anon_vma_lock(vma);
1672
1673
1674
1675
1676
1677
1678
1679
1680 if (address < vma->vm_start) {
1681 unsigned long size, grow;
1682
1683 size = vma->vm_end - address;
1684 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1685
1686 error = -ENOMEM;
1687 if (grow <= vma->vm_pgoff) {
1688 error = acct_stack_growth(vma, size, grow);
1689 if (!error) {
1690 vma->vm_start = address;
1691 vma->vm_pgoff -= grow;
1692 }
1693 }
1694 }
1695 anon_vma_unlock(vma);
1696 return error;
1697}
1698
/*
 * Non-static entry point to expand_downwards(), available regardless of
 * which direction expand_stack() grows in this configuration.
 * Returns the result of expand_downwards() unchanged.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	int error;

	error = expand_downwards(vma, address);
	return error;
}
1703
1704#ifdef CONFIG_STACK_GROWSUP
/*
 * CONFIG_STACK_GROWSUP flavour: growing the stack means growing upwards.
 * Returns the result of expand_upwards() unchanged.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error;

	error = expand_upwards(vma, address);
	return error;
}
1709
1710struct vm_area_struct *
1711find_extend_vma(struct mm_struct *mm, unsigned long addr)
1712{
1713 struct vm_area_struct *vma, *prev;
1714
1715 addr &= PAGE_MASK;
1716 vma = find_vma_prev(mm, addr, &prev);
1717 if (vma && (vma->vm_start <= addr))
1718 return vma;
1719 if (!prev || expand_stack(prev, addr))
1720 return NULL;
1721 if (prev->vm_flags & VM_LOCKED) {
1722 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
1723 return NULL;
1724 }
1725 return prev;
1726}
1727#else
/*
 * Default flavour (no CONFIG_STACK_GROWSUP): growing the stack means
 * growing downwards.  Returns the result of expand_downwards() unchanged.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error;

	error = expand_downwards(vma, address);
	return error;
}
1732
1733struct vm_area_struct *
1734find_extend_vma(struct mm_struct * mm, unsigned long addr)
1735{
1736 struct vm_area_struct * vma;
1737 unsigned long start;
1738
1739 addr &= PAGE_MASK;
1740 vma = find_vma(mm,addr);
1741 if (!vma)
1742 return NULL;
1743 if (vma->vm_start <= addr)
1744 return vma;
1745 if (!(vma->vm_flags & VM_GROWSDOWN))
1746 return NULL;
1747 start = vma->vm_start;
1748 if (expand_stack(vma, addr))
1749 return NULL;
1750 if (vma->vm_flags & VM_LOCKED) {
1751 if (mlock_vma_pages_range(vma, addr, start) < 0)
1752 return NULL;
1753 }
1754 return vma;
1755}
1756#endif
1757
1758
1759
1760
1761
1762
1763
1764static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1765{
1766
1767 update_hiwater_vm(mm);
1768 do {
1769 long nrpages = vma_pages(vma);
1770
1771 mm->total_vm -= nrpages;
1772 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1773 vma = remove_vma(vma);
1774 } while (vma);
1775 validate_mm(mm);
1776}
1777
1778
1779
1780
1781
1782
1783static void unmap_region(struct mm_struct *mm,
1784 struct vm_area_struct *vma, struct vm_area_struct *prev,
1785 unsigned long start, unsigned long end)
1786{
1787 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1788 struct mmu_gather *tlb;
1789 unsigned long nr_accounted = 0;
1790
1791 lru_add_drain();
1792 tlb = tlb_gather_mmu(mm, 0);
1793 update_hiwater_rss(mm);
1794 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1795 vm_unacct_memory(nr_accounted);
1796 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1797 next? next->vm_start: 0);
1798 tlb_finish_mmu(tlb, start, end);
1799}
1800
1801
1802
1803
1804
1805static void
1806detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1807 struct vm_area_struct *prev, unsigned long end)
1808{
1809 struct vm_area_struct **insertion_point;
1810 struct vm_area_struct *tail_vma = NULL;
1811 unsigned long addr;
1812
1813 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1814 vma->vm_prev = NULL;
1815 do {
1816 rb_erase(&vma->vm_rb, &mm->mm_rb);
1817 mm->map_count--;
1818 tail_vma = vma;
1819 vma = vma->vm_next;
1820 } while (vma && vma->vm_start < end);
1821 *insertion_point = vma;
1822 if (vma)
1823 vma->vm_prev = prev;
1824 tail_vma->vm_next = NULL;
1825 if (mm->unmap_area == arch_unmap_area)
1826 addr = prev ? prev->vm_end : mm->mmap_base;
1827 else
1828 addr = vma ? vma->vm_start : mm->mmap_base;
1829 mm->unmap_area(mm, addr);
1830 mm->mmap_cache = NULL;
1831}
1832
1833
1834
1835
1836
1837int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1838 unsigned long addr, int new_below)
1839{
1840 struct mempolicy *pol;
1841 struct vm_area_struct *new;
1842
1843 if (is_vm_hugetlb_page(vma) && (addr &
1844 ~(huge_page_mask(hstate_vma(vma)))))
1845 return -EINVAL;
1846
1847 if (mm->map_count >= sysctl_max_map_count)
1848 return -ENOMEM;
1849
1850 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1851 if (!new)
1852 return -ENOMEM;
1853
1854
1855 *new = *vma;
1856
1857 if (new_below)
1858 new->vm_end = addr;
1859 else {
1860 new->vm_start = addr;
1861 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1862 }
1863
1864 pol = mpol_dup(vma_policy(vma));
1865 if (IS_ERR(pol)) {
1866 kmem_cache_free(vm_area_cachep, new);
1867 return PTR_ERR(pol);
1868 }
1869 vma_set_policy(new, pol);
1870
1871 if (new->vm_file) {
1872 get_file(new->vm_file);
1873 if (vma->vm_flags & VM_EXECUTABLE)
1874 added_exe_file_vma(mm);
1875 }
1876
1877 if (new->vm_ops && new->vm_ops->open)
1878 new->vm_ops->open(new);
1879
1880 if (new_below)
1881 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1882 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1883 else
1884 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1885
1886 return 0;
1887}
1888
1889
1890
1891
1892
1893
1894int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1895{
1896 unsigned long end;
1897 struct vm_area_struct *vma, *prev, *last;
1898
1899 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1900 return -EINVAL;
1901
1902 if ((len = PAGE_ALIGN(len)) == 0)
1903 return -EINVAL;
1904
1905
1906 vma = find_vma_prev(mm, start, &prev);
1907 if (!vma)
1908 return 0;
1909
1910
1911
1912 end = start + len;
1913 if (vma->vm_start >= end)
1914 return 0;
1915
1916
1917
1918
1919
1920
1921
1922
1923 if (start > vma->vm_start) {
1924 int error = split_vma(mm, vma, start, 0);
1925 if (error)
1926 return error;
1927 prev = vma;
1928 }
1929
1930
1931 last = find_vma(mm, end);
1932 if (last && end > last->vm_start) {
1933 int error = split_vma(mm, last, end, 1);
1934 if (error)
1935 return error;
1936 }
1937 vma = prev? prev->vm_next: mm->mmap;
1938
1939
1940
1941
1942 if (mm->locked_vm) {
1943 struct vm_area_struct *tmp = vma;
1944 while (tmp && tmp->vm_start < end) {
1945 if (tmp->vm_flags & VM_LOCKED) {
1946 mm->locked_vm -= vma_pages(tmp);
1947 munlock_vma_pages_all(tmp);
1948 }
1949 tmp = tmp->vm_next;
1950 }
1951 }
1952
1953
1954
1955
1956 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1957 unmap_region(mm, vma, prev, start, end);
1958
1959
1960 remove_vma_list(mm, vma);
1961
1962 return 0;
1963}
1964
1965EXPORT_SYMBOL(do_munmap);
1966
1967SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
1968{
1969 int ret;
1970 struct mm_struct *mm = current->mm;
1971
1972 profile_munmap(addr);
1973
1974 down_write(&mm->mmap_sem);
1975 ret = do_munmap(mm, addr, len);
1976 up_write(&mm->mmap_sem);
1977 return ret;
1978}
1979
/*
 * Debug aid (CONFIG_DEBUG_VM only): warn if @mm's mmap_sem can be taken
 * for reading, which it could not be if the caller held it for writing.
 * A successfully taken read lock is released again.  Compiles to nothing
 * without CONFIG_DEBUG_VM.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
1989
1990
1991
1992
1993
1994
1995unsigned long do_brk(unsigned long addr, unsigned long len)
1996{
1997 struct mm_struct * mm = current->mm;
1998 struct vm_area_struct * vma, * prev;
1999 unsigned long flags;
2000 struct rb_node ** rb_link, * rb_parent;
2001 pgoff_t pgoff = addr >> PAGE_SHIFT;
2002 int error;
2003
2004 len = PAGE_ALIGN(len);
2005 if (!len)
2006 return addr;
2007
2008 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
2009 if (error)
2010 return error;
2011
2012 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2013
2014 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2015 if (error & ~PAGE_MASK)
2016 return error;
2017
2018
2019
2020
2021 if (mm->def_flags & VM_LOCKED) {
2022 unsigned long locked, lock_limit;
2023 locked = len >> PAGE_SHIFT;
2024 locked += mm->locked_vm;
2025 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
2026 lock_limit >>= PAGE_SHIFT;
2027 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2028 return -EAGAIN;
2029 }
2030
2031
2032
2033
2034
2035 verify_mm_writelocked(mm);
2036
2037
2038
2039
2040 munmap_back:
2041 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2042 if (vma && vma->vm_start < addr + len) {
2043 if (do_munmap(mm, addr, len))
2044 return -ENOMEM;
2045 goto munmap_back;
2046 }
2047
2048
2049 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2050 return -ENOMEM;
2051
2052 if (mm->map_count > sysctl_max_map_count)
2053 return -ENOMEM;
2054
2055 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2056 return -ENOMEM;
2057
2058
2059 vma = vma_merge(mm, prev, addr, addr + len, flags,
2060 NULL, NULL, pgoff, NULL);
2061 if (vma)
2062 goto out;
2063
2064
2065
2066
2067 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2068 if (!vma) {
2069 vm_unacct_memory(len >> PAGE_SHIFT);
2070 return -ENOMEM;
2071 }
2072
2073 vma->vm_mm = mm;
2074 vma->vm_start = addr;
2075 vma->vm_end = addr + len;
2076 vma->vm_pgoff = pgoff;
2077 vma->vm_flags = flags;
2078 vma->vm_page_prot = vm_get_page_prot(flags);
2079 vma_link(mm, vma, prev, rb_link, rb_parent);
2080out:
2081 mm->total_vm += len >> PAGE_SHIFT;
2082 if (flags & VM_LOCKED) {
2083 if (!mlock_vma_pages_range(vma, addr, addr + len))
2084 mm->locked_vm += (len >> PAGE_SHIFT);
2085 }
2086 return addr;
2087}
2088
2089EXPORT_SYMBOL(do_brk);
2090
2091
2092void exit_mmap(struct mm_struct *mm)
2093{
2094 struct mmu_gather *tlb;
2095 struct vm_area_struct *vma;
2096 unsigned long nr_accounted = 0;
2097 unsigned long end;
2098
2099
2100 mmu_notifier_release(mm);
2101
2102 if (mm->locked_vm) {
2103 vma = mm->mmap;
2104 while (vma) {
2105 if (vma->vm_flags & VM_LOCKED)
2106 munlock_vma_pages_all(vma);
2107 vma = vma->vm_next;
2108 }
2109 }
2110
2111 arch_exit_mmap(mm);
2112
2113 vma = mm->mmap;
2114 if (!vma)
2115 return;
2116
2117 lru_add_drain();
2118 flush_cache_mm(mm);
2119 tlb = tlb_gather_mmu(mm, 1);
2120
2121
2122 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2123 vm_unacct_memory(nr_accounted);
2124
2125 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2126 tlb_finish_mmu(tlb, 0, end);
2127
2128
2129
2130
2131
2132 while (vma)
2133 vma = remove_vma(vma);
2134
2135 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2136}
2137
2138
2139
2140
2141
2142int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2143{
2144 struct vm_area_struct * __vma, * prev;
2145 struct rb_node ** rb_link, * rb_parent;
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159 if (!vma->vm_file) {
2160 BUG_ON(vma->anon_vma);
2161 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2162 }
2163 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2164 if (__vma && __vma->vm_start < vma->vm_end)
2165 return -ENOMEM;
2166 if ((vma->vm_flags & VM_ACCOUNT) &&
2167 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2168 return -ENOMEM;
2169 vma_link(mm, vma, prev, rb_link, rb_parent);
2170 return 0;
2171}
2172
2173
2174
2175
2176
2177struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2178 unsigned long addr, unsigned long len, pgoff_t pgoff)
2179{
2180 struct vm_area_struct *vma = *vmap;
2181 unsigned long vma_start = vma->vm_start;
2182 struct mm_struct *mm = vma->vm_mm;
2183 struct vm_area_struct *new_vma, *prev;
2184 struct rb_node **rb_link, *rb_parent;
2185 struct mempolicy *pol;
2186
2187
2188
2189
2190
2191 if (!vma->vm_file && !vma->anon_vma)
2192 pgoff = addr >> PAGE_SHIFT;
2193
2194 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2195 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2196 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2197 if (new_vma) {
2198
2199
2200
2201 if (vma_start >= new_vma->vm_start &&
2202 vma_start < new_vma->vm_end)
2203 *vmap = new_vma;
2204 } else {
2205 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2206 if (new_vma) {
2207 *new_vma = *vma;
2208 pol = mpol_dup(vma_policy(vma));
2209 if (IS_ERR(pol)) {
2210 kmem_cache_free(vm_area_cachep, new_vma);
2211 return NULL;
2212 }
2213 vma_set_policy(new_vma, pol);
2214 new_vma->vm_start = addr;
2215 new_vma->vm_end = addr + len;
2216 new_vma->vm_pgoff = pgoff;
2217 if (new_vma->vm_file) {
2218 get_file(new_vma->vm_file);
2219 if (vma->vm_flags & VM_EXECUTABLE)
2220 added_exe_file_vma(mm);
2221 }
2222 if (new_vma->vm_ops && new_vma->vm_ops->open)
2223 new_vma->vm_ops->open(new_vma);
2224 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2225 }
2226 }
2227 return new_vma;
2228}
2229
2230
2231
2232
2233
2234int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2235{
2236 unsigned long cur = mm->total_vm;
2237 unsigned long lim;
2238
2239 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2240
2241 if (cur + npages > lim)
2242 return 0;
2243 return 1;
2244}
2245
2246
2247static int special_mapping_fault(struct vm_area_struct *vma,
2248 struct vm_fault *vmf)
2249{
2250 pgoff_t pgoff;
2251 struct page **pages;
2252
2253
2254
2255
2256
2257
2258
2259 pgoff = vmf->pgoff - vma->vm_pgoff;
2260
2261 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2262 pgoff--;
2263
2264 if (*pages) {
2265 struct page *page = *pages;
2266 get_page(page);
2267 vmf->page = page;
2268 return 0;
2269 }
2270
2271 return VM_FAULT_SIGBUS;
2272}
2273
2274
2275
2276
/*
 * Close hook for special mappings; intentionally does nothing.
 * NOTE(review): presumably the hook exists only so that these VMAs have a
 * ->close method (which the VMA merging code is expected to treat as
 * "do not merge") - confirm against the merge checks elsewhere in the file.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}
2280
/*
 * VMA operations installed by install_special_mapping(): an empty close
 * hook and a fault handler that serves pages from the array stored in
 * vm_private_data.
 */
static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295int install_special_mapping(struct mm_struct *mm,
2296 unsigned long addr, unsigned long len,
2297 unsigned long vm_flags, struct page **pages)
2298{
2299 int ret;
2300 struct vm_area_struct *vma;
2301
2302 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2303 if (unlikely(vma == NULL))
2304 return -ENOMEM;
2305
2306 vma->vm_mm = mm;
2307 vma->vm_start = addr;
2308 vma->vm_end = addr + len;
2309
2310 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2311 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2312
2313 vma->vm_ops = &special_mapping_vmops;
2314 vma->vm_private_data = pages;
2315
2316 ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
2317 if (ret)
2318 goto out;
2319
2320 ret = insert_vm_struct(mm, vma);
2321 if (ret)
2322 goto out;
2323
2324 mm->total_vm += len >> PAGE_SHIFT;
2325
2326 perf_event_mmap(vma);
2327
2328 return 0;
2329
2330out:
2331 kmem_cache_free(vm_area_cachep, vma);
2332 return ret;
2333}
2334
/*
 * Global mutex taken by mm_take_all_locks() and released by
 * mm_drop_all_locks(); it is held for the whole time the per-mapping and
 * per-anon_vma locks are held.
 */
static DEFINE_MUTEX(mm_all_locks_mutex);
2336
2337static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2338{
2339 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2340
2341
2342
2343
2344 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354 if (__test_and_set_bit(0, (unsigned long *)
2355 &anon_vma->head.next))
2356 BUG();
2357 }
2358}
2359
2360static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2361{
2362 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2373 BUG();
2374 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2375 }
2376}
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410int mm_take_all_locks(struct mm_struct *mm)
2411{
2412 struct vm_area_struct *vma;
2413 int ret = -EINTR;
2414
2415 BUG_ON(down_read_trylock(&mm->mmap_sem));
2416
2417 mutex_lock(&mm_all_locks_mutex);
2418
2419 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2420 if (signal_pending(current))
2421 goto out_unlock;
2422 if (vma->vm_file && vma->vm_file->f_mapping)
2423 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2424 }
2425
2426 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2427 if (signal_pending(current))
2428 goto out_unlock;
2429 if (vma->anon_vma)
2430 vm_lock_anon_vma(mm, vma->anon_vma);
2431 }
2432
2433 ret = 0;
2434
2435out_unlock:
2436 if (ret)
2437 mm_drop_all_locks(mm);
2438
2439 return ret;
2440}
2441
2442static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2443{
2444 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457 if (!__test_and_clear_bit(0, (unsigned long *)
2458 &anon_vma->head.next))
2459 BUG();
2460 spin_unlock(&anon_vma->lock);
2461 }
2462}
2463
2464static void vm_unlock_mapping(struct address_space *mapping)
2465{
2466 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2467
2468
2469
2470
2471 spin_unlock(&mapping->i_mmap_lock);
2472 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2473 &mapping->flags))
2474 BUG();
2475 }
2476}
2477
2478
2479
2480
2481
2482void mm_drop_all_locks(struct mm_struct *mm)
2483{
2484 struct vm_area_struct *vma;
2485
2486 BUG_ON(down_read_trylock(&mm->mmap_sem));
2487 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2488
2489 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2490 if (vma->anon_vma)
2491 vm_unlock_anon_vma(vma->anon_vma);
2492 if (vma->vm_file && vma->vm_file->f_mapping)
2493 vm_unlock_mapping(vma->vm_file->f_mapping);
2494 }
2495
2496 mutex_unlock(&mm_all_locks_mutex);
2497}
2498
2499
2500
2501
2502void __init mmap_init(void)
2503{
2504 int ret;
2505
2506 ret = percpu_counter_init(&vm_committed_as, 0);
2507 VM_BUG_ON(ret);
2508}
2509