1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/module.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30#include <linux/perf_event.h>
31
32#include <asm/uaccess.h>
33#include <asm/cacheflush.h>
34#include <asm/tlb.h>
35#include <asm/mmu_context.h>
36
37#include "internal.h"
38
/*
 * Fallback used when the architecture does not provide arch_mmap_check():
 * every (addr, len, flags) request is accepted, i.e. the check evaluates
 * to 0.  get_unmapped_area() treats a non-zero result as an error value.
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags) (0)
#endif

/*
 * Fallback used when the architecture does not provide
 * arch_rebalance_pgtables(): the address chosen by get_unmapped_area()
 * is passed through unchanged.
 */
#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len) (addr)
#endif

/*
 * Forward declaration: mmap_region() calls this on its error path before
 * the definition appears in the file.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
50
51
52
53
54
/*
 * Keep the rb-tree self-checks (browse_rb() and the real validate_mm())
 * compiled out; with DEBUG_MM_RB undefined validate_mm() is an empty macro.
 */
#undef DEBUG_MM_RB
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/*
 * Base page protections, one entry per combination of the four access
 * bits.  vm_get_page_prot() indexes this table with
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED).  The first eight entries
 * are the __Pxxx values and the last eight the __Sxxx values (presumably the
 * "private" and "shared" variants - confirm against the arch headers).
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
76
77pgprot_t vm_get_page_prot(unsigned long vm_flags)
78{
79 return __pgprot(pgprot_val(protection_map[vm_flags &
80 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
81 pgprot_val(arch_vm_get_page_prot(vm_flags)));
82}
83EXPORT_SYMBOL(vm_get_page_prot);
84
/* Overcommit policy consulted by __vm_enough_memory(); defaults to OVERCOMMIT_GUESS. */
int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
/* Percentage of (RAM - hugetlb pages) allowed in the strict accounting path of __vm_enough_memory(). */
int sysctl_overcommit_ratio = 50;
/* Upper bound on mm->map_count enforced by do_mmap_pgoff(). */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
/* Committed-memory counter compared against the allowance in __vm_enough_memory(). */
struct percpu_counter vm_committed_as;
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
107{
108 unsigned long free, allowed;
109
110 vm_acct_memory(pages);
111
112
113
114
115 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
116 return 0;
117
118 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
119 unsigned long n;
120
121 free = global_page_state(NR_FILE_PAGES);
122 free += nr_swap_pages;
123
124
125
126
127
128
129
130 free += global_page_state(NR_SLAB_RECLAIMABLE);
131
132
133
134
135 if (!cap_sys_admin)
136 free -= free / 32;
137
138 if (free > pages)
139 return 0;
140
141
142
143
144
145 n = nr_free_pages();
146
147
148
149
150 if (n <= totalreserve_pages)
151 goto error;
152 else
153 n -= totalreserve_pages;
154
155
156
157
158 if (!cap_sys_admin)
159 n -= n / 32;
160 free += n;
161
162 if (free > pages)
163 return 0;
164
165 goto error;
166 }
167
168 allowed = (totalram_pages - hugetlb_total_pages())
169 * sysctl_overcommit_ratio / 100;
170
171
172
173 if (!cap_sys_admin)
174 allowed -= allowed / 32;
175 allowed += total_swap_pages;
176
177
178
179 if (mm)
180 allowed -= mm->total_vm / 32;
181
182 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
183 return 0;
184error:
185 vm_unacct_memory(pages);
186
187 return -ENOMEM;
188}
189
190
191
192
193static void __remove_shared_vm_struct(struct vm_area_struct *vma,
194 struct file *file, struct address_space *mapping)
195{
196 if (vma->vm_flags & VM_DENYWRITE)
197 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
198 if (vma->vm_flags & VM_SHARED)
199 mapping->i_mmap_writable--;
200
201 flush_dcache_mmap_lock(mapping);
202 if (unlikely(vma->vm_flags & VM_NONLINEAR))
203 list_del_init(&vma->shared.vm_set.list);
204 else
205 vma_prio_tree_remove(vma, &mapping->i_mmap);
206 flush_dcache_mmap_unlock(mapping);
207}
208
209
210
211
212
213void unlink_file_vma(struct vm_area_struct *vma)
214{
215 struct file *file = vma->vm_file;
216
217 if (file) {
218 struct address_space *mapping = file->f_mapping;
219 spin_lock(&mapping->i_mmap_lock);
220 __remove_shared_vm_struct(vma, file, mapping);
221 spin_unlock(&mapping->i_mmap_lock);
222 }
223}
224
225
226
227
228static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
229{
230 struct vm_area_struct *next = vma->vm_next;
231
232 might_sleep();
233 if (vma->vm_ops && vma->vm_ops->close)
234 vma->vm_ops->close(vma);
235 if (vma->vm_file) {
236 fput(vma->vm_file);
237 if (vma->vm_flags & VM_EXECUTABLE)
238 removed_exe_file_vma(vma->vm_mm);
239 }
240 mpol_put(vma_policy(vma));
241 kmem_cache_free(vm_area_cachep, vma);
242 return next;
243}
244
245SYSCALL_DEFINE1(brk, unsigned long, brk)
246{
247 unsigned long rlim, retval;
248 unsigned long newbrk, oldbrk;
249 struct mm_struct *mm = current->mm;
250 unsigned long min_brk;
251
252 down_write(&mm->mmap_sem);
253
254#ifdef CONFIG_COMPAT_BRK
255 min_brk = mm->end_code;
256#else
257 min_brk = mm->start_brk;
258#endif
259 if (brk < min_brk)
260 goto out;
261
262
263
264
265
266
267
268 rlim = rlimit(RLIMIT_DATA);
269 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
270 (mm->end_data - mm->start_data) > rlim)
271 goto out;
272
273 newbrk = PAGE_ALIGN(brk);
274 oldbrk = PAGE_ALIGN(mm->brk);
275 if (oldbrk == newbrk)
276 goto set_brk;
277
278
279 if (brk <= mm->brk) {
280 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
281 goto set_brk;
282 goto out;
283 }
284
285
286 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
287 goto out;
288
289
290 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
291 goto out;
292set_brk:
293 mm->brk = brk;
294out:
295 retval = mm->brk;
296 up_write(&mm->mmap_sem);
297 return retval;
298}
299
#ifdef DEBUG_MM_RB
/*
 * Walk the vma rb-tree at @root forwards and then backwards, printing any
 * ordering anomaly that is found.
 *
 * Returns the number of nodes seen on the forward walk, or 0 when the
 * backward walk counted a different number of nodes.  An out-of-order
 * vm_start resets the running count to -1 before the node is counted.
 */
static int browse_rb(struct rb_root *root)
{
	int forwards = 0, backwards;
	struct rb_node *node, *last = NULL;
	unsigned long prev_start = 0, prev_end = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct vm_area_struct *vma =
			rb_entry(node, struct vm_area_struct, vm_rb);

		if (vma->vm_start < prev_start) {
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev_start);
			forwards = -1;
		}
		if (vma->vm_start < prev_end)
			printk("vm_start %lx pend %lx\n", vma->vm_start, prev_end);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);

		forwards++;
		last = node;
		prev_start = vma->vm_start;
		prev_end = vma->vm_end;
	}

	backwards = 0;
	for (node = last; node; node = rb_prev(node))
		backwards++;

	if (forwards != backwards) {
		printk("backwards %d, forwards %d\n", backwards, forwards);
		forwards = 0;
	}
	return forwards;
}

/*
 * Cross-check mm->map_count against both the vm_next list and the rb-tree;
 * BUG()s if either disagrees.
 */
void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int count = 0;
	struct vm_area_struct *cursor;

	for (cursor = mm->mmap; cursor; cursor = cursor->vm_next)
		count++;

	if (count != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, count);
		bug = 1;
	}

	count = browse_rb(&mm->mm_rb);
	if (count != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, count);
		bug = 1;
	}

	BUG_ON(bug);
}
#else
/* Without DEBUG_MM_RB the validation compiles away to nothing. */
#define validate_mm(mm) do { } while (0)
#endif
349
350static struct vm_area_struct *
351find_vma_prepare(struct mm_struct *mm, unsigned long addr,
352 struct vm_area_struct **pprev, struct rb_node ***rb_link,
353 struct rb_node ** rb_parent)
354{
355 struct vm_area_struct * vma;
356 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
357
358 __rb_link = &mm->mm_rb.rb_node;
359 rb_prev = __rb_parent = NULL;
360 vma = NULL;
361
362 while (*__rb_link) {
363 struct vm_area_struct *vma_tmp;
364
365 __rb_parent = *__rb_link;
366 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
367
368 if (vma_tmp->vm_end > addr) {
369 vma = vma_tmp;
370 if (vma_tmp->vm_start <= addr)
371 break;
372 __rb_link = &__rb_parent->rb_left;
373 } else {
374 rb_prev = __rb_parent;
375 __rb_link = &__rb_parent->rb_right;
376 }
377 }
378
379 *pprev = NULL;
380 if (rb_prev)
381 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
382 *rb_link = __rb_link;
383 *rb_parent = __rb_parent;
384 return vma;
385}
386
387static inline void
388__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
389 struct vm_area_struct *prev, struct rb_node *rb_parent)
390{
391 if (prev) {
392 vma->vm_next = prev->vm_next;
393 prev->vm_next = vma;
394 } else {
395 mm->mmap = vma;
396 if (rb_parent)
397 vma->vm_next = rb_entry(rb_parent,
398 struct vm_area_struct, vm_rb);
399 else
400 vma->vm_next = NULL;
401 }
402}
403
/*
 * Insert @vma into @mm's vma rb-tree.
 *
 * @rb_link and @rb_parent are the insertion slot and its parent node, as
 * produced by find_vma_prepare().  The node is first hooked in at that slot
 * and then rb_insert_color() is run on the tree.
 */
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}
410
411static void __vma_link_file(struct vm_area_struct *vma)
412{
413 struct file *file;
414
415 file = vma->vm_file;
416 if (file) {
417 struct address_space *mapping = file->f_mapping;
418
419 if (vma->vm_flags & VM_DENYWRITE)
420 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
421 if (vma->vm_flags & VM_SHARED)
422 mapping->i_mmap_writable++;
423
424 flush_dcache_mmap_lock(mapping);
425 if (unlikely(vma->vm_flags & VM_NONLINEAR))
426 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
427 else
428 vma_prio_tree_insert(vma, &mapping->i_mmap);
429 flush_dcache_mmap_unlock(mapping);
430 }
431}
432
/*
 * Link @vma into both per-mm structures: the vm_next list (after @prev) and
 * the rb-tree (at @rb_link under @rb_parent).  It takes no locks, does not
 * touch the file mapping and does not update mm->map_count; callers handle
 * those.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}
441
442static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
443 struct vm_area_struct *prev, struct rb_node **rb_link,
444 struct rb_node *rb_parent)
445{
446 struct address_space *mapping = NULL;
447
448 if (vma->vm_file)
449 mapping = vma->vm_file->f_mapping;
450
451 if (mapping) {
452 spin_lock(&mapping->i_mmap_lock);
453 vma->vm_truncate_count = mapping->truncate_count;
454 }
455 anon_vma_lock(vma);
456
457 __vma_link(mm, vma, prev, rb_link, rb_parent);
458 __vma_link_file(vma);
459
460 anon_vma_unlock(vma);
461 if (mapping)
462 spin_unlock(&mapping->i_mmap_lock);
463
464 mm->map_count++;
465 validate_mm(mm);
466}
467
468
469
470
471
472
473static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
474{
475 struct vm_area_struct *__vma, *prev;
476 struct rb_node **rb_link, *rb_parent;
477
478 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
479 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
480 __vma_link(mm, vma, prev, rb_link, rb_parent);
481 mm->map_count++;
482}
483
/*
 * Unlink @vma from @mm's list and rb-tree.
 *
 * @prev is dereferenced unconditionally, so it must be the (non-NULL) list
 * predecessor of @vma.  If the lookup cache pointed at @vma it is redirected
 * to @prev so that it never references the removed vma.  map_count is left
 * for the caller to adjust.
 */
static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	if (mm->mmap_cache == vma)
		mm->mmap_cache = prev;
}
493
494
495
496
497
498
499
500
/*
 * vma_adjust() - change the extent of @vma, fixing up its neighbour.
 *
 * @vma:    the vma whose vm_start, vm_end and vm_pgoff are rewritten
 * @start:  new vm_start
 * @end:    requested new vm_end (clamped to next->vm_end when the following
 *          vma is being swallowed)
 * @pgoff:  new vm_pgoff
 * @insert: optional new vma to link into the mm (and file mapping) as part
 *          of the same operation; when given, the neighbour is not examined
 *
 * Without @insert, the following vma ("next") is removed, grown or shrunk
 * to match the new @end.  File mappings are updated under i_mmap_lock, with
 * the affected vmas taken out of the prio tree while their ranges change.
 *
 * Returns 0 on success, or -ENOMEM if anon_vma_clone() fails (in which case
 * nothing has been modified yet).
 */
int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct prio_tree_root *root = NULL;
	struct file *file = vma->vm_file;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma grows over the whole of next, which is removed.
			 * remove_next becomes 2 when end reaches beyond next:
			 * the vma after it is then removed too, via the
			 * "goto again" at the bottom of the function.
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			exporter = next;
			importer = vma;
		} else if (end > next->vm_start) {
			/*
			 * vma grows over the front part of next: next's start
			 * moves up by adjust_next pages.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			exporter = next;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks: next's start moves down by the number
			 * of pages given up (adjust_next is negative).
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			exporter = vma;
			importer = next;
		}

		/*
		 * The importer takes over address range from the exporter;
		 * if only the exporter has an anon_vma, the importer gets a
		 * clone of its chain and adopts the same anon_vma.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			if (anon_vma_clone(importer, exporter))
				return -ENOMEM;
			importer->anon_vma = exporter->anon_vma;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		/* Only linear mappings live in the i_mmap prio tree. */
		if (!(vma->vm_flags & VM_NONLINEAR))
			root = &mapping->i_mmap;
		spin_lock(&mapping->i_mmap_lock);
		if (importer &&
		    vma->vm_truncate_count != next->vm_truncate_count) {
			/*
			 * The two vmas disagree on truncate_count, so reset
			 * the growing one.
			 * NOTE(review): presumably this makes a truncation in
			 * progress rescan the importer - confirm.
			 */
			importer->vm_truncate_count = 0;
		}
		if (insert) {
			insert->vm_truncate_count = vma->vm_truncate_count;
			/*
			 * Link @insert into the file mapping now, while
			 * i_mmap_lock is held; it is only linked into the mm
			 * further down, after vma's range has been updated.
			 */
			__vma_link_file(insert);
		}
	}

	/*
	 * Take the vmas whose range is about to change out of the prio tree,
	 * rewrite the ranges, then put them back.
	 */
	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, root);
		if (adjust_next)
			vma_prio_tree_remove(next, root);
	}

	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_prio_tree_insert(next, root);
		vma_prio_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma now covers next's range: drop next from the mm (vma is
		 * its list predecessor) and from the file mapping.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * vma's range has been updated, so @insert can now be linked
		 * into the mm's list and rb-tree.
		 */
		__insert_vm_struct(mm, insert);
	}

	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	if (remove_next) {
		/*
		 * Release what the removed vma held.  fput() is applied to
		 * vma's file; NOTE(review): this assumes next referenced the
		 * same file as vma - confirm against the merge conditions.
		 */
		if (file) {
			fput(file);
			if (next->vm_flags & VM_EXECUTABLE)
				removed_exe_file_vma(mm);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * remove_next == 2 means the original end lay beyond the vma
		 * just freed: reload next and go round again to remove the
		 * following vma as well.
		 */
		if (remove_next == 2) {
			next = vma->vm_next;
			goto again;
		}
	}

	validate_mm(mm);

	return 0;
}
649
650
651
652
653
654static inline int is_mergeable_vma(struct vm_area_struct *vma,
655 struct file *file, unsigned long vm_flags)
656{
657
658 if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
659 return 0;
660 if (vma->vm_file != file)
661 return 0;
662 if (vma->vm_ops && vma->vm_ops->close)
663 return 0;
664 return 1;
665}
666
/*
 * Two anon_vma pointers are compatible for merging when at least one of
 * them is NULL or when both refer to the same anon_vma.
 *
 * Returns 1 if compatible, 0 otherwise.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (anon_vma1 && anon_vma2)
		return anon_vma1 == anon_vma2;

	return 1;
}
672
673
674
675
676
677
678
679
680
681
682
683
684static int
685can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
686 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
687{
688 if (is_mergeable_vma(vma, file, vm_flags) &&
689 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
690 if (vma->vm_pgoff == vm_pgoff)
691 return 1;
692 }
693 return 0;
694}
695
696
697
698
699
700
701
702
703static int
704can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
705 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
706{
707 if (is_mergeable_vma(vma, file, vm_flags) &&
708 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
709 pgoff_t vm_pglen;
710 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
711 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
712 return 1;
713 }
714 return 0;
715}
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
/*
 * vma_merge() - try to absorb the range [addr, end) into a neighbouring vma
 * instead of creating a new one.
 *
 * @mm:       address space being modified
 * @prev:     the vma preceding @addr, or NULL when there is none
 * @addr:     start of the range
 * @end:      end of the range (exclusive)
 * @vm_flags, @anon_vma, @file, @pgoff, @policy:
 *            attributes the range would have; a neighbour is only a merge
 *            candidate if it is compatible with all of them
 *
 * Returns the vma that now covers the range, or NULL when no merge was
 * possible or vma_adjust() failed.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/* Mappings carrying any VM_SPECIAL flag are never merged. */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	/*
	 * "area" is the vma right after prev (or the first vma).  If it ends
	 * exactly at @end, the candidate for merging on the far side is the
	 * vma after it.
	 */
	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	if (next && next->vm_end == end)
		next = next->vm_next;

	/* Can the range be glued onto the end of the predecessor? */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * Yes.  If the successor is compatible as well (and prev and
		 * next agree on their anon_vma), merge all three by growing
		 * prev up to next's end; otherwise just grow prev to @end.
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma)) {

			err = vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else
			err = vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		if (err)
			return NULL;
		return prev;
	}

	/* Otherwise, can the range be glued onto the front of the successor? */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		if (prev && addr < prev->vm_end)
			/*
			 * The range is the tail of prev: shrink prev to
			 * @addr; vma_adjust() moves next's start down.
			 */
			err = vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else
			/*
			 * Grow "area" to span [addr, next->vm_end).  area is
			 * next itself unless the vma after prev ended exactly
			 * at @end, in which case it differs from next and
			 * vma_adjust() removes next.
			 */
			err = vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		if (err)
			return NULL;
		return area;
	}

	return NULL;
}
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
833{
834 return a->vm_end == b->vm_start &&
835 mpol_equal(vma_policy(a), vma_policy(b)) &&
836 a->vm_file == b->vm_file &&
837 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
838 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
839}
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
864{
865 if (anon_vma_compatible(a, b)) {
866 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
867
868 if (anon_vma && list_is_singular(&old->anon_vma_chain))
869 return anon_vma;
870 }
871 return NULL;
872}
873
874
875
876
877
878
879
880
881
882struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
883{
884 struct anon_vma *anon_vma;
885 struct vm_area_struct *near;
886
887 near = vma->vm_next;
888 if (!near)
889 goto try_prev;
890
891 anon_vma = reusable_anon_vma(near, vma, near);
892 if (anon_vma)
893 return anon_vma;
894try_prev:
895
896
897
898
899
900
901
902 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
903 if (!near)
904 goto none;
905
906 anon_vma = reusable_anon_vma(near, near, vma);
907 if (anon_vma)
908 return anon_vma;
909none:
910
911
912
913
914
915
916
917
918 return NULL;
919}
920
#ifdef CONFIG_PROC_FS
/*
 * Add @pages (which may be negative) to the per-mm statistics that apply to
 * a mapping with the given @flags and @file:
 *  - file-backed mappings count towards shared_vm, and also exec_vm when
 *    they are executable but not writable;
 *  - anonymous mappings with a stack growth flag count towards stack_vm;
 *  - VM_RESERVED or VM_IO mappings additionally count towards reserved_vm.
 */
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
						struct file *file, long pages)
{
	const unsigned long stack_flags
		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);

	if (!file) {
		if (flags & stack_flags)
			mm->stack_vm += pages;
	} else {
		mm->shared_vm += pages;
		if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
			mm->exec_vm += pages;
	}

	if (flags & (VM_RESERVED|VM_IO))
		mm->reserved_vm += pages;
}
#endif
938
939
940
941
942
/*
 * do_mmap_pgoff() - validate an mmap request and hand it to mmap_region().
 *
 * @file:  file to map, or NULL for an anonymous mapping
 * @addr:  requested address (a hint unless MAP_FIXED is set in @flags)
 * @len:   length in bytes; rounded up to a whole number of pages
 * @prot:  PROT_* protection bits
 * @flags: MAP_* flags
 * @pgoff: offset into @file, in pages
 *
 * Returns the mapped address on success or a negative errno value cast to
 * unsigned long.  The mmap_pgoff syscall in this file calls it with
 * current->mm->mmap_sem held for writing.
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct inode *inode;
	unsigned int vm_flags;
	int error;
	unsigned long reqprot = prot;

	/*
	 * With the READ_IMPLIES_EXEC personality a readable mapping is made
	 * executable too, unless the file sits on a MNT_NOEXEC mount.  The
	 * protection originally requested is kept in reqprot for the
	 * security hook below.
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	if (!len)
		return -EINVAL;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* A length that aligns to zero has overflowed. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* Reject offsets whose end would wrap around. */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Refuse once the mm already exceeds the map count limit. */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/*
	 * Pick the address.  get_unmapped_area() only returns page-aligned
	 * addresses on success, so any low bits set mean the value is an
	 * error code, which is passed straight back.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/*
	 * Build the initial vm_flags from prot, flags and the mm defaults;
	 * the VM_MAY* bits start out fully permissive and are trimmed by the
	 * per-file checks below.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	/* A locked mapping must fit in RLIMIT_MEMLOCK unless CAP_IPC_LOCK is held. */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_path.dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* Writable shared mappings need a file opened for writing. */
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/* No shared mapping of an append-only inode through a writable file. */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* Fails with -EAGAIN when locks_verify_locked() objects. */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			/*
			 * A file not opened for writing can never be written
			 * through: drop VM_MAYWRITE and VM_SHARED again.
			 */
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through: the MAP_PRIVATE checks apply as well */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			/* Nothing on a MNT_NOEXEC mount may be, or become, executable. */
			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op || !file->f_op->mmap)
				return -ENODEV;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* Shared anonymous mappings ignore the caller's pgoff. */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/* Private anonymous mappings use the address as page offset. */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (error)
		return error;

	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);
1080
1081SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1082 unsigned long, prot, unsigned long, flags,
1083 unsigned long, fd, unsigned long, pgoff)
1084{
1085 struct file *file = NULL;
1086 unsigned long retval = -EBADF;
1087
1088 if (!(flags & MAP_ANONYMOUS)) {
1089 if (unlikely(flags & MAP_HUGETLB))
1090 return -EINVAL;
1091 file = fget(fd);
1092 if (!file)
1093 goto out;
1094 } else if (flags & MAP_HUGETLB) {
1095 struct user_struct *user = NULL;
1096
1097
1098
1099
1100
1101
1102 len = ALIGN(len, huge_page_size(&default_hstate));
1103 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
1104 &user, HUGETLB_ANONHUGE_INODE);
1105 if (IS_ERR(file))
1106 return PTR_ERR(file);
1107 }
1108
1109 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1110
1111 down_write(¤t->mm->mmap_sem);
1112 retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1113 up_write(¤t->mm->mmap_sem);
1114
1115 if (file)
1116 fput(file);
1117out:
1118 return retval;
1119}
1120
#ifdef __ARCH_WANT_SYS_OLD_MMAP
/* Argument block that the old_mmap syscall reads from user space. */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

/*
 * Legacy mmap entry point taking all arguments through one user-space
 * structure, with the file offset expressed in bytes.
 *
 * Returns -EFAULT if the structure cannot be copied in, -EINVAL if the
 * offset is not page aligned, otherwise the result of sys_mmap_pgoff().
 */
SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct args;

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;

	if (args.offset & ~PAGE_MASK)
		return -EINVAL;

	return sys_mmap_pgoff(args.addr, args.len, args.prot, args.flags,
			      args.fd, args.offset >> PAGE_SHIFT);
}
#endif
1144
1145
1146
1147
1148
1149
1150
1151int vma_wants_writenotify(struct vm_area_struct *vma)
1152{
1153 unsigned int vm_flags = vma->vm_flags;
1154
1155
1156 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1157 return 0;
1158
1159
1160 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1161 return 1;
1162
1163
1164 if (pgprot_val(vma->vm_page_prot) !=
1165 pgprot_val(vm_get_page_prot(vm_flags)))
1166 return 0;
1167
1168
1169 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1170 return 0;
1171
1172
1173 return vma->vm_file && vma->vm_file->f_mapping &&
1174 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1175}
1176
1177
1178
1179
1180
1181static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1182{
1183
1184
1185
1186
1187 if (file && is_file_hugepages(file))
1188 return 0;
1189
1190 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1191}
1192
/*
 * mmap_region() - establish the mapping [addr, addr + len) in current->mm.
 *
 * @file:     backing file, or NULL for an anonymous mapping
 * @addr:     start address of the mapping
 * @len:      length in bytes
 * @flags:    MAP_* flags; MAP_NORESERVE, MAP_POPULATE and MAP_NONBLOCK are
 *            the ones examined here
 * @vm_flags: VM_* flags for the new vma
 * @pgoff:    page offset recorded in the vma
 *
 * Anything already mapped in the range is unmapped first.  The range is
 * then either merged into a neighbouring vma or given a freshly allocated
 * one.
 *
 * Returns the start address of the mapping (a file's ->mmap method may have
 * changed it) or a negative errno value cast to unsigned long.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long flags,
			  unsigned int vm_flags, unsigned long pgoff)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int correct_wcount = 0;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;
	struct inode *inode =  file ? file->f_path.dentry->d_inode : NULL;

	/*
	 * Clear out old mappings: as long as an existing vma overlaps the
	 * range, unmap the range and look again so that prev, rb_link and
	 * rb_parent describe the tree as it is now.
	 */
	error = -ENOMEM;
munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Give up with -ENOMEM if may_expand_vm() refuses these pages. */
	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	/*
	 * Translate MAP_NORESERVE into VM_NORESERVE: for ordinary mappings
	 * only when the overcommit policy is not OVERCOMMIT_NEVER, for
	 * hugetlb files always.
	 */
	if ((flags & MAP_NORESERVE)) {
		/* honoured unless overcommit is strictly forbidden */
		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;

		/* hugetlb files always get it when asked */
		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}

	/*
	 * Private writable mappings are charged up front; "charged" records
	 * the amount so that the error path can give it back.
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory(charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/* Try to extend a neighbouring vma instead of allocating a new one. */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
	if (vma)
		goto out;

	/*
	 * No merge possible: allocate a zeroed vma and describe the range in
	 * it.  It is not linked into the mm until it is fully set up.
	 */
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;
	INIT_LIST_HEAD(&vma->anon_vma_chain);

	if (file) {
		/* File mappings may not be growable in either direction. */
		error = -EINVAL;
		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
			goto free_vma;
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			/* remember to balance i_writecount again later */
			correct_wcount = 1;
		}
		/* The vma holds its own reference on the file. */
		vma->vm_file = file;
		get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
		if (vm_flags & VM_EXECUTABLE)
			added_exe_file_vma(mm);

		/*
		 * Re-read what the ->mmap method was free to change in the
		 * vma: start address, page offset and flags.
		 */
		addr = vma->vm_start;
		pgoff = vma->vm_pgoff;
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		/* Shared anonymous memory is set up by shmem_zero_setup(). */
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	if (vma_wants_writenotify(vma)) {
		pgprot_t pprot = vma->vm_page_prot;

		/*
		 * Recompute the protection with VM_SHARED masked out, i.e.
		 * as for a private mapping (presumably so that the first
		 * write to each page faults - confirm), while keeping the
		 * non-cached attribute if the previous protection had it.
		 */
		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Use the vma's file from here on; the setup above may have set it. */
	file = vma->vm_file;

	/*
	 * The linked vma now accounts for the write denial itself (see
	 * __vma_link_file()), so give back the count taken after
	 * deny_write_access() above.
	 */
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
out:
	perf_event_mmap(vma);

	mm->total_vm += len >> PAGE_SHIFT;
	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		/* locked_vm is only bumped when mlock_vma_pages_range() returns 0 */
		if (!mlock_vma_pages_range(vma, addr, addr + len))
			mm->locked_vm += (len >> PAGE_SHIFT);
	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
		make_pages_present(addr, addr + len);
	return addr;

unmap_and_free_vma:
	/* ->mmap failed: undo the write denial and the file reference. */
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);

	/* Tear down whatever the failed ->mmap may have mapped already. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	/*
	 * Skip the vm_unacct_memory() below.
	 * NOTE(review): presumably unmap_region() has already returned the
	 * charge - confirm.
	 */
	charged = 0;
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
/*
 * Generic bottom-up search for a free range of @len bytes in current->mm,
 * used unless the architecture defines HAVE_ARCH_UNMAPPED_AREA.
 *
 * @filp and @pgoff are not used by this implementation.  With MAP_FIXED in
 * @flags, @addr is returned as is.  Otherwise a non-zero @addr is treated
 * as a hint that is taken if the range there is free; failing that, the vma
 * list is scanned upwards for the first hole that is large enough.
 *
 * mm->free_area_cache remembers where the next search should start and
 * mm->cached_hole_size the largest hole seen below that point.
 *
 * Returns the chosen address, or -ENOMEM.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* Honour the hint if the page-aligned range there is free and fits. */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	/*
	 * If the request is larger than the biggest hole known to exist
	 * below the cached start point, searching from the cache is enough.
	 * Otherwise start over from TASK_UNMAPPED_BASE and forget the hole
	 * size.
	 */
	if (len > mm->cached_hole_size) {
	        start_addr = addr = mm->free_area_cache;
	} else {
	        start_addr = addr = TASK_UNMAPPED_BASE;
	        mm->cached_hole_size = 0;
	}

full_search:
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* Here vma is NULL or the first vma ending above addr. */
		if (TASK_SIZE - len < addr) {
			/*
			 * Ran off the end of the address space.  If the scan
			 * did not begin at TASK_UNMAPPED_BASE, retry once
			 * from there; otherwise there is no room.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				addr = TASK_UNMAPPED_BASE;
			        start_addr = addr;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * The gap in front of vma is big enough: remember
			 * where it ends as the next search start.
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* Too small; record it if it is the largest hole so far. */
		if (addr + mm->cached_hole_size < vma->vm_start)
		        mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
	}
}
#endif
1420
1421void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1422{
1423
1424
1425
1426 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1427 mm->free_area_cache = addr;
1428 mm->cached_hole_size = ~0UL;
1429 }
1430}
1431
1432
1433
1434
1435
/*
 * Generic top-down search for a free range of @len bytes in current->mm,
 * used unless the architecture defines HAVE_ARCH_UNMAPPED_AREA_TOPDOWN.
 *
 * With MAP_FIXED in @flags, @addr0 is returned as is.  A non-zero @addr0 is
 * a hint that is taken if the range there is free.  Otherwise the search
 * walks downwards from mm->mmap_base, and if that fails it falls back to
 * the bottom-up arch_get_unmapped_area().
 *
 * Returns the chosen address, or -ENOMEM (possibly from the fallback).
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;

	/* The request must fit into the address space at all. */
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* Honour the hint if the page-aligned range there is free and fits. */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
				(!vma || addr + len <= vma->vm_start))
			return addr;
	}

	/*
	 * A hole at least this large is known to exist above the cached
	 * position, so restart the search from mmap_base.
	 */
 	if (len <= mm->cached_hole_size) {
 	        mm->cached_hole_size = 0;
 		mm->free_area_cache = mm->mmap_base;
 	}

	/* First try directly below the cached position. */
	addr = mm->free_area_cache;

	/* Only if there are at least len bytes of address space below it. */
	if (addr > len) {
		vma = find_vma(mm, addr-len);
		if (!vma || addr <= vma->vm_start)
			/* free: cache the new low end for the next call */
			return (mm->free_area_cache = addr-len);
	}

	if (mm->mmap_base < len)
		goto bottomup;

	addr = mm->mmap_base-len;

	do {
		/*
		 * find_vma() returns the first vma ending above addr (or
		 * NULL); the range is free if that vma starts at or beyond
		 * addr + len.
		 */
		vma = find_vma(mm, addr);
		if (!vma || addr+len <= vma->vm_start)
			/* found: cache it as the next starting point */
			return (mm->free_area_cache = addr);

 		/* Record the gap in front of vma if it is the largest so far. */
 		if (addr + mm->cached_hole_size < vma->vm_start)
 		        mm->cached_hole_size = vma->vm_start - addr;

		/* Next candidate: immediately below this vma. */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

bottomup:
	/*
	 * The top-down search found nothing.  Set the cache fields up for a
	 * complete bottom-up scan from TASK_UNMAPPED_BASE and let
	 * arch_get_unmapped_area() try, using the original hint.
	 */
	mm->cached_hole_size = ~0UL;
  	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Put the cache fields back into their top-down starting state,
	 * whatever the fallback returned.
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
#endif
1522
1523void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1524{
1525
1526
1527
1528 if (addr > mm->free_area_cache)
1529 mm->free_area_cache = addr;
1530
1531
1532 if (mm->free_area_cache > mm->mmap_base)
1533 mm->free_area_cache = mm->mmap_base;
1534}
1535
1536unsigned long
1537get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1538 unsigned long pgoff, unsigned long flags)
1539{
1540 unsigned long (*get_area)(struct file *, unsigned long,
1541 unsigned long, unsigned long, unsigned long);
1542
1543 unsigned long error = arch_mmap_check(addr, len, flags);
1544 if (error)
1545 return error;
1546
1547
1548 if (len > TASK_SIZE)
1549 return -ENOMEM;
1550
1551 get_area = current->mm->get_unmapped_area;
1552 if (file && file->f_op && file->f_op->get_unmapped_area)
1553 get_area = file->f_op->get_unmapped_area;
1554 addr = get_area(file, addr, len, pgoff, flags);
1555 if (IS_ERR_VALUE(addr))
1556 return addr;
1557
1558 if (addr > TASK_SIZE - len)
1559 return -ENOMEM;
1560 if (addr & ~PAGE_MASK)
1561 return -EINVAL;
1562
1563 return arch_rebalance_pgtables(addr, len);
1564}
1565
1566EXPORT_SYMBOL(get_unmapped_area);
1567
1568
1569struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1570{
1571 struct vm_area_struct *vma = NULL;
1572
1573 if (mm) {
1574
1575
1576 vma = mm->mmap_cache;
1577 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1578 struct rb_node * rb_node;
1579
1580 rb_node = mm->mm_rb.rb_node;
1581 vma = NULL;
1582
1583 while (rb_node) {
1584 struct vm_area_struct * vma_tmp;
1585
1586 vma_tmp = rb_entry(rb_node,
1587 struct vm_area_struct, vm_rb);
1588
1589 if (vma_tmp->vm_end > addr) {
1590 vma = vma_tmp;
1591 if (vma_tmp->vm_start <= addr)
1592 break;
1593 rb_node = rb_node->rb_left;
1594 } else
1595 rb_node = rb_node->rb_right;
1596 }
1597 if (vma)
1598 mm->mmap_cache = vma;
1599 }
1600 }
1601 return vma;
1602}
1603
1604EXPORT_SYMBOL(find_vma);
1605
1606
1607struct vm_area_struct *
1608find_vma_prev(struct mm_struct *mm, unsigned long addr,
1609 struct vm_area_struct **pprev)
1610{
1611 struct vm_area_struct *vma = NULL, *prev = NULL;
1612 struct rb_node *rb_node;
1613 if (!mm)
1614 goto out;
1615
1616
1617 vma = mm->mmap;
1618
1619
1620 rb_node = mm->mm_rb.rb_node;
1621
1622 while (rb_node) {
1623 struct vm_area_struct *vma_tmp;
1624 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1625
1626 if (addr < vma_tmp->vm_end) {
1627 rb_node = rb_node->rb_left;
1628 } else {
1629 prev = vma_tmp;
1630 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1631 break;
1632 rb_node = rb_node->rb_right;
1633 }
1634 }
1635
1636out:
1637 *pprev = prev;
1638 return prev ? prev->vm_next : vma;
1639}
1640
1641
1642
1643
1644
1645
1646static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1647{
1648 struct mm_struct *mm = vma->vm_mm;
1649 struct rlimit *rlim = current->signal->rlim;
1650 unsigned long new_start;
1651
1652
1653 if (!may_expand_vm(mm, grow))
1654 return -ENOMEM;
1655
1656
1657 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
1658 return -ENOMEM;
1659
1660
1661 if (vma->vm_flags & VM_LOCKED) {
1662 unsigned long locked;
1663 unsigned long limit;
1664 locked = mm->locked_vm + grow;
1665 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
1666 limit >>= PAGE_SHIFT;
1667 if (locked > limit && !capable(CAP_IPC_LOCK))
1668 return -ENOMEM;
1669 }
1670
1671
1672 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1673 vma->vm_end - size;
1674 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1675 return -EFAULT;
1676
1677
1678
1679
1680
1681 if (security_vm_enough_memory_mm(mm, grow))
1682 return -ENOMEM;
1683
1684
1685 mm->total_vm += grow;
1686 if (vma->vm_flags & VM_LOCKED)
1687 mm->locked_vm += grow;
1688 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1689 return 0;
1690}
1691
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * expand_upwards() - grow a VM_GROWSUP VMA so that it covers @address.
 *
 * Built only for CONFIG_STACK_GROWSUP or CONFIG_IA64; the function is
 * file-local except on IA64.
 *
 * Returns 0 on success (including when the VMA already reaches far enough),
 * -EFAULT when @vma is not VM_GROWSUP, -ENOMEM when the anon_vma cannot be
 * prepared or the target address wraps, or the error reported by
 * acct_stack_growth().
 */
#ifndef CONFIG_IA64
static
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long new_end;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* The anon_vma must exist before its lock can be taken. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;
	anon_vma_lock(vma);

	/*
	 * Round the target up to a page boundary.  If the rounded value is
	 * not above the original address the computation wrapped around.
	 */
	new_end = PAGE_ALIGN(address+4);
	if (address >= new_end) {
		error = -ENOMEM;
		goto out_unlock;
	}
	address = new_end;

	/* Nothing to do when the VMA already extends that far. */
	if (address > vma->vm_end) {
		unsigned long size = address - vma->vm_start;
		unsigned long grow = (address - vma->vm_end) >> PAGE_SHIFT;

		error = acct_stack_growth(vma, size, grow);
		if (!error)
			vma->vm_end = address;
	}

out_unlock:
	anon_vma_unlock(vma);
	return error;
}
#endif
1744
1745
1746
1747
1748static int expand_downwards(struct vm_area_struct *vma,
1749 unsigned long address)
1750{
1751 int error;
1752
1753
1754
1755
1756
1757 if (unlikely(anon_vma_prepare(vma)))
1758 return -ENOMEM;
1759
1760 address &= PAGE_MASK;
1761 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1762 if (error)
1763 return error;
1764
1765 anon_vma_lock(vma);
1766
1767
1768
1769
1770
1771
1772
1773
1774 if (address < vma->vm_start) {
1775 unsigned long size, grow;
1776
1777 size = vma->vm_end - address;
1778 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1779
1780 error = acct_stack_growth(vma, size, grow);
1781 if (!error) {
1782 vma->vm_start = address;
1783 vma->vm_pgoff -= grow;
1784 }
1785 }
1786 anon_vma_unlock(vma);
1787 return error;
1788}
1789
/*
 * expand_stack_downwards() - non-static entry point for expand_downwards().
 *
 * Forwards both arguments unchanged and returns expand_downwards()'s result,
 * independent of whether CONFIG_STACK_GROWSUP is set.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}
1794
1795#ifdef CONFIG_STACK_GROWSUP
1796int expand_stack(struct vm_area_struct *vma, unsigned long address)
1797{
1798 return expand_upwards(vma, address);
1799}
1800
1801struct vm_area_struct *
1802find_extend_vma(struct mm_struct *mm, unsigned long addr)
1803{
1804 struct vm_area_struct *vma, *prev;
1805
1806 addr &= PAGE_MASK;
1807 vma = find_vma_prev(mm, addr, &prev);
1808 if (vma && (vma->vm_start <= addr))
1809 return vma;
1810 if (!prev || expand_stack(prev, addr))
1811 return NULL;
1812 if (prev->vm_flags & VM_LOCKED) {
1813 mlock_vma_pages_range(prev, addr, prev->vm_end);
1814 }
1815 return prev;
1816}
1817#else
1818int expand_stack(struct vm_area_struct *vma, unsigned long address)
1819{
1820 return expand_downwards(vma, address);
1821}
1822
1823struct vm_area_struct *
1824find_extend_vma(struct mm_struct * mm, unsigned long addr)
1825{
1826 struct vm_area_struct * vma;
1827 unsigned long start;
1828
1829 addr &= PAGE_MASK;
1830 vma = find_vma(mm,addr);
1831 if (!vma)
1832 return NULL;
1833 if (vma->vm_start <= addr)
1834 return vma;
1835 if (!(vma->vm_flags & VM_GROWSDOWN))
1836 return NULL;
1837 start = vma->vm_start;
1838 if (expand_stack(vma, addr))
1839 return NULL;
1840 if (vma->vm_flags & VM_LOCKED) {
1841 mlock_vma_pages_range(vma, addr, start);
1842 }
1843 return vma;
1844}
1845#endif
1846
1847
1848
1849
1850
1851
1852
1853static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1854{
1855
1856 update_hiwater_vm(mm);
1857 do {
1858 long nrpages = vma_pages(vma);
1859
1860 mm->total_vm -= nrpages;
1861 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1862 vma = remove_vma(vma);
1863 } while (vma);
1864 validate_mm(mm);
1865}
1866
1867
1868
1869
1870
1871
1872static void unmap_region(struct mm_struct *mm,
1873 struct vm_area_struct *vma, struct vm_area_struct *prev,
1874 unsigned long start, unsigned long end)
1875{
1876 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1877 struct mmu_gather *tlb;
1878 unsigned long nr_accounted = 0;
1879
1880 lru_add_drain();
1881 tlb = tlb_gather_mmu(mm, 0);
1882 update_hiwater_rss(mm);
1883 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1884 vm_unacct_memory(nr_accounted);
1885 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1886 next? next->vm_start: 0);
1887 tlb_finish_mmu(tlb, start, end);
1888}
1889
1890
1891
1892
1893
1894static void
1895detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1896 struct vm_area_struct *prev, unsigned long end)
1897{
1898 struct vm_area_struct **insertion_point;
1899 struct vm_area_struct *tail_vma = NULL;
1900 unsigned long addr;
1901
1902 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1903 do {
1904 rb_erase(&vma->vm_rb, &mm->mm_rb);
1905 mm->map_count--;
1906 tail_vma = vma;
1907 vma = vma->vm_next;
1908 } while (vma && vma->vm_start < end);
1909 *insertion_point = vma;
1910 tail_vma->vm_next = NULL;
1911 if (mm->unmap_area == arch_unmap_area)
1912 addr = prev ? prev->vm_end : mm->mmap_base;
1913 else
1914 addr = vma ? vma->vm_start : mm->mmap_base;
1915 mm->unmap_area(mm, addr);
1916 mm->mmap_cache = NULL;
1917}
1918
1919
1920
1921
1922
1923static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1924 unsigned long addr, int new_below)
1925{
1926 struct mempolicy *pol;
1927 struct vm_area_struct *new;
1928 int err = -ENOMEM;
1929
1930 if (is_vm_hugetlb_page(vma) && (addr &
1931 ~(huge_page_mask(hstate_vma(vma)))))
1932 return -EINVAL;
1933
1934 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1935 if (!new)
1936 goto out_err;
1937
1938
1939 *new = *vma;
1940
1941 INIT_LIST_HEAD(&new->anon_vma_chain);
1942
1943 if (new_below)
1944 new->vm_end = addr;
1945 else {
1946 new->vm_start = addr;
1947 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1948 }
1949
1950 pol = mpol_dup(vma_policy(vma));
1951 if (IS_ERR(pol)) {
1952 err = PTR_ERR(pol);
1953 goto out_free_vma;
1954 }
1955 vma_set_policy(new, pol);
1956
1957 if (anon_vma_clone(new, vma))
1958 goto out_free_mpol;
1959
1960 if (new->vm_file) {
1961 get_file(new->vm_file);
1962 if (vma->vm_flags & VM_EXECUTABLE)
1963 added_exe_file_vma(mm);
1964 }
1965
1966 if (new->vm_ops && new->vm_ops->open)
1967 new->vm_ops->open(new);
1968
1969 if (new_below)
1970 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1971 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1972 else
1973 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1974
1975
1976 if (!err)
1977 return 0;
1978
1979
1980 if (new->vm_ops && new->vm_ops->close)
1981 new->vm_ops->close(new);
1982 if (new->vm_file) {
1983 if (vma->vm_flags & VM_EXECUTABLE)
1984 removed_exe_file_vma(mm);
1985 fput(new->vm_file);
1986 }
1987 out_free_mpol:
1988 mpol_put(pol);
1989 out_free_vma:
1990 kmem_cache_free(vm_area_cachep, new);
1991 out_err:
1992 return err;
1993}
1994
1995
1996
1997
1998
1999int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2000 unsigned long addr, int new_below)
2001{
2002 if (mm->map_count >= sysctl_max_map_count)
2003 return -ENOMEM;
2004
2005 return __split_vma(mm, vma, addr, new_below);
2006}
2007
2008
2009
2010
2011
2012
/*
 * do_munmap() - remove every mapping in [@start, @start + @len) from @mm.
 * @mm:    address space to operate on
 * @start: page-aligned start address
 * @len:   length in bytes; rounded up to a whole number of pages
 *
 * VMAs that straddle either end of the range are split first, so that only
 * whole VMAs are detached, unmapped and freed.
 *
 * Returns 0 on success or when nothing is mapped in the range, -EINVAL for
 * a misaligned or out-of-range request or a zero length, -ENOMEM when a
 * required split would exceed sysctl_max_map_count, or the error reported
 * by __split_vma().
 *
 * The munmap() syscall in this file calls this with mm->mmap_sem held for
 * writing.
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;

	/* Find the first VMA that ends above start, and its predecessor. */
	vma = find_vma_prev(mm, start, &prev);
	if (!vma)
		return 0;
	/* From here on start < vma->vm_end holds. */

	/* If that VMA begins at or past the end of the range, nothing overlaps. */
	end = start + len;
	if (vma->vm_start >= end)
		return 0;

	/*
	 * The range begins inside vma: split it so the part below start
	 * survives.  After the split the lower part (still "vma") becomes
	 * the predecessor of the region to remove.
	 */
	if (start > vma->vm_start) {
		int error;

		/*
		 * When the range also ends inside this same VMA, the unmap
		 * leaves one more VMA than before, so the map-count limit
		 * is checked here (__split_vma() itself does not check it).
		 */
		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does the range end inside a VMA?  Then split that one as well. */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	/* First VMA that lies entirely inside the range. */
	vma = prev? prev->vm_next: mm->mmap;

	/*
	 * Munlock VM_LOCKED VMAs in the range and drop their pages from
	 * mm->locked_vm before the VMAs are detached.
	 */
	if (mm->locked_vm) {
		struct vm_area_struct *tmp = vma;
		while (tmp && tmp->vm_start < end) {
			if (tmp->vm_flags & VM_LOCKED) {
				mm->locked_vm -= vma_pages(tmp);
				munlock_vma_pages_all(tmp);
			}
			tmp = tmp->vm_next;
		}
	}

	/*
	 * Unhook the VMAs from the list and rb-tree, then unmap their pages
	 * and free the page tables.
	 */
	detach_vmas_to_be_unmapped(mm, vma, prev, end);
	unmap_region(mm, vma, prev, start, end);

	/* Fix up the accounting and free the detached VMAs. */
	remove_vma_list(mm, vma);

	return 0;
}

EXPORT_SYMBOL(do_munmap);
2095
2096SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2097{
2098 int ret;
2099 struct mm_struct *mm = current->mm;
2100
2101 profile_munmap(addr);
2102
2103 down_write(&mm->mmap_sem);
2104 ret = do_munmap(mm, addr, len);
2105 up_write(&mm->mmap_sem);
2106 return ret;
2107}
2108
/*
 * Debug aid (CONFIG_DEBUG_VM only): warn when mm->mmap_sem can be acquired
 * for reading, which would mean the caller does not hold it for writing.
 * A read lock obtained by the probe is dropped again immediately.
 * Compiles to nothing without CONFIG_DEBUG_VM.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	int got_read_lock = down_read_trylock(&mm->mmap_sem);

	if (unlikely(got_read_lock)) {
		WARN_ON(1);
		up_read(&mm->mmap_sem);
	}
#endif
}
2118
2119
2120
2121
2122
2123
/*
 * do_brk() - map @len bytes of anonymous memory at the fixed address @addr
 * in the current process.
 * @addr: start address of the new mapping
 * @len:  length in bytes; rounded up to a whole number of pages
 *
 * The mapping gets VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags.
 * Anything already mapped in the range is unmapped first.  The new range is
 * merged into an adjacent VMA when vma_merge() allows it; otherwise a fresh
 * VMA is allocated and linked in.
 *
 * Returns @addr on success (also when the rounded length is 0), or an error
 * value: the result of security_file_mmap() or get_unmapped_area(), -EAGAIN
 * when the mlock limit would be exceeded, or -ENOMEM.
 *
 * verify_mm_writelocked() below warns (under CONFIG_DEBUG_VM) if
 * mm->mmap_sem is not held for writing.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma, * prev;
	unsigned long flags;
	struct rb_node ** rb_link, * rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;

	len = PAGE_ALIGN(len);
	if (!len)
		return addr;

	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
	if (error)
		return error;

	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	/*
	 * NOTE(review): the unsigned long result is stored in an int.  Only
	 * the bits below PAGE_SHIFT are examined, and a non-zero value there
	 * is returned as the error, so this relies on error values surviving
	 * the truncation - confirm before changing the type of "error".
	 */
	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
	if (error & ~PAGE_MASK)
		return error;

	/*
	 * When new mappings are locked by default (VM_LOCKED in def_flags),
	 * enforce RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK.
	 */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	/*
	 * The VMA list and tree are modified below; check that the caller
	 * holds mm->mmap_sem for writing.
	 */
	verify_mm_writelocked(mm);

	/*
	 * Clear old mappings in the range, and look up the insertion point.
	 * Repeat until nothing overlaps [addr, addr + len) any more.
	 */
 munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check the address-space limit only after the old maps are gone. */
	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Try to extend an existing adjacent mapping instead of adding one. */
	vma = vma_merge(mm, prev, addr, addr + len, flags,
					NULL, NULL, pgoff, NULL);
	if (vma)
		goto out;

	/*
	 * No merge possible: create a VMA of our own and link it in.
	 */
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma) {
		/* Presumably undoes the charge made by the check above - confirm. */
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		/* locked_vm is only bumped when the mlock helper returns 0. */
		if (!mlock_vma_pages_range(vma, addr, addr + len))
			mm->locked_vm += (len >> PAGE_SHIFT);
	}
	return addr;
}

EXPORT_SYMBOL(do_brk);
2220
2221
/*
 * exit_mmap() - release every mapping of @mm.
 *
 * Notifies the MMU notifiers, munlocks all VM_LOCKED VMAs, runs the
 * architecture exit hook, then unmaps all pages, frees the page tables and
 * finally releases every VMA on the list.
 */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather *tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;
	unsigned long end;

	/* Tell registered MMU notifiers that this mm is being torn down. */
	mmu_notifier_release(mm);

	/* Munlock every locked VMA before any pages are unmapped. */
	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	/* An mm without any VMA has nothing further to tear down. */
	vma = mm->mmap;
	if (!vma)
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb = tlb_gather_mmu(mm, 1);

	/* The range 0 .. -1 covers every VMA in the mm. */
	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
	vm_unacct_memory(nr_accounted);

	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
	tlb_finish_mmu(tlb, 0, end);

	/*
	 * Walk the list again, this time releasing the VMAs themselves;
	 * remove_vma() returns the next entry.
	 */
	while (vma)
		vma = remove_vma(vma);

	/* Only page tables covering the area below FIRST_USER_ADDRESS may remain. */
	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
2267
2268
2269
2270
2271
2272int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2273{
2274 struct vm_area_struct * __vma, * prev;
2275 struct rb_node ** rb_link, * rb_parent;
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289 if (!vma->vm_file) {
2290 BUG_ON(vma->anon_vma);
2291 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2292 }
2293 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2294 if (__vma && __vma->vm_start < vma->vm_end)
2295 return -ENOMEM;
2296 if ((vma->vm_flags & VM_ACCOUNT) &&
2297 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2298 return -ENOMEM;
2299 vma_link(mm, vma, prev, rb_link, rb_parent);
2300 return 0;
2301}
2302
2303
2304
2305
2306
2307struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2308 unsigned long addr, unsigned long len, pgoff_t pgoff)
2309{
2310 struct vm_area_struct *vma = *vmap;
2311 unsigned long vma_start = vma->vm_start;
2312 struct mm_struct *mm = vma->vm_mm;
2313 struct vm_area_struct *new_vma, *prev;
2314 struct rb_node **rb_link, *rb_parent;
2315 struct mempolicy *pol;
2316
2317
2318
2319
2320
2321 if (!vma->vm_file && !vma->anon_vma)
2322 pgoff = addr >> PAGE_SHIFT;
2323
2324 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2325 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2326 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2327 if (new_vma) {
2328
2329
2330
2331 if (vma_start >= new_vma->vm_start &&
2332 vma_start < new_vma->vm_end)
2333 *vmap = new_vma;
2334 } else {
2335 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2336 if (new_vma) {
2337 *new_vma = *vma;
2338 pol = mpol_dup(vma_policy(vma));
2339 if (IS_ERR(pol))
2340 goto out_free_vma;
2341 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
2342 if (anon_vma_clone(new_vma, vma))
2343 goto out_free_mempol;
2344 vma_set_policy(new_vma, pol);
2345 new_vma->vm_start = addr;
2346 new_vma->vm_end = addr + len;
2347 new_vma->vm_pgoff = pgoff;
2348 if (new_vma->vm_file) {
2349 get_file(new_vma->vm_file);
2350 if (vma->vm_flags & VM_EXECUTABLE)
2351 added_exe_file_vma(mm);
2352 }
2353 if (new_vma->vm_ops && new_vma->vm_ops->open)
2354 new_vma->vm_ops->open(new_vma);
2355 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2356 }
2357 }
2358 return new_vma;
2359
2360 out_free_mempol:
2361 mpol_put(pol);
2362 out_free_vma:
2363 kmem_cache_free(vm_area_cachep, new_vma);
2364 return NULL;
2365}
2366
2367
2368
2369
2370
2371int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2372{
2373 unsigned long cur = mm->total_vm;
2374 unsigned long lim;
2375
2376 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2377
2378 if (cur + npages > lim)
2379 return 0;
2380 return 1;
2381}
2382
2383
2384static int special_mapping_fault(struct vm_area_struct *vma,
2385 struct vm_fault *vmf)
2386{
2387 pgoff_t pgoff;
2388 struct page **pages;
2389
2390
2391
2392
2393
2394
2395
2396 pgoff = vmf->pgoff - vma->vm_pgoff;
2397
2398 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2399 pgoff--;
2400
2401 if (*pages) {
2402 struct page *page = *pages;
2403 get_page(page);
2404 vmf->page = page;
2405 return 0;
2406 }
2407
2408 return VM_FAULT_SIGBUS;
2409}
2410
2411
2412
2413
2414static void special_mapping_close(struct vm_area_struct *vma)
2415{
2416}
2417
2418static const struct vm_operations_struct special_mapping_vmops = {
2419 .close = special_mapping_close,
2420 .fault = special_mapping_fault,
2421};
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432int install_special_mapping(struct mm_struct *mm,
2433 unsigned long addr, unsigned long len,
2434 unsigned long vm_flags, struct page **pages)
2435{
2436 struct vm_area_struct *vma;
2437
2438 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2439 if (unlikely(vma == NULL))
2440 return -ENOMEM;
2441
2442 INIT_LIST_HEAD(&vma->anon_vma_chain);
2443 vma->vm_mm = mm;
2444 vma->vm_start = addr;
2445 vma->vm_end = addr + len;
2446
2447 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2448 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2449
2450 vma->vm_ops = &special_mapping_vmops;
2451 vma->vm_private_data = pages;
2452
2453 if (unlikely(insert_vm_struct(mm, vma))) {
2454 kmem_cache_free(vm_area_cachep, vma);
2455 return -ENOMEM;
2456 }
2457
2458 mm->total_vm += len >> PAGE_SHIFT;
2459
2460 perf_event_mmap(vma);
2461
2462 return 0;
2463}
2464
/*
 * Taken by mm_take_all_locks() and released by mm_drop_all_locks(), so it
 * stays held for as long as one mm has all of its locks taken.
 */
static DEFINE_MUTEX(mm_all_locks_mutex);
2466
2467static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2468{
2469 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2470
2471
2472
2473
2474 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484 if (__test_and_set_bit(0, (unsigned long *)
2485 &anon_vma->head.next))
2486 BUG();
2487 }
2488}
2489
2490static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2491{
2492 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2503 BUG();
2504 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2505 }
2506}
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540int mm_take_all_locks(struct mm_struct *mm)
2541{
2542 struct vm_area_struct *vma;
2543 struct anon_vma_chain *avc;
2544 int ret = -EINTR;
2545
2546 BUG_ON(down_read_trylock(&mm->mmap_sem));
2547
2548 mutex_lock(&mm_all_locks_mutex);
2549
2550 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2551 if (signal_pending(current))
2552 goto out_unlock;
2553 if (vma->vm_file && vma->vm_file->f_mapping)
2554 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2555 }
2556
2557 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2558 if (signal_pending(current))
2559 goto out_unlock;
2560 if (vma->anon_vma)
2561 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
2562 vm_lock_anon_vma(mm, avc->anon_vma);
2563 }
2564
2565 ret = 0;
2566
2567out_unlock:
2568 if (ret)
2569 mm_drop_all_locks(mm);
2570
2571 return ret;
2572}
2573
2574static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2575{
2576 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589 if (!__test_and_clear_bit(0, (unsigned long *)
2590 &anon_vma->head.next))
2591 BUG();
2592 spin_unlock(&anon_vma->lock);
2593 }
2594}
2595
2596static void vm_unlock_mapping(struct address_space *mapping)
2597{
2598 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2599
2600
2601
2602
2603 spin_unlock(&mapping->i_mmap_lock);
2604 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2605 &mapping->flags))
2606 BUG();
2607 }
2608}
2609
2610
2611
2612
2613
2614void mm_drop_all_locks(struct mm_struct *mm)
2615{
2616 struct vm_area_struct *vma;
2617 struct anon_vma_chain *avc;
2618
2619 BUG_ON(down_read_trylock(&mm->mmap_sem));
2620 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2621
2622 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2623 if (vma->anon_vma)
2624 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
2625 vm_unlock_anon_vma(avc->anon_vma);
2626 if (vma->vm_file && vma->vm_file->f_mapping)
2627 vm_unlock_mapping(vma->vm_file->f_mapping);
2628 }
2629
2630 mutex_unlock(&mm_all_locks_mutex);
2631}
2632
2633
2634
2635
2636void __init mmap_init(void)
2637{
2638 int ret;
2639
2640 ret = percpu_counter_init(&vm_committed_as, 0);
2641 VM_BUG_ON(ret);
2642}
2643