1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/module.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30
31#include <asm/uaccess.h>
32#include <asm/cacheflush.h>
33#include <asm/tlb.h>
34#include <asm/mmu_context.h>
35
36#include "internal.h"
37
38#ifndef arch_mmap_check
39#define arch_mmap_check(addr, len, flags) (0)
40#endif
41
42#ifndef arch_rebalance_pgtables
43#define arch_rebalance_pgtables(addr, len) (addr)
44#endif
45
46static void unmap_region(struct mm_struct *mm,
47 struct vm_area_struct *vma, struct vm_area_struct *prev,
48 unsigned long start, unsigned long end);
49
50
51
52
53
54#undef DEBUG_MM_RB
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71pgprot_t protection_map[16] = {
72 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
73 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
74};
75
76pgprot_t vm_get_page_prot(unsigned long vm_flags)
77{
78 return __pgprot(pgprot_val(protection_map[vm_flags &
79 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
80 pgprot_val(arch_vm_get_page_prot(vm_flags)));
81}
82EXPORT_SYMBOL(vm_get_page_prot);
83
84int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
85int sysctl_overcommit_ratio = 50;
86int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
87atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
88
89
90unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
109{
110 unsigned long free, allowed;
111
112 vm_acct_memory(pages);
113
114
115
116
117 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
118 return 0;
119
120 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
121 unsigned long n;
122
123 free = global_page_state(NR_FILE_PAGES);
124 free += nr_swap_pages;
125
126
127
128
129
130
131
132 free += global_page_state(NR_SLAB_RECLAIMABLE);
133
134
135
136
137 if (!cap_sys_admin)
138 free -= free / 32;
139
140 if (free > pages)
141 return 0;
142
143
144
145
146
147 n = nr_free_pages();
148
149
150
151
152 if (n <= totalreserve_pages)
153 goto error;
154 else
155 n -= totalreserve_pages;
156
157
158
159
160 if (!cap_sys_admin)
161 n -= n / 32;
162 free += n;
163
164 if (free > pages)
165 return 0;
166
167 goto error;
168 }
169
170 allowed = (totalram_pages - hugetlb_total_pages())
171 * sysctl_overcommit_ratio / 100;
172
173
174
175 if (!cap_sys_admin)
176 allowed -= allowed / 32;
177 allowed += total_swap_pages;
178
179
180
181 allowed -= mm->total_vm / 32;
182
183
184
185
186
187 if (atomic_long_read(&vm_committed_space) < (long)allowed)
188 return 0;
189error:
190 vm_unacct_memory(pages);
191
192 return -ENOMEM;
193}
194
195
196
197
198static void __remove_shared_vm_struct(struct vm_area_struct *vma,
199 struct file *file, struct address_space *mapping)
200{
201 if (vma->vm_flags & VM_DENYWRITE)
202 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
203 if (vma->vm_flags & VM_SHARED)
204 mapping->i_mmap_writable--;
205
206 flush_dcache_mmap_lock(mapping);
207 if (unlikely(vma->vm_flags & VM_NONLINEAR))
208 list_del_init(&vma->shared.vm_set.list);
209 else
210 vma_prio_tree_remove(vma, &mapping->i_mmap);
211 flush_dcache_mmap_unlock(mapping);
212}
213
214
215
216
217
218void unlink_file_vma(struct vm_area_struct *vma)
219{
220 struct file *file = vma->vm_file;
221
222 if (file) {
223 struct address_space *mapping = file->f_mapping;
224 spin_lock(&mapping->i_mmap_lock);
225 __remove_shared_vm_struct(vma, file, mapping);
226 spin_unlock(&mapping->i_mmap_lock);
227 }
228}
229
230
231
232
233static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
234{
235 struct vm_area_struct *next = vma->vm_next;
236
237 might_sleep();
238 if (vma->vm_ops && vma->vm_ops->close)
239 vma->vm_ops->close(vma);
240 if (vma->vm_file) {
241 fput(vma->vm_file);
242 if (vma->vm_flags & VM_EXECUTABLE)
243 removed_exe_file_vma(vma->vm_mm);
244 }
245 mpol_put(vma_policy(vma));
246 kmem_cache_free(vm_area_cachep, vma);
247 return next;
248}
249
250SYSCALL_DEFINE1(brk, unsigned long, brk)
251{
252 unsigned long rlim, retval;
253 unsigned long newbrk, oldbrk;
254 struct mm_struct *mm = current->mm;
255 unsigned long min_brk;
256
257 down_write(&mm->mmap_sem);
258
259#ifdef CONFIG_COMPAT_BRK
260 min_brk = mm->end_code;
261#else
262 min_brk = mm->start_brk;
263#endif
264 if (brk < min_brk)
265 goto out;
266
267
268
269
270
271
272
273 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
274 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
275 (mm->end_data - mm->start_data) > rlim)
276 goto out;
277
278 newbrk = PAGE_ALIGN(brk);
279 oldbrk = PAGE_ALIGN(mm->brk);
280 if (oldbrk == newbrk)
281 goto set_brk;
282
283
284 if (brk <= mm->brk) {
285 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
286 goto set_brk;
287 goto out;
288 }
289
290
291 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
292 goto out;
293
294
295 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
296 goto out;
297set_brk:
298 mm->brk = brk;
299out:
300 retval = mm->brk;
301 up_write(&mm->mmap_sem);
302 return retval;
303}
304
305#ifdef DEBUG_MM_RB
306static int browse_rb(struct rb_root *root)
307{
308 int i = 0, j;
309 struct rb_node *nd, *pn = NULL;
310 unsigned long prev = 0, pend = 0;
311
312 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
313 struct vm_area_struct *vma;
314 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
315 if (vma->vm_start < prev)
316 printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
317 if (vma->vm_start < pend)
318 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
319 if (vma->vm_start > vma->vm_end)
320 printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
321 i++;
322 pn = nd;
323 prev = vma->vm_start;
324 pend = vma->vm_end;
325 }
326 j = 0;
327 for (nd = pn; nd; nd = rb_prev(nd)) {
328 j++;
329 }
330 if (i != j)
331 printk("backwards %d, forwards %d\n", j, i), i = 0;
332 return i;
333}
334
335void validate_mm(struct mm_struct *mm)
336{
337 int bug = 0;
338 int i = 0;
339 struct vm_area_struct *tmp = mm->mmap;
340 while (tmp) {
341 tmp = tmp->vm_next;
342 i++;
343 }
344 if (i != mm->map_count)
345 printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
346 i = browse_rb(&mm->mm_rb);
347 if (i != mm->map_count)
348 printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
349 BUG_ON(bug);
350}
351#else
352#define validate_mm(mm) do { } while (0)
353#endif
354
355static struct vm_area_struct *
356find_vma_prepare(struct mm_struct *mm, unsigned long addr,
357 struct vm_area_struct **pprev, struct rb_node ***rb_link,
358 struct rb_node ** rb_parent)
359{
360 struct vm_area_struct * vma;
361 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
362
363 __rb_link = &mm->mm_rb.rb_node;
364 rb_prev = __rb_parent = NULL;
365 vma = NULL;
366
367 while (*__rb_link) {
368 struct vm_area_struct *vma_tmp;
369
370 __rb_parent = *__rb_link;
371 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
372
373 if (vma_tmp->vm_end > addr) {
374 vma = vma_tmp;
375 if (vma_tmp->vm_start <= addr)
376 break;
377 __rb_link = &__rb_parent->rb_left;
378 } else {
379 rb_prev = __rb_parent;
380 __rb_link = &__rb_parent->rb_right;
381 }
382 }
383
384 *pprev = NULL;
385 if (rb_prev)
386 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
387 *rb_link = __rb_link;
388 *rb_parent = __rb_parent;
389 return vma;
390}
391
392static inline void
393__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
394 struct vm_area_struct *prev, struct rb_node *rb_parent)
395{
396 if (prev) {
397 vma->vm_next = prev->vm_next;
398 prev->vm_next = vma;
399 } else {
400 mm->mmap = vma;
401 if (rb_parent)
402 vma->vm_next = rb_entry(rb_parent,
403 struct vm_area_struct, vm_rb);
404 else
405 vma->vm_next = NULL;
406 }
407}
408
409void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
410 struct rb_node **rb_link, struct rb_node *rb_parent)
411{
412 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
413 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
414}
415
416static inline void __vma_link_file(struct vm_area_struct *vma)
417{
418 struct file * file;
419
420 file = vma->vm_file;
421 if (file) {
422 struct address_space *mapping = file->f_mapping;
423
424 if (vma->vm_flags & VM_DENYWRITE)
425 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
426 if (vma->vm_flags & VM_SHARED)
427 mapping->i_mmap_writable++;
428
429 flush_dcache_mmap_lock(mapping);
430 if (unlikely(vma->vm_flags & VM_NONLINEAR))
431 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
432 else
433 vma_prio_tree_insert(vma, &mapping->i_mmap);
434 flush_dcache_mmap_unlock(mapping);
435 }
436}
437
/*
 * Link @vma into the mm's list and rbtree and into its anon_vma, in that
 * order.  Takes no locks itself and does not touch mm->map_count; callers
 * handle both.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node **rb_link,
		struct rb_node *rb_parent)
{
	/* list first, then tree, then the anon_vma linkage */
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);

	__anon_vma_link(vma);
}
447
448static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
449 struct vm_area_struct *prev, struct rb_node **rb_link,
450 struct rb_node *rb_parent)
451{
452 struct address_space *mapping = NULL;
453
454 if (vma->vm_file)
455 mapping = vma->vm_file->f_mapping;
456
457 if (mapping) {
458 spin_lock(&mapping->i_mmap_lock);
459 vma->vm_truncate_count = mapping->truncate_count;
460 }
461 anon_vma_lock(vma);
462
463 __vma_link(mm, vma, prev, rb_link, rb_parent);
464 __vma_link_file(vma);
465
466 anon_vma_unlock(vma);
467 if (mapping)
468 spin_unlock(&mapping->i_mmap_lock);
469
470 mm->map_count++;
471 validate_mm(mm);
472}
473
474
475
476
477
478
479static void
480__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
481{
482 struct vm_area_struct * __vma, * prev;
483 struct rb_node ** rb_link, * rb_parent;
484
485 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
486 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
487 __vma_link(mm, vma, prev, rb_link, rb_parent);
488 mm->map_count++;
489}
490
491static inline void
492__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
493 struct vm_area_struct *prev)
494{
495 prev->vm_next = vma->vm_next;
496 rb_erase(&vma->vm_rb, &mm->mm_rb);
497 if (mm->mmap_cache == vma)
498 mm->mmap_cache = prev;
499}
500
501
502
503
504
505
506
507
508void vma_adjust(struct vm_area_struct *vma, unsigned long start,
509 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
510{
511 struct mm_struct *mm = vma->vm_mm;
512 struct vm_area_struct *next = vma->vm_next;
513 struct vm_area_struct *importer = NULL;
514 struct address_space *mapping = NULL;
515 struct prio_tree_root *root = NULL;
516 struct file *file = vma->vm_file;
517 struct anon_vma *anon_vma = NULL;
518 long adjust_next = 0;
519 int remove_next = 0;
520
521 if (next && !insert) {
522 if (end >= next->vm_end) {
523
524
525
526
527again: remove_next = 1 + (end > next->vm_end);
528 end = next->vm_end;
529 anon_vma = next->anon_vma;
530 importer = vma;
531 } else if (end > next->vm_start) {
532
533
534
535
536 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
537 anon_vma = next->anon_vma;
538 importer = vma;
539 } else if (end < vma->vm_end) {
540
541
542
543
544
545 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
546 anon_vma = next->anon_vma;
547 importer = next;
548 }
549 }
550
551 if (file) {
552 mapping = file->f_mapping;
553 if (!(vma->vm_flags & VM_NONLINEAR))
554 root = &mapping->i_mmap;
555 spin_lock(&mapping->i_mmap_lock);
556 if (importer &&
557 vma->vm_truncate_count != next->vm_truncate_count) {
558
559
560
561
562 importer->vm_truncate_count = 0;
563 }
564 if (insert) {
565 insert->vm_truncate_count = vma->vm_truncate_count;
566
567
568
569
570
571
572 __vma_link_file(insert);
573 }
574 }
575
576
577
578
579
580 if (vma->anon_vma && (insert || importer || start != vma->vm_start))
581 anon_vma = vma->anon_vma;
582 if (anon_vma) {
583 spin_lock(&anon_vma->lock);
584
585
586
587
588
589 if (importer && !importer->anon_vma) {
590 importer->anon_vma = anon_vma;
591 __anon_vma_link(importer);
592 }
593 }
594
595 if (root) {
596 flush_dcache_mmap_lock(mapping);
597 vma_prio_tree_remove(vma, root);
598 if (adjust_next)
599 vma_prio_tree_remove(next, root);
600 }
601
602 vma->vm_start = start;
603 vma->vm_end = end;
604 vma->vm_pgoff = pgoff;
605 if (adjust_next) {
606 next->vm_start += adjust_next << PAGE_SHIFT;
607 next->vm_pgoff += adjust_next;
608 }
609
610 if (root) {
611 if (adjust_next)
612 vma_prio_tree_insert(next, root);
613 vma_prio_tree_insert(vma, root);
614 flush_dcache_mmap_unlock(mapping);
615 }
616
617 if (remove_next) {
618
619
620
621
622 __vma_unlink(mm, next, vma);
623 if (file)
624 __remove_shared_vm_struct(next, file, mapping);
625 if (next->anon_vma)
626 __anon_vma_merge(vma, next);
627 } else if (insert) {
628
629
630
631
632
633 __insert_vm_struct(mm, insert);
634 }
635
636 if (anon_vma)
637 spin_unlock(&anon_vma->lock);
638 if (mapping)
639 spin_unlock(&mapping->i_mmap_lock);
640
641 if (remove_next) {
642 if (file) {
643 fput(file);
644 if (next->vm_flags & VM_EXECUTABLE)
645 removed_exe_file_vma(mm);
646 }
647 mm->map_count--;
648 mpol_put(vma_policy(next));
649 kmem_cache_free(vm_area_cachep, next);
650
651
652
653
654
655 if (remove_next == 2) {
656 next = vma->vm_next;
657 goto again;
658 }
659 }
660
661 validate_mm(mm);
662}
663
664
665
666
667
668#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
669
670static inline int is_mergeable_vma(struct vm_area_struct *vma,
671 struct file *file, unsigned long vm_flags)
672{
673 if (vma->vm_flags != vm_flags)
674 return 0;
675 if (vma->vm_file != file)
676 return 0;
677 if (vma->vm_ops && vma->vm_ops->close)
678 return 0;
679 return 1;
680}
681
/*
 * Two anon_vmas are compatible for merging when at least one of them is
 * NULL or when they are the same object.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
687
688
689
690
691
692
693
694
695
696
697
698
699static int
700can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
701 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
702{
703 if (is_mergeable_vma(vma, file, vm_flags) &&
704 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
705 if (vma->vm_pgoff == vm_pgoff)
706 return 1;
707 }
708 return 0;
709}
710
711
712
713
714
715
716
717
718static int
719can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
720 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
721{
722 if (is_mergeable_vma(vma, file, vm_flags) &&
723 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
724 pgoff_t vm_pglen;
725 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
726 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
727 return 1;
728 }
729 return 0;
730}
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761struct vm_area_struct *vma_merge(struct mm_struct *mm,
762 struct vm_area_struct *prev, unsigned long addr,
763 unsigned long end, unsigned long vm_flags,
764 struct anon_vma *anon_vma, struct file *file,
765 pgoff_t pgoff, struct mempolicy *policy)
766{
767 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
768 struct vm_area_struct *area, *next;
769
770
771
772
773
774 if (vm_flags & VM_SPECIAL)
775 return NULL;
776
777 if (prev)
778 next = prev->vm_next;
779 else
780 next = mm->mmap;
781 area = next;
782 if (next && next->vm_end == end)
783 next = next->vm_next;
784
785
786
787
788 if (prev && prev->vm_end == addr &&
789 mpol_equal(vma_policy(prev), policy) &&
790 can_vma_merge_after(prev, vm_flags,
791 anon_vma, file, pgoff)) {
792
793
794
795 if (next && end == next->vm_start &&
796 mpol_equal(policy, vma_policy(next)) &&
797 can_vma_merge_before(next, vm_flags,
798 anon_vma, file, pgoff+pglen) &&
799 is_mergeable_anon_vma(prev->anon_vma,
800 next->anon_vma)) {
801
802 vma_adjust(prev, prev->vm_start,
803 next->vm_end, prev->vm_pgoff, NULL);
804 } else
805 vma_adjust(prev, prev->vm_start,
806 end, prev->vm_pgoff, NULL);
807 return prev;
808 }
809
810
811
812
813 if (next && end == next->vm_start &&
814 mpol_equal(policy, vma_policy(next)) &&
815 can_vma_merge_before(next, vm_flags,
816 anon_vma, file, pgoff+pglen)) {
817 if (prev && addr < prev->vm_end)
818 vma_adjust(prev, prev->vm_start,
819 addr, prev->vm_pgoff, NULL);
820 else
821 vma_adjust(area, addr, next->vm_end,
822 next->vm_pgoff - pglen, NULL);
823 return area;
824 }
825
826 return NULL;
827}
828
829
830
831
832
833
834
835
836
837struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
838{
839 struct vm_area_struct *near;
840 unsigned long vm_flags;
841
842 near = vma->vm_next;
843 if (!near)
844 goto try_prev;
845
846
847
848
849
850
851
852 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
853 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
854
855 if (near->anon_vma && vma->vm_end == near->vm_start &&
856 mpol_equal(vma_policy(vma), vma_policy(near)) &&
857 can_vma_merge_before(near, vm_flags,
858 NULL, vma->vm_file, vma->vm_pgoff +
859 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
860 return near->anon_vma;
861try_prev:
862
863
864
865
866
867
868
869 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
870 if (!near)
871 goto none;
872
873 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
874 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
875
876 if (near->anon_vma && near->vm_end == vma->vm_start &&
877 mpol_equal(vma_policy(near), vma_policy(vma)) &&
878 can_vma_merge_after(near, vm_flags,
879 NULL, vma->vm_file, vma->vm_pgoff))
880 return near->anon_vma;
881none:
882
883
884
885
886
887
888
889
890 return NULL;
891}
892
893#ifdef CONFIG_PROC_FS
894void vm_stat_account(struct mm_struct *mm, unsigned long flags,
895 struct file *file, long pages)
896{
897 const unsigned long stack_flags
898 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
899
900 if (file) {
901 mm->shared_vm += pages;
902 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
903 mm->exec_vm += pages;
904 } else if (flags & stack_flags)
905 mm->stack_vm += pages;
906 if (flags & (VM_RESERVED|VM_IO))
907 mm->reserved_vm += pages;
908}
909#endif
910
911
912
913
914
915unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
916 unsigned long len, unsigned long prot,
917 unsigned long flags, unsigned long pgoff)
918{
919 struct mm_struct * mm = current->mm;
920 struct inode *inode;
921 unsigned int vm_flags;
922 int error;
923 int accountable = 1;
924 unsigned long reqprot = prot;
925
926
927
928
929
930
931
932 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
933 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
934 prot |= PROT_EXEC;
935
936 if (!len)
937 return -EINVAL;
938
939 if (!(flags & MAP_FIXED))
940 addr = round_hint_to_min(addr);
941
942 error = arch_mmap_check(addr, len, flags);
943 if (error)
944 return error;
945
946
947 len = PAGE_ALIGN(len);
948 if (!len || len > TASK_SIZE)
949 return -ENOMEM;
950
951
952 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
953 return -EOVERFLOW;
954
955
956 if (mm->map_count > sysctl_max_map_count)
957 return -ENOMEM;
958
959
960
961
962 addr = get_unmapped_area(file, addr, len, pgoff, flags);
963 if (addr & ~PAGE_MASK)
964 return addr;
965
966
967
968
969
970 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
971 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
972
973 if (flags & MAP_LOCKED) {
974 if (!can_do_mlock())
975 return -EPERM;
976 vm_flags |= VM_LOCKED;
977 }
978
979 if (vm_flags & VM_LOCKED) {
980 unsigned long locked, lock_limit;
981 locked = len >> PAGE_SHIFT;
982 locked += mm->locked_vm;
983 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
984 lock_limit >>= PAGE_SHIFT;
985 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
986 return -EAGAIN;
987 }
988
989 inode = file ? file->f_path.dentry->d_inode : NULL;
990
991 if (file) {
992 switch (flags & MAP_TYPE) {
993 case MAP_SHARED:
994 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
995 return -EACCES;
996
997
998
999
1000
1001 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1002 return -EACCES;
1003
1004
1005
1006
1007 if (locks_verify_locked(inode))
1008 return -EAGAIN;
1009
1010 vm_flags |= VM_SHARED | VM_MAYSHARE;
1011 if (!(file->f_mode & FMODE_WRITE))
1012 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1013
1014
1015 case MAP_PRIVATE:
1016 if (!(file->f_mode & FMODE_READ))
1017 return -EACCES;
1018 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1019 if (vm_flags & VM_EXEC)
1020 return -EPERM;
1021 vm_flags &= ~VM_MAYEXEC;
1022 }
1023 if (is_file_hugepages(file))
1024 accountable = 0;
1025
1026 if (!file->f_op || !file->f_op->mmap)
1027 return -ENODEV;
1028 break;
1029
1030 default:
1031 return -EINVAL;
1032 }
1033 } else {
1034 switch (flags & MAP_TYPE) {
1035 case MAP_SHARED:
1036
1037
1038
1039 pgoff = 0;
1040 vm_flags |= VM_SHARED | VM_MAYSHARE;
1041 break;
1042 case MAP_PRIVATE:
1043
1044
1045
1046 pgoff = addr >> PAGE_SHIFT;
1047 break;
1048 default:
1049 return -EINVAL;
1050 }
1051 }
1052
1053 error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
1054 if (error)
1055 return error;
1056
1057 return mmap_region(file, addr, len, flags, vm_flags, pgoff,
1058 accountable);
1059}
1060EXPORT_SYMBOL(do_mmap_pgoff);
1061
1062
1063
1064
1065
1066
1067
1068int vma_wants_writenotify(struct vm_area_struct *vma)
1069{
1070 unsigned int vm_flags = vma->vm_flags;
1071
1072
1073 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1074 return 0;
1075
1076
1077 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1078 return 1;
1079
1080
1081 if (pgprot_val(vma->vm_page_prot) !=
1082 pgprot_val(vm_get_page_prot(vm_flags)))
1083 return 0;
1084
1085
1086 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1087 return 0;
1088
1089
1090 return vma->vm_file && vma->vm_file->f_mapping &&
1091 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1092}
1093
1094unsigned long mmap_region(struct file *file, unsigned long addr,
1095 unsigned long len, unsigned long flags,
1096 unsigned int vm_flags, unsigned long pgoff,
1097 int accountable)
1098{
1099 struct mm_struct *mm = current->mm;
1100 struct vm_area_struct *vma, *prev;
1101 int correct_wcount = 0;
1102 int error;
1103 struct rb_node **rb_link, *rb_parent;
1104 unsigned long charged = 0;
1105 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1106
1107
1108 error = -ENOMEM;
1109munmap_back:
1110 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1111 if (vma && vma->vm_start < addr + len) {
1112 if (do_munmap(mm, addr, len))
1113 return -ENOMEM;
1114 goto munmap_back;
1115 }
1116
1117
1118 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1119 return -ENOMEM;
1120
1121 if (flags & MAP_NORESERVE)
1122 vm_flags |= VM_NORESERVE;
1123
1124 if (accountable && (!(flags & MAP_NORESERVE) ||
1125 sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
1126 if (vm_flags & VM_SHARED) {
1127
1128 vm_flags |= VM_ACCOUNT;
1129 } else if (vm_flags & VM_WRITE) {
1130
1131
1132
1133 charged = len >> PAGE_SHIFT;
1134 if (security_vm_enough_memory(charged))
1135 return -ENOMEM;
1136 vm_flags |= VM_ACCOUNT;
1137 }
1138 }
1139
1140
1141
1142
1143
1144
1145 if (!file && !(vm_flags & VM_SHARED) &&
1146 vma_merge(mm, prev, addr, addr + len, vm_flags,
1147 NULL, NULL, pgoff, NULL))
1148 goto out;
1149
1150
1151
1152
1153
1154
1155 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1156 if (!vma) {
1157 error = -ENOMEM;
1158 goto unacct_error;
1159 }
1160
1161 vma->vm_mm = mm;
1162 vma->vm_start = addr;
1163 vma->vm_end = addr + len;
1164 vma->vm_flags = vm_flags;
1165 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1166 vma->vm_pgoff = pgoff;
1167
1168 if (file) {
1169 error = -EINVAL;
1170 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1171 goto free_vma;
1172 if (vm_flags & VM_DENYWRITE) {
1173 error = deny_write_access(file);
1174 if (error)
1175 goto free_vma;
1176 correct_wcount = 1;
1177 }
1178 vma->vm_file = file;
1179 get_file(file);
1180 error = file->f_op->mmap(file, vma);
1181 if (error)
1182 goto unmap_and_free_vma;
1183 if (vm_flags & VM_EXECUTABLE)
1184 added_exe_file_vma(mm);
1185 } else if (vm_flags & VM_SHARED) {
1186 error = shmem_zero_setup(vma);
1187 if (error)
1188 goto free_vma;
1189 }
1190
1191
1192
1193
1194
1195
1196 if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
1197 vma->vm_flags &= ~VM_ACCOUNT;
1198
1199
1200
1201
1202
1203
1204 addr = vma->vm_start;
1205 pgoff = vma->vm_pgoff;
1206 vm_flags = vma->vm_flags;
1207
1208 if (vma_wants_writenotify(vma))
1209 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1210
1211 if (file && vma_merge(mm, prev, addr, vma->vm_end,
1212 vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
1213 mpol_put(vma_policy(vma));
1214 kmem_cache_free(vm_area_cachep, vma);
1215 fput(file);
1216 if (vm_flags & VM_EXECUTABLE)
1217 removed_exe_file_vma(mm);
1218 } else {
1219 vma_link(mm, vma, prev, rb_link, rb_parent);
1220 file = vma->vm_file;
1221 }
1222
1223
1224 if (correct_wcount)
1225 atomic_inc(&inode->i_writecount);
1226out:
1227 mm->total_vm += len >> PAGE_SHIFT;
1228 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1229 if (vm_flags & VM_LOCKED) {
1230 mm->locked_vm += len >> PAGE_SHIFT;
1231 make_pages_present(addr, addr + len);
1232 }
1233 if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1234 make_pages_present(addr, addr + len);
1235 return addr;
1236
1237unmap_and_free_vma:
1238 if (correct_wcount)
1239 atomic_inc(&inode->i_writecount);
1240 vma->vm_file = NULL;
1241 fput(file);
1242
1243
1244 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1245 charged = 0;
1246free_vma:
1247 kmem_cache_free(vm_area_cachep, vma);
1248unacct_error:
1249 if (charged)
1250 vm_unacct_memory(charged);
1251 return error;
1252}
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265#ifndef HAVE_ARCH_UNMAPPED_AREA
1266unsigned long
1267arch_get_unmapped_area(struct file *filp, unsigned long addr,
1268 unsigned long len, unsigned long pgoff, unsigned long flags)
1269{
1270 struct mm_struct *mm = current->mm;
1271 struct vm_area_struct *vma;
1272 unsigned long start_addr;
1273
1274 if (len > TASK_SIZE)
1275 return -ENOMEM;
1276
1277 if (flags & MAP_FIXED)
1278 return addr;
1279
1280 if (addr) {
1281 addr = PAGE_ALIGN(addr);
1282 vma = find_vma(mm, addr);
1283 if (TASK_SIZE - len >= addr &&
1284 (!vma || addr + len <= vma->vm_start))
1285 return addr;
1286 }
1287 if (len > mm->cached_hole_size) {
1288 start_addr = addr = mm->free_area_cache;
1289 } else {
1290 start_addr = addr = TASK_UNMAPPED_BASE;
1291 mm->cached_hole_size = 0;
1292 }
1293
1294full_search:
1295 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1296
1297 if (TASK_SIZE - len < addr) {
1298
1299
1300
1301
1302 if (start_addr != TASK_UNMAPPED_BASE) {
1303 addr = TASK_UNMAPPED_BASE;
1304 start_addr = addr;
1305 mm->cached_hole_size = 0;
1306 goto full_search;
1307 }
1308 return -ENOMEM;
1309 }
1310 if (!vma || addr + len <= vma->vm_start) {
1311
1312
1313
1314 mm->free_area_cache = addr + len;
1315 return addr;
1316 }
1317 if (addr + mm->cached_hole_size < vma->vm_start)
1318 mm->cached_hole_size = vma->vm_start - addr;
1319 addr = vma->vm_end;
1320 }
1321}
1322#endif
1323
1324void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1325{
1326
1327
1328
1329 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1330 mm->free_area_cache = addr;
1331 mm->cached_hole_size = ~0UL;
1332 }
1333}
1334
1335
1336
1337
1338
1339#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1340unsigned long
1341arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1342 const unsigned long len, const unsigned long pgoff,
1343 const unsigned long flags)
1344{
1345 struct vm_area_struct *vma;
1346 struct mm_struct *mm = current->mm;
1347 unsigned long addr = addr0;
1348
1349
1350 if (len > TASK_SIZE)
1351 return -ENOMEM;
1352
1353 if (flags & MAP_FIXED)
1354 return addr;
1355
1356
1357 if (addr) {
1358 addr = PAGE_ALIGN(addr);
1359 vma = find_vma(mm, addr);
1360 if (TASK_SIZE - len >= addr &&
1361 (!vma || addr + len <= vma->vm_start))
1362 return addr;
1363 }
1364
1365
1366 if (len <= mm->cached_hole_size) {
1367 mm->cached_hole_size = 0;
1368 mm->free_area_cache = mm->mmap_base;
1369 }
1370
1371
1372 addr = mm->free_area_cache;
1373
1374
1375 if (addr > len) {
1376 vma = find_vma(mm, addr-len);
1377 if (!vma || addr <= vma->vm_start)
1378
1379 return (mm->free_area_cache = addr-len);
1380 }
1381
1382 if (mm->mmap_base < len)
1383 goto bottomup;
1384
1385 addr = mm->mmap_base-len;
1386
1387 do {
1388
1389
1390
1391
1392
1393 vma = find_vma(mm, addr);
1394 if (!vma || addr+len <= vma->vm_start)
1395
1396 return (mm->free_area_cache = addr);
1397
1398
1399 if (addr + mm->cached_hole_size < vma->vm_start)
1400 mm->cached_hole_size = vma->vm_start - addr;
1401
1402
1403 addr = vma->vm_start-len;
1404 } while (len < vma->vm_start);
1405
1406bottomup:
1407
1408
1409
1410
1411
1412
1413 mm->cached_hole_size = ~0UL;
1414 mm->free_area_cache = TASK_UNMAPPED_BASE;
1415 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1416
1417
1418
1419 mm->free_area_cache = mm->mmap_base;
1420 mm->cached_hole_size = ~0UL;
1421
1422 return addr;
1423}
1424#endif
1425
1426void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1427{
1428
1429
1430
1431 if (addr > mm->free_area_cache)
1432 mm->free_area_cache = addr;
1433
1434
1435 if (mm->free_area_cache > mm->mmap_base)
1436 mm->free_area_cache = mm->mmap_base;
1437}
1438
1439unsigned long
1440get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1441 unsigned long pgoff, unsigned long flags)
1442{
1443 unsigned long (*get_area)(struct file *, unsigned long,
1444 unsigned long, unsigned long, unsigned long);
1445
1446 get_area = current->mm->get_unmapped_area;
1447 if (file && file->f_op && file->f_op->get_unmapped_area)
1448 get_area = file->f_op->get_unmapped_area;
1449 addr = get_area(file, addr, len, pgoff, flags);
1450 if (IS_ERR_VALUE(addr))
1451 return addr;
1452
1453 if (addr > TASK_SIZE - len)
1454 return -ENOMEM;
1455 if (addr & ~PAGE_MASK)
1456 return -EINVAL;
1457
1458 return arch_rebalance_pgtables(addr, len);
1459}
1460
1461EXPORT_SYMBOL(get_unmapped_area);
1462
1463
1464struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
1465{
1466 struct vm_area_struct *vma = NULL;
1467
1468 if (mm) {
1469
1470
1471 vma = mm->mmap_cache;
1472 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1473 struct rb_node * rb_node;
1474
1475 rb_node = mm->mm_rb.rb_node;
1476 vma = NULL;
1477
1478 while (rb_node) {
1479 struct vm_area_struct * vma_tmp;
1480
1481 vma_tmp = rb_entry(rb_node,
1482 struct vm_area_struct, vm_rb);
1483
1484 if (vma_tmp->vm_end > addr) {
1485 vma = vma_tmp;
1486 if (vma_tmp->vm_start <= addr)
1487 break;
1488 rb_node = rb_node->rb_left;
1489 } else
1490 rb_node = rb_node->rb_right;
1491 }
1492 if (vma)
1493 mm->mmap_cache = vma;
1494 }
1495 }
1496 return vma;
1497}
1498
1499EXPORT_SYMBOL(find_vma);
1500
1501
1502struct vm_area_struct *
1503find_vma_prev(struct mm_struct *mm, unsigned long addr,
1504 struct vm_area_struct **pprev)
1505{
1506 struct vm_area_struct *vma = NULL, *prev = NULL;
1507 struct rb_node * rb_node;
1508 if (!mm)
1509 goto out;
1510
1511
1512 vma = mm->mmap;
1513
1514
1515 rb_node = mm->mm_rb.rb_node;
1516
1517 while (rb_node) {
1518 struct vm_area_struct *vma_tmp;
1519 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1520
1521 if (addr < vma_tmp->vm_end) {
1522 rb_node = rb_node->rb_left;
1523 } else {
1524 prev = vma_tmp;
1525 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1526 break;
1527 rb_node = rb_node->rb_right;
1528 }
1529 }
1530
1531out:
1532 *pprev = prev;
1533 return prev ? prev->vm_next : vma;
1534}
1535
1536
1537
1538
1539
1540
1541static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
1542{
1543 struct mm_struct *mm = vma->vm_mm;
1544 struct rlimit *rlim = current->signal->rlim;
1545 unsigned long new_start;
1546
1547
1548 if (!may_expand_vm(mm, grow))
1549 return -ENOMEM;
1550
1551
1552 if (size > rlim[RLIMIT_STACK].rlim_cur)
1553 return -ENOMEM;
1554
1555
1556 if (vma->vm_flags & VM_LOCKED) {
1557 unsigned long locked;
1558 unsigned long limit;
1559 locked = mm->locked_vm + grow;
1560 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1561 if (locked > limit && !capable(CAP_IPC_LOCK))
1562 return -ENOMEM;
1563 }
1564
1565
1566 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1567 vma->vm_end - size;
1568 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1569 return -EFAULT;
1570
1571
1572
1573
1574
1575 if (security_vm_enough_memory(grow))
1576 return -ENOMEM;
1577
1578
1579 mm->total_vm += grow;
1580 if (vma->vm_flags & VM_LOCKED)
1581 mm->locked_vm += grow;
1582 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1583 return 0;
1584}
1585
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * expand_upwards - raise the end of an upward-growing VMA to cover @address.
 * @vma:     VMA to grow; must have VM_GROWSUP set
 * @address: address that has to become part of the VMA
 *
 * The new end is the page boundary following @address + 4.  Nothing is
 * changed when the VMA already reaches that far.
 *
 * Returns 0 on success (or nothing to do), -EFAULT when @vma does not grow
 * upwards, -ENOMEM when anon_vma_prepare() fails or the new end wraps
 * around, otherwise the error from acct_stack_growth().
 */
#ifndef CONFIG_IA64
static inline
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long new_end;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* The anon_vma must exist before its lock can be taken. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;
	anon_vma_lock(vma);

	/* A new end that is not above address means the sum wrapped. */
	new_end = PAGE_ALIGN(address + 4);
	if (new_end <= address) {
		error = -ENOMEM;
		goto out_unlock;
	}

	/* Re-checked under the lock: only grow if still needed. */
	if (new_end > vma->vm_end) {
		unsigned long size = new_end - vma->vm_start;
		unsigned long grow = (new_end - vma->vm_end) >> PAGE_SHIFT;

		error = acct_stack_growth(vma, size, grow);
		if (!error)
			vma->vm_end = new_end;
	}

out_unlock:
	anon_vma_unlock(vma);
	return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
1638
1639
1640
1641
1642static inline int expand_downwards(struct vm_area_struct *vma,
1643 unsigned long address)
1644{
1645 int error;
1646
1647
1648
1649
1650
1651 if (unlikely(anon_vma_prepare(vma)))
1652 return -ENOMEM;
1653
1654 address &= PAGE_MASK;
1655 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1656 if (error)
1657 return error;
1658
1659 anon_vma_lock(vma);
1660
1661
1662
1663
1664
1665
1666
1667
1668 if (address < vma->vm_start) {
1669 unsigned long size, grow;
1670
1671 size = vma->vm_end - address;
1672 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1673
1674 error = acct_stack_growth(vma, size, grow);
1675 if (!error) {
1676 vma->vm_start = address;
1677 vma->vm_pgoff -= grow;
1678 }
1679 }
1680 anon_vma_unlock(vma);
1681 return error;
1682}
1683
/*
 * expand_stack_downwards - non-static entry point for expand_downwards().
 * Forwards both arguments and hands back its result unchanged.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
1688
1689#ifdef CONFIG_STACK_GROWSUP
/*
 * expand_stack - grow a stack VMA to cover @address.
 * In the CONFIG_STACK_GROWSUP build this is expand_upwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_upwards(vma, address);

	return error;
}
1694
1695struct vm_area_struct *
1696find_extend_vma(struct mm_struct *mm, unsigned long addr)
1697{
1698 struct vm_area_struct *vma, *prev;
1699
1700 addr &= PAGE_MASK;
1701 vma = find_vma_prev(mm, addr, &prev);
1702 if (vma && (vma->vm_start <= addr))
1703 return vma;
1704 if (!prev || expand_stack(prev, addr))
1705 return NULL;
1706 if (prev->vm_flags & VM_LOCKED)
1707 make_pages_present(addr, prev->vm_end);
1708 return prev;
1709}
1710#else
/*
 * expand_stack - grow a stack VMA to cover @address.
 * Without CONFIG_STACK_GROWSUP this is expand_downwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
1715
1716struct vm_area_struct *
1717find_extend_vma(struct mm_struct * mm, unsigned long addr)
1718{
1719 struct vm_area_struct * vma;
1720 unsigned long start;
1721
1722 addr &= PAGE_MASK;
1723 vma = find_vma(mm,addr);
1724 if (!vma)
1725 return NULL;
1726 if (vma->vm_start <= addr)
1727 return vma;
1728 if (!(vma->vm_flags & VM_GROWSDOWN))
1729 return NULL;
1730 start = vma->vm_start;
1731 if (expand_stack(vma, addr))
1732 return NULL;
1733 if (vma->vm_flags & VM_LOCKED)
1734 make_pages_present(addr, start);
1735 return vma;
1736}
1737#endif
1738
1739
1740
1741
1742
1743
1744
1745static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1746{
1747
1748 update_hiwater_vm(mm);
1749 do {
1750 long nrpages = vma_pages(vma);
1751
1752 mm->total_vm -= nrpages;
1753 if (vma->vm_flags & VM_LOCKED)
1754 mm->locked_vm -= nrpages;
1755 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1756 vma = remove_vma(vma);
1757 } while (vma);
1758 validate_mm(mm);
1759}
1760
1761
1762
1763
1764
1765
1766static void unmap_region(struct mm_struct *mm,
1767 struct vm_area_struct *vma, struct vm_area_struct *prev,
1768 unsigned long start, unsigned long end)
1769{
1770 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1771 struct mmu_gather *tlb;
1772 unsigned long nr_accounted = 0;
1773
1774 lru_add_drain();
1775 tlb = tlb_gather_mmu(mm, 0);
1776 update_hiwater_rss(mm);
1777 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1778 vm_unacct_memory(nr_accounted);
1779 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1780 next? next->vm_start: 0);
1781 tlb_finish_mmu(tlb, start, end);
1782}
1783
1784
1785
1786
1787
1788static void
1789detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1790 struct vm_area_struct *prev, unsigned long end)
1791{
1792 struct vm_area_struct **insertion_point;
1793 struct vm_area_struct *tail_vma = NULL;
1794 unsigned long addr;
1795
1796 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1797 do {
1798 rb_erase(&vma->vm_rb, &mm->mm_rb);
1799 mm->map_count--;
1800 tail_vma = vma;
1801 vma = vma->vm_next;
1802 } while (vma && vma->vm_start < end);
1803 *insertion_point = vma;
1804 tail_vma->vm_next = NULL;
1805 if (mm->unmap_area == arch_unmap_area)
1806 addr = prev ? prev->vm_end : mm->mmap_base;
1807 else
1808 addr = vma ? vma->vm_start : mm->mmap_base;
1809 mm->unmap_area(mm, addr);
1810 mm->mmap_cache = NULL;
1811}
1812
1813
1814
1815
1816
1817int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1818 unsigned long addr, int new_below)
1819{
1820 struct mempolicy *pol;
1821 struct vm_area_struct *new;
1822
1823 if (is_vm_hugetlb_page(vma) && (addr &
1824 ~(huge_page_mask(hstate_vma(vma)))))
1825 return -EINVAL;
1826
1827 if (mm->map_count >= sysctl_max_map_count)
1828 return -ENOMEM;
1829
1830 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1831 if (!new)
1832 return -ENOMEM;
1833
1834
1835 *new = *vma;
1836
1837 if (new_below)
1838 new->vm_end = addr;
1839 else {
1840 new->vm_start = addr;
1841 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1842 }
1843
1844 pol = mpol_dup(vma_policy(vma));
1845 if (IS_ERR(pol)) {
1846 kmem_cache_free(vm_area_cachep, new);
1847 return PTR_ERR(pol);
1848 }
1849 vma_set_policy(new, pol);
1850
1851 if (new->vm_file) {
1852 get_file(new->vm_file);
1853 if (vma->vm_flags & VM_EXECUTABLE)
1854 added_exe_file_vma(mm);
1855 }
1856
1857 if (new->vm_ops && new->vm_ops->open)
1858 new->vm_ops->open(new);
1859
1860 if (new_below)
1861 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1862 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1863 else
1864 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1865
1866 return 0;
1867}
1868
1869
1870
1871
1872
1873
1874int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1875{
1876 unsigned long end;
1877 struct vm_area_struct *vma, *prev, *last;
1878
1879 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1880 return -EINVAL;
1881
1882 if ((len = PAGE_ALIGN(len)) == 0)
1883 return -EINVAL;
1884
1885
1886 vma = find_vma_prev(mm, start, &prev);
1887 if (!vma)
1888 return 0;
1889
1890
1891
1892 end = start + len;
1893 if (vma->vm_start >= end)
1894 return 0;
1895
1896
1897
1898
1899
1900
1901
1902
1903 if (start > vma->vm_start) {
1904 int error = split_vma(mm, vma, start, 0);
1905 if (error)
1906 return error;
1907 prev = vma;
1908 }
1909
1910
1911 last = find_vma(mm, end);
1912 if (last && end > last->vm_start) {
1913 int error = split_vma(mm, last, end, 1);
1914 if (error)
1915 return error;
1916 }
1917 vma = prev? prev->vm_next: mm->mmap;
1918
1919
1920
1921
1922 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1923 unmap_region(mm, vma, prev, start, end);
1924
1925
1926 remove_vma_list(mm, vma);
1927
1928 return 0;
1929}
1930
1931EXPORT_SYMBOL(do_munmap);
1932
1933SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
1934{
1935 int ret;
1936 struct mm_struct *mm = current->mm;
1937
1938 profile_munmap(addr);
1939
1940 down_write(&mm->mmap_sem);
1941 ret = do_munmap(mm, addr, len);
1942 up_write(&mm->mmap_sem);
1943 return ret;
1944}
1945
/*
 * verify_mm_writelocked - debug check that mm->mmap_sem is write-held.
 *
 * Only active with CONFIG_DEBUG_VM.  If a read lock can be taken, the
 * semaphore was not held for writing: warn and drop the read lock again.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
1955
1956
1957
1958
1959
1960
1961unsigned long do_brk(unsigned long addr, unsigned long len)
1962{
1963 struct mm_struct * mm = current->mm;
1964 struct vm_area_struct * vma, * prev;
1965 unsigned long flags;
1966 struct rb_node ** rb_link, * rb_parent;
1967 pgoff_t pgoff = addr >> PAGE_SHIFT;
1968 int error;
1969
1970 len = PAGE_ALIGN(len);
1971 if (!len)
1972 return addr;
1973
1974 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
1975 return -EINVAL;
1976
1977 if (is_hugepage_only_range(mm, addr, len))
1978 return -EINVAL;
1979
1980 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
1981 if (error)
1982 return error;
1983
1984 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
1985
1986 error = arch_mmap_check(addr, len, flags);
1987 if (error)
1988 return error;
1989
1990
1991
1992
1993 if (mm->def_flags & VM_LOCKED) {
1994 unsigned long locked, lock_limit;
1995 locked = len >> PAGE_SHIFT;
1996 locked += mm->locked_vm;
1997 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
1998 lock_limit >>= PAGE_SHIFT;
1999 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2000 return -EAGAIN;
2001 }
2002
2003
2004
2005
2006
2007 verify_mm_writelocked(mm);
2008
2009
2010
2011
2012 munmap_back:
2013 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2014 if (vma && vma->vm_start < addr + len) {
2015 if (do_munmap(mm, addr, len))
2016 return -ENOMEM;
2017 goto munmap_back;
2018 }
2019
2020
2021 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2022 return -ENOMEM;
2023
2024 if (mm->map_count > sysctl_max_map_count)
2025 return -ENOMEM;
2026
2027 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2028 return -ENOMEM;
2029
2030
2031 if (vma_merge(mm, prev, addr, addr + len, flags,
2032 NULL, NULL, pgoff, NULL))
2033 goto out;
2034
2035
2036
2037
2038 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2039 if (!vma) {
2040 vm_unacct_memory(len >> PAGE_SHIFT);
2041 return -ENOMEM;
2042 }
2043
2044 vma->vm_mm = mm;
2045 vma->vm_start = addr;
2046 vma->vm_end = addr + len;
2047 vma->vm_pgoff = pgoff;
2048 vma->vm_flags = flags;
2049 vma->vm_page_prot = vm_get_page_prot(flags);
2050 vma_link(mm, vma, prev, rb_link, rb_parent);
2051out:
2052 mm->total_vm += len >> PAGE_SHIFT;
2053 if (flags & VM_LOCKED) {
2054 mm->locked_vm += len >> PAGE_SHIFT;
2055 make_pages_present(addr, addr + len);
2056 }
2057 return addr;
2058}
2059
2060EXPORT_SYMBOL(do_brk);
2061
2062
2063void exit_mmap(struct mm_struct *mm)
2064{
2065 struct mmu_gather *tlb;
2066 struct vm_area_struct *vma = mm->mmap;
2067 unsigned long nr_accounted = 0;
2068 unsigned long end;
2069
2070
2071 arch_exit_mmap(mm);
2072 mmu_notifier_release(mm);
2073
2074 if (!mm->mmap)
2075 return;
2076
2077 lru_add_drain();
2078 flush_cache_mm(mm);
2079 tlb = tlb_gather_mmu(mm, 1);
2080
2081
2082 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2083 vm_unacct_memory(nr_accounted);
2084 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2085 tlb_finish_mmu(tlb, 0, end);
2086
2087
2088
2089
2090
2091 while (vma)
2092 vma = remove_vma(vma);
2093
2094 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2095}
2096
2097
2098
2099
2100
2101int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2102{
2103 struct vm_area_struct * __vma, * prev;
2104 struct rb_node ** rb_link, * rb_parent;
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118 if (!vma->vm_file) {
2119 BUG_ON(vma->anon_vma);
2120 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2121 }
2122 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2123 if (__vma && __vma->vm_start < vma->vm_end)
2124 return -ENOMEM;
2125 if ((vma->vm_flags & VM_ACCOUNT) &&
2126 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2127 return -ENOMEM;
2128 vma_link(mm, vma, prev, rb_link, rb_parent);
2129 return 0;
2130}
2131
2132
2133
2134
2135
2136struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2137 unsigned long addr, unsigned long len, pgoff_t pgoff)
2138{
2139 struct vm_area_struct *vma = *vmap;
2140 unsigned long vma_start = vma->vm_start;
2141 struct mm_struct *mm = vma->vm_mm;
2142 struct vm_area_struct *new_vma, *prev;
2143 struct rb_node **rb_link, *rb_parent;
2144 struct mempolicy *pol;
2145
2146
2147
2148
2149
2150 if (!vma->vm_file && !vma->anon_vma)
2151 pgoff = addr >> PAGE_SHIFT;
2152
2153 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2154 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2155 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2156 if (new_vma) {
2157
2158
2159
2160 if (vma_start >= new_vma->vm_start &&
2161 vma_start < new_vma->vm_end)
2162 *vmap = new_vma;
2163 } else {
2164 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2165 if (new_vma) {
2166 *new_vma = *vma;
2167 pol = mpol_dup(vma_policy(vma));
2168 if (IS_ERR(pol)) {
2169 kmem_cache_free(vm_area_cachep, new_vma);
2170 return NULL;
2171 }
2172 vma_set_policy(new_vma, pol);
2173 new_vma->vm_start = addr;
2174 new_vma->vm_end = addr + len;
2175 new_vma->vm_pgoff = pgoff;
2176 if (new_vma->vm_file) {
2177 get_file(new_vma->vm_file);
2178 if (vma->vm_flags & VM_EXECUTABLE)
2179 added_exe_file_vma(mm);
2180 }
2181 if (new_vma->vm_ops && new_vma->vm_ops->open)
2182 new_vma->vm_ops->open(new_vma);
2183 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2184 }
2185 }
2186 return new_vma;
2187}
2188
2189
2190
2191
2192
2193int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2194{
2195 unsigned long cur = mm->total_vm;
2196 unsigned long lim;
2197
2198 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2199
2200 if (cur + npages > lim)
2201 return 0;
2202 return 1;
2203}
2204
2205
2206static int special_mapping_fault(struct vm_area_struct *vma,
2207 struct vm_fault *vmf)
2208{
2209 pgoff_t pgoff;
2210 struct page **pages;
2211
2212
2213
2214
2215
2216
2217
2218 pgoff = vmf->pgoff - vma->vm_pgoff;
2219
2220 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2221 pgoff--;
2222
2223 if (*pages) {
2224 struct page *page = *pages;
2225 get_page(page);
2226 vmf->page = page;
2227 return 0;
2228 }
2229
2230 return VM_FAULT_SIGBUS;
2231}
2232
2233
2234
2235
2236static void special_mapping_close(struct vm_area_struct *vma)
2237{
2238}
2239
2240static struct vm_operations_struct special_mapping_vmops = {
2241 .close = special_mapping_close,
2242 .fault = special_mapping_fault,
2243};
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254int install_special_mapping(struct mm_struct *mm,
2255 unsigned long addr, unsigned long len,
2256 unsigned long vm_flags, struct page **pages)
2257{
2258 struct vm_area_struct *vma;
2259
2260 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2261 if (unlikely(vma == NULL))
2262 return -ENOMEM;
2263
2264 vma->vm_mm = mm;
2265 vma->vm_start = addr;
2266 vma->vm_end = addr + len;
2267
2268 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2269 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2270
2271 vma->vm_ops = &special_mapping_vmops;
2272 vma->vm_private_data = pages;
2273
2274 if (unlikely(insert_vm_struct(mm, vma))) {
2275 kmem_cache_free(vm_area_cachep, vma);
2276 return -ENOMEM;
2277 }
2278
2279 mm->total_vm += len >> PAGE_SHIFT;
2280
2281 return 0;
2282}
2283
2284static DEFINE_MUTEX(mm_all_locks_mutex);
2285
2286static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2287{
2288 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2289
2290
2291
2292
2293 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303 if (__test_and_set_bit(0, (unsigned long *)
2304 &anon_vma->head.next))
2305 BUG();
2306 }
2307}
2308
2309static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2310{
2311 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2322 BUG();
2323 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2324 }
2325}
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359int mm_take_all_locks(struct mm_struct *mm)
2360{
2361 struct vm_area_struct *vma;
2362 int ret = -EINTR;
2363
2364 BUG_ON(down_read_trylock(&mm->mmap_sem));
2365
2366 mutex_lock(&mm_all_locks_mutex);
2367
2368 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2369 if (signal_pending(current))
2370 goto out_unlock;
2371 if (vma->vm_file && vma->vm_file->f_mapping)
2372 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2373 }
2374
2375 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2376 if (signal_pending(current))
2377 goto out_unlock;
2378 if (vma->anon_vma)
2379 vm_lock_anon_vma(mm, vma->anon_vma);
2380 }
2381
2382 ret = 0;
2383
2384out_unlock:
2385 if (ret)
2386 mm_drop_all_locks(mm);
2387
2388 return ret;
2389}
2390
2391static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2392{
2393 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406 if (!__test_and_clear_bit(0, (unsigned long *)
2407 &anon_vma->head.next))
2408 BUG();
2409 spin_unlock(&anon_vma->lock);
2410 }
2411}
2412
2413static void vm_unlock_mapping(struct address_space *mapping)
2414{
2415 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2416
2417
2418
2419
2420 spin_unlock(&mapping->i_mmap_lock);
2421 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2422 &mapping->flags))
2423 BUG();
2424 }
2425}
2426
2427
2428
2429
2430
2431void mm_drop_all_locks(struct mm_struct *mm)
2432{
2433 struct vm_area_struct *vma;
2434
2435 BUG_ON(down_read_trylock(&mm->mmap_sem));
2436 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2437
2438 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2439 if (vma->anon_vma)
2440 vm_unlock_anon_vma(vma->anon_vma);
2441 if (vma->vm_file && vma->vm_file->f_mapping)
2442 vm_unlock_mapping(vma->vm_file->f_mapping);
2443 }
2444
2445 mutex_unlock(&mm_all_locks_mutex);
2446}
2447