1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/module.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30
31#include <asm/uaccess.h>
32#include <asm/cacheflush.h>
33#include <asm/tlb.h>
34#include <asm/mmu_context.h>
35
36#include "internal.h"
37
38#ifndef arch_mmap_check
39#define arch_mmap_check(addr, len, flags) (0)
40#endif
41
42#ifndef arch_rebalance_pgtables
43#define arch_rebalance_pgtables(addr, len) (addr)
44#endif
45
46static void unmap_region(struct mm_struct *mm,
47 struct vm_area_struct *vma, struct vm_area_struct *prev,
48 unsigned long start, unsigned long end);
49
50
51
52
53
54#undef DEBUG_MM_RB
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
 * Page-protection lookup table with one entry per combination of the four
 * bits VM_READ, VM_WRITE, VM_EXEC and VM_SHARED; vm_get_page_prot() indexes
 * it with (vm_flags & those bits).
 *
 * NOTE(review): the __Pxxx / __Sxxx values come from the architecture
 * headers; presumably "P" is private and "S" is shared - confirm there.
 */
71pgprot_t protection_map[16] = {
72 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
73 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
74};
75
76pgprot_t vm_get_page_prot(unsigned long vm_flags)
77{
78 return __pgprot(pgprot_val(protection_map[vm_flags &
79 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
80 pgprot_val(arch_vm_get_page_prot(vm_flags)));
81}
82EXPORT_SYMBOL(vm_get_page_prot);
83
/*
 * Tunables and counters used by the accounting code in this file:
 *  - sysctl_overcommit_memory: policy selector tested in __vm_enough_memory()
 *    (OVERCOMMIT_ALWAYS / OVERCOMMIT_GUESS / anything else = strict).
 *  - sysctl_overcommit_ratio: applied as "* ratio / 100" to non-hugetlb RAM
 *    when computing the strict commit limit.
 *  - sysctl_max_map_count: do_mmap_pgoff() refuses new mappings once
 *    mm->map_count exceeds it.
 *  - vm_committed_space: committed total compared with the strict limit.
 */
84int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
85int sysctl_overcommit_ratio = 50;
86int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
87atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
106{
107 unsigned long free, allowed;
108
109 vm_acct_memory(pages);
110
111
112
113
114 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
115 return 0;
116
117 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
118 unsigned long n;
119
120 free = global_page_state(NR_FILE_PAGES);
121 free += nr_swap_pages;
122
123
124
125
126
127
128
129 free += global_page_state(NR_SLAB_RECLAIMABLE);
130
131
132
133
134 if (!cap_sys_admin)
135 free -= free / 32;
136
137 if (free > pages)
138 return 0;
139
140
141
142
143
144 n = nr_free_pages();
145
146
147
148
149 if (n <= totalreserve_pages)
150 goto error;
151 else
152 n -= totalreserve_pages;
153
154
155
156
157 if (!cap_sys_admin)
158 n -= n / 32;
159 free += n;
160
161 if (free > pages)
162 return 0;
163
164 goto error;
165 }
166
167 allowed = (totalram_pages - hugetlb_total_pages())
168 * sysctl_overcommit_ratio / 100;
169
170
171
172 if (!cap_sys_admin)
173 allowed -= allowed / 32;
174 allowed += total_swap_pages;
175
176
177
178 if (mm)
179 allowed -= mm->total_vm / 32;
180
181
182
183
184
185 if (atomic_long_read(&vm_committed_space) < (long)allowed)
186 return 0;
187error:
188 vm_unacct_memory(pages);
189
190 return -ENOMEM;
191}
192
193
194
195
196static void __remove_shared_vm_struct(struct vm_area_struct *vma,
197 struct file *file, struct address_space *mapping)
198{
199 if (vma->vm_flags & VM_DENYWRITE)
200 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
201 if (vma->vm_flags & VM_SHARED)
202 mapping->i_mmap_writable--;
203
204 flush_dcache_mmap_lock(mapping);
205 if (unlikely(vma->vm_flags & VM_NONLINEAR))
206 list_del_init(&vma->shared.vm_set.list);
207 else
208 vma_prio_tree_remove(vma, &mapping->i_mmap);
209 flush_dcache_mmap_unlock(mapping);
210}
211
212
213
214
215
216void unlink_file_vma(struct vm_area_struct *vma)
217{
218 struct file *file = vma->vm_file;
219
220 if (file) {
221 struct address_space *mapping = file->f_mapping;
222 spin_lock(&mapping->i_mmap_lock);
223 __remove_shared_vm_struct(vma, file, mapping);
224 spin_unlock(&mapping->i_mmap_lock);
225 }
226}
227
228
229
230
231static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
232{
233 struct vm_area_struct *next = vma->vm_next;
234
235 might_sleep();
236 if (vma->vm_ops && vma->vm_ops->close)
237 vma->vm_ops->close(vma);
238 if (vma->vm_file) {
239 fput(vma->vm_file);
240 if (vma->vm_flags & VM_EXECUTABLE)
241 removed_exe_file_vma(vma->vm_mm);
242 }
243 mpol_put(vma_policy(vma));
244 kmem_cache_free(vm_area_cachep, vma);
245 return next;
246}
247
248SYSCALL_DEFINE1(brk, unsigned long, brk)
249{
250 unsigned long rlim, retval;
251 unsigned long newbrk, oldbrk;
252 struct mm_struct *mm = current->mm;
253 unsigned long min_brk;
254
255 down_write(&mm->mmap_sem);
256
257#ifdef CONFIG_COMPAT_BRK
258 min_brk = mm->end_code;
259#else
260 min_brk = mm->start_brk;
261#endif
262 if (brk < min_brk)
263 goto out;
264
265
266
267
268
269
270
271 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
272 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
273 (mm->end_data - mm->start_data) > rlim)
274 goto out;
275
276 newbrk = PAGE_ALIGN(brk);
277 oldbrk = PAGE_ALIGN(mm->brk);
278 if (oldbrk == newbrk)
279 goto set_brk;
280
281
282 if (brk <= mm->brk) {
283 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
284 goto set_brk;
285 goto out;
286 }
287
288
289 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
290 goto out;
291
292
293 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
294 goto out;
295set_brk:
296 mm->brk = brk;
297out:
298 retval = mm->brk;
299 up_write(&mm->mmap_sem);
300 return retval;
301}
302
303#ifdef DEBUG_MM_RB
304static int browse_rb(struct rb_root *root)
305{
306 int i = 0, j;
307 struct rb_node *nd, *pn = NULL;
308 unsigned long prev = 0, pend = 0;
309
310 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
311 struct vm_area_struct *vma;
312 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
313 if (vma->vm_start < prev)
314 printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
315 if (vma->vm_start < pend)
316 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
317 if (vma->vm_start > vma->vm_end)
318 printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
319 i++;
320 pn = nd;
321 prev = vma->vm_start;
322 pend = vma->vm_end;
323 }
324 j = 0;
325 for (nd = pn; nd; nd = rb_prev(nd)) {
326 j++;
327 }
328 if (i != j)
329 printk("backwards %d, forwards %d\n", j, i), i = 0;
330 return i;
331}
332
333void validate_mm(struct mm_struct *mm)
334{
335 int bug = 0;
336 int i = 0;
337 struct vm_area_struct *tmp = mm->mmap;
338 while (tmp) {
339 tmp = tmp->vm_next;
340 i++;
341 }
342 if (i != mm->map_count)
343 printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
344 i = browse_rb(&mm->mm_rb);
345 if (i != mm->map_count)
346 printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
347 BUG_ON(bug);
348}
349#else
350#define validate_mm(mm) do { } while (0)
351#endif
352
353static struct vm_area_struct *
354find_vma_prepare(struct mm_struct *mm, unsigned long addr,
355 struct vm_area_struct **pprev, struct rb_node ***rb_link,
356 struct rb_node ** rb_parent)
357{
358 struct vm_area_struct * vma;
359 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
360
361 __rb_link = &mm->mm_rb.rb_node;
362 rb_prev = __rb_parent = NULL;
363 vma = NULL;
364
365 while (*__rb_link) {
366 struct vm_area_struct *vma_tmp;
367
368 __rb_parent = *__rb_link;
369 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
370
371 if (vma_tmp->vm_end > addr) {
372 vma = vma_tmp;
373 if (vma_tmp->vm_start <= addr)
374 break;
375 __rb_link = &__rb_parent->rb_left;
376 } else {
377 rb_prev = __rb_parent;
378 __rb_link = &__rb_parent->rb_right;
379 }
380 }
381
382 *pprev = NULL;
383 if (rb_prev)
384 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
385 *rb_link = __rb_link;
386 *rb_parent = __rb_parent;
387 return vma;
388}
389
390static inline void
391__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
392 struct vm_area_struct *prev, struct rb_node *rb_parent)
393{
394 if (prev) {
395 vma->vm_next = prev->vm_next;
396 prev->vm_next = vma;
397 } else {
398 mm->mmap = vma;
399 if (rb_parent)
400 vma->vm_next = rb_entry(rb_parent,
401 struct vm_area_struct, vm_rb);
402 else
403 vma->vm_next = NULL;
404 }
405}
406
407void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
408 struct rb_node **rb_link, struct rb_node *rb_parent)
409{
410 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
411 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
412}
413
414static void __vma_link_file(struct vm_area_struct *vma)
415{
416 struct file *file;
417
418 file = vma->vm_file;
419 if (file) {
420 struct address_space *mapping = file->f_mapping;
421
422 if (vma->vm_flags & VM_DENYWRITE)
423 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
424 if (vma->vm_flags & VM_SHARED)
425 mapping->i_mmap_writable++;
426
427 flush_dcache_mmap_lock(mapping);
428 if (unlikely(vma->vm_flags & VM_NONLINEAR))
429 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
430 else
431 vma_prio_tree_insert(vma, &mapping->i_mmap);
432 flush_dcache_mmap_unlock(mapping);
433 }
434}
435
/*
 * Link @vma into the per-mm structures: the vm_next list first, then the
 * rbtree, then the anon_vma list.  File linkage is not done here.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	   struct vm_area_struct *prev, struct rb_node **rb_link,
	   struct rb_node *rb_parent)
{
	/* list */
	__vma_link_list(mm, vma, prev, rb_parent);
	/* tree */
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	/* anon rmap */
	__anon_vma_link(vma);
}
445
446static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
447 struct vm_area_struct *prev, struct rb_node **rb_link,
448 struct rb_node *rb_parent)
449{
450 struct address_space *mapping = NULL;
451
452 if (vma->vm_file)
453 mapping = vma->vm_file->f_mapping;
454
455 if (mapping) {
456 spin_lock(&mapping->i_mmap_lock);
457 vma->vm_truncate_count = mapping->truncate_count;
458 }
459 anon_vma_lock(vma);
460
461 __vma_link(mm, vma, prev, rb_link, rb_parent);
462 __vma_link_file(vma);
463
464 anon_vma_unlock(vma);
465 if (mapping)
466 spin_unlock(&mapping->i_mmap_lock);
467
468 mm->map_count++;
469 validate_mm(mm);
470}
471
472
473
474
475
476
477static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
478{
479 struct vm_area_struct *__vma, *prev;
480 struct rb_node **rb_link, *rb_parent;
481
482 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
483 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
484 __vma_link(mm, vma, prev, rb_link, rb_parent);
485 mm->map_count++;
486}
487
488static inline void
489__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
490 struct vm_area_struct *prev)
491{
492 prev->vm_next = vma->vm_next;
493 rb_erase(&vma->vm_rb, &mm->mm_rb);
494 if (mm->mmap_cache == vma)
495 mm->mmap_cache = prev;
496}
497
498
499
500
501
502
503
504
/*
 * vma_adjust - change the extent of @vma and keep its neighbour consistent.
 *
 * @vma:    area whose vm_start/vm_end/vm_pgoff are rewritten
 * @start:  new vm_start
 * @end:    new vm_end (replaced by next->vm_end when next is absorbed)
 * @pgoff:  new vm_pgoff
 * @insert: optional new vma to link into the mm after @vma was resized
 *
 * When @insert is NULL, the position of @end relative to the following vma
 * decides what happens to that vma: it is removed (remove_next) or its
 * start is moved by adjust_next pages (which may be negative).  The updates
 * are made with the file's i_mmap_lock and the chosen anon_vma lock held,
 * when those exist.
 */
505void vma_adjust(struct vm_area_struct *vma, unsigned long start,
506 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
507{
508 struct mm_struct *mm = vma->vm_mm;
509 struct vm_area_struct *next = vma->vm_next;
510 struct vm_area_struct *importer = NULL;
511 struct address_space *mapping = NULL;
512 struct prio_tree_root *root = NULL;
513 struct file *file = vma->vm_file;
514 struct anon_vma *anon_vma = NULL;
515 long adjust_next = 0;
516 int remove_next = 0;
517
518 if (next && !insert) {
519 if (end >= next->vm_end) {
/*
 * The new end covers next completely, so next will be unlinked and freed.
 * remove_next becomes 2 when end lies beyond next->vm_end; end is clamped
 * to next->vm_end for this pass and the tail of the function jumps back
 * to "again" for the vma after it.
 */
520
521
522
523
524again: remove_next = 1 + (end > next->vm_end);
525 end = next->vm_end;
526 anon_vma = next->anon_vma;
527 importer = vma;
528 } else if (end > next->vm_start) {
/* The new end lands inside next: next gives up its first adjust_next pages. */
529
530
531
532
533 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
534 anon_vma = next->anon_vma;
535 importer = vma;
536 } else if (end < vma->vm_end) {
/*
 * vma shrinks: adjust_next is negative, so next->vm_start and vm_pgoff
 * move down by that many pages further below; next is the importer.
 */
537
538
539
540
541
542 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
543 anon_vma = next->anon_vma;
544 importer = next;
545 }
546 }
547
548 if (file) {
549 mapping = file->f_mapping;
/* Only linear mappings live in the prio tree and need re-insertion. */
550 if (!(vma->vm_flags & VM_NONLINEAR))
551 root = &mapping->i_mmap;
552 spin_lock(&mapping->i_mmap_lock);
553 if (importer &&
554 vma->vm_truncate_count != next->vm_truncate_count) {
/* The two vmas disagree on truncate_count: reset the importer's to 0. */
555
556
557
558
559 importer->vm_truncate_count = 0;
560 }
561 if (insert) {
562 insert->vm_truncate_count = vma->vm_truncate_count;
/* Link insert into the file structures now, under i_mmap_lock. */
563
564
565
566
567
568
569 __vma_link_file(insert);
570 }
571 }
572
/* vma's own anon_vma, if set, takes precedence over the one taken from next. */
573
574
575
576
577 if (vma->anon_vma)
578 anon_vma = vma->anon_vma;
579 if (anon_vma) {
580 spin_lock(&anon_vma->lock);
/* An importer without an anon_vma adopts this one and is linked to it. */
581
582
583
584
585
586 if (importer && !importer->anon_vma) {
587 importer->anon_vma = anon_vma;
588 __anon_vma_link(importer);
589 }
590 }
591
/* Take the affected vmas out of the prio tree while their ranges change. */
592 if (root) {
593 flush_dcache_mmap_lock(mapping);
594 vma_prio_tree_remove(vma, root);
595 if (adjust_next)
596 vma_prio_tree_remove(next, root);
597 }
598
599 vma->vm_start = start;
600 vma->vm_end = end;
601 vma->vm_pgoff = pgoff;
602 if (adjust_next) {
603 next->vm_start += adjust_next << PAGE_SHIFT;
604 next->vm_pgoff += adjust_next;
605 }
606
/* Re-insert with the new ranges (next first, then vma). */
607 if (root) {
608 if (adjust_next)
609 vma_prio_tree_insert(next, root);
610 vma_prio_tree_insert(vma, root);
611 flush_dcache_mmap_unlock(mapping);
612 }
613
614 if (remove_next) {
/* Unlink next from the mm, from the file mapping and merge its anon_vma list. */
615
616
617
618
619 __vma_unlink(mm, next, vma);
620 if (file)
621 __remove_shared_vm_struct(next, file, mapping);
622 if (next->anon_vma)
623 __anon_vma_merge(vma, next);
624 } else if (insert) {
/* insert was already linked to the file above; link it into the mm here. */
625
626
627
628
629
630 __insert_vm_struct(mm, insert);
631 }
632
633 if (anon_vma)
634 spin_unlock(&anon_vma->lock);
635 if (mapping)
636 spin_unlock(&mapping->i_mmap_lock);
637
/* With the locks dropped, release what the removed vma was holding. */
638 if (remove_next) {
639 if (file) {
640 fput(file);
641 if (next->vm_flags & VM_EXECUTABLE)
642 removed_exe_file_vma(mm);
643 }
644 mm->map_count--;
645 mpol_put(vma_policy(next));
646 kmem_cache_free(vm_area_cachep, next);
/*
 * remove_next == 2: the requested end went past the vma just freed, so
 * repeat the whole sequence for the new vma->vm_next.
 */
647
648
649
650
651
652 if (remove_next == 2) {
653 next = vma->vm_next;
654 goto again;
655 }
656 }
657
658 validate_mm(mm);
659}
660
661
662#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
663
664
665
666
667
668static inline int is_mergeable_vma(struct vm_area_struct *vma,
669 struct file *file, unsigned long vm_flags)
670{
671 if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
672 return 0;
673 if (vma->vm_file != file)
674 return 0;
675 if (vma->vm_ops && vma->vm_ops->close)
676 return 0;
677 return 1;
678}
679
/*
 * Two anon_vma pointers are merge-compatible when at least one of them is
 * NULL or when both are the same object.  Returns 1 or 0.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
685
686
687
688
689
690
691
692
693
694
695
696
697static int
698can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
699 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
700{
701 if (is_mergeable_vma(vma, file, vm_flags) &&
702 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
703 if (vma->vm_pgoff == vm_pgoff)
704 return 1;
705 }
706 return 0;
707}
708
709
710
711
712
713
714
715
716static int
717can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
718 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
719{
720 if (is_mergeable_vma(vma, file, vm_flags) &&
721 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
722 pgoff_t vm_pglen;
723 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
724 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
725 return 1;
726 }
727 return 0;
728}
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759struct vm_area_struct *vma_merge(struct mm_struct *mm,
760 struct vm_area_struct *prev, unsigned long addr,
761 unsigned long end, unsigned long vm_flags,
762 struct anon_vma *anon_vma, struct file *file,
763 pgoff_t pgoff, struct mempolicy *policy)
764{
765 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
766 struct vm_area_struct *area, *next;
767
768
769
770
771
772 if (vm_flags & VM_SPECIAL)
773 return NULL;
774
775 if (prev)
776 next = prev->vm_next;
777 else
778 next = mm->mmap;
779 area = next;
780 if (next && next->vm_end == end)
781 next = next->vm_next;
782
783
784
785
786 if (prev && prev->vm_end == addr &&
787 mpol_equal(vma_policy(prev), policy) &&
788 can_vma_merge_after(prev, vm_flags,
789 anon_vma, file, pgoff)) {
790
791
792
793 if (next && end == next->vm_start &&
794 mpol_equal(policy, vma_policy(next)) &&
795 can_vma_merge_before(next, vm_flags,
796 anon_vma, file, pgoff+pglen) &&
797 is_mergeable_anon_vma(prev->anon_vma,
798 next->anon_vma)) {
799
800 vma_adjust(prev, prev->vm_start,
801 next->vm_end, prev->vm_pgoff, NULL);
802 } else
803 vma_adjust(prev, prev->vm_start,
804 end, prev->vm_pgoff, NULL);
805 return prev;
806 }
807
808
809
810
811 if (next && end == next->vm_start &&
812 mpol_equal(policy, vma_policy(next)) &&
813 can_vma_merge_before(next, vm_flags,
814 anon_vma, file, pgoff+pglen)) {
815 if (prev && addr < prev->vm_end)
816 vma_adjust(prev, prev->vm_start,
817 addr, prev->vm_pgoff, NULL);
818 else
819 vma_adjust(area, addr, next->vm_end,
820 next->vm_pgoff - pglen, NULL);
821 return area;
822 }
823
824 return NULL;
825}
826
827
828
829
830
831
832
833
834
835struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
836{
837 struct vm_area_struct *near;
838 unsigned long vm_flags;
839
840 near = vma->vm_next;
841 if (!near)
842 goto try_prev;
843
844
845
846
847
848
849
850 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
851 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
852
853 if (near->anon_vma && vma->vm_end == near->vm_start &&
854 mpol_equal(vma_policy(vma), vma_policy(near)) &&
855 can_vma_merge_before(near, vm_flags,
856 NULL, vma->vm_file, vma->vm_pgoff +
857 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
858 return near->anon_vma;
859try_prev:
860
861
862
863
864
865
866
867 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
868 if (!near)
869 goto none;
870
871 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
872 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
873
874 if (near->anon_vma && near->vm_end == vma->vm_start &&
875 mpol_equal(vma_policy(near), vma_policy(vma)) &&
876 can_vma_merge_after(near, vm_flags,
877 NULL, vma->vm_file, vma->vm_pgoff))
878 return near->anon_vma;
879none:
880
881
882
883
884
885
886
887
888 return NULL;
889}
890
891#ifdef CONFIG_PROC_FS
892void vm_stat_account(struct mm_struct *mm, unsigned long flags,
893 struct file *file, long pages)
894{
895 const unsigned long stack_flags
896 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
897
898 if (file) {
899 mm->shared_vm += pages;
900 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
901 mm->exec_vm += pages;
902 } else if (flags & stack_flags)
903 mm->stack_vm += pages;
904 if (flags & (VM_RESERVED|VM_IO))
905 mm->reserved_vm += pages;
906}
907#endif
908
909
910
911
912
/*
 * do_mmap_pgoff - validate an mmap request and hand it to mmap_region().
 *
 * @file:  backing file, or NULL for an anonymous mapping
 * @addr:  requested address (a hint unless MAP_FIXED is set)
 * @len:   length in bytes; page-aligned below
 * @prot:  PROT_* bits
 * @flags: MAP_* bits
 * @pgoff: offset into the file, in pages
 *
 * Returns whatever mmap_region() returns on success, or a negative errno
 * value (as unsigned long) from one of the checks below.
 *
 * NOTE(review): no lock is taken in this function; presumably the caller
 * holds mm->mmap_sem for writing - confirm against callers.
 */
913unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
914 unsigned long len, unsigned long prot,
915 unsigned long flags, unsigned long pgoff)
916{
917 struct mm_struct * mm = current->mm;
918 struct inode *inode;
919 unsigned int vm_flags;
920 int error;
/* Keep the caller's original prot for the security hook. */
921 unsigned long reqprot = prot;
922
923
924
925
926
927
928
/*
 * READ_IMPLIES_EXEC personality: readable mappings also become executable,
 * except for files on a MNT_NOEXEC mount.
 */
929 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
930 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
931 prot |= PROT_EXEC;
932
933 if (!len)
934 return -EINVAL;
935
936 if (!(flags & MAP_FIXED))
937 addr = round_hint_to_min(addr);
938
939 error = arch_mmap_check(addr, len, flags);
940 if (error)
941 return error;
942
943
/* PAGE_ALIGN() can wrap a huge len to 0, hence the second !len test. */
944 len = PAGE_ALIGN(len);
945 if (!len || len > TASK_SIZE)
946 return -ENOMEM;
947
948
/* The page offset of the end of the mapping must not wrap around. */
949 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
950 return -EOVERFLOW;
951
952
953 if (mm->map_count > sysctl_max_map_count)
954 return -ENOMEM;
955
956
957
958
/*
 * Choose the address.  A result that is not page-aligned is passed
 * straight back to the caller (get_unmapped_area() returns negative errno
 * values for failures).
 */
959 addr = get_unmapped_area(file, addr, len, pgoff, flags);
960 if (addr & ~PAGE_MASK)
961 return addr;
962
963
964
965
966
/* Build vm_flags from prot, flags and the mm defaults; start with all VM_MAY* bits. */
967 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
968 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
969
970 if (flags & MAP_LOCKED) {
971 if (!can_do_mlock())
972 return -EPERM;
973 vm_flags |= VM_LOCKED;
974 }
975
976
/* Locked mappings must fit within RLIMIT_MEMLOCK unless CAP_IPC_LOCK is held. */
977 if (vm_flags & VM_LOCKED) {
978 unsigned long locked, lock_limit;
979 locked = len >> PAGE_SHIFT;
980 locked += mm->locked_vm;
981 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
982 lock_limit >>= PAGE_SHIFT;
983 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
984 return -EAGAIN;
985 }
986
987 inode = file ? file->f_path.dentry->d_inode : NULL;
988
989 if (file) {
990 switch (flags & MAP_TYPE) {
991 case MAP_SHARED:
/* A shared writable mapping needs a file opened for writing. */
992 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
993 return -EACCES;
994
995
996
997
998
/* Append-only inodes cannot be mapped through a writable descriptor. */
999 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1000 return -EACCES;
1001
1002
1003
1004
1005 if (locks_verify_locked(inode))
1006 return -EAGAIN;
1007
1008 vm_flags |= VM_SHARED | VM_MAYSHARE;
/* File not opened for writing: drop VM_MAYWRITE and VM_SHARED again. */
1009 if (!(file->f_mode & FMODE_WRITE))
1010 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1011
1012
/* No break above: MAP_SHARED falls through into the MAP_PRIVATE checks. */
1013 case MAP_PRIVATE:
1014 if (!(file->f_mode & FMODE_READ))
1015 return -EACCES;
1016 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1017 if (vm_flags & VM_EXEC)
1018 return -EPERM;
1019 vm_flags &= ~VM_MAYEXEC;
1020 }
1021
1022 if (!file->f_op || !file->f_op->mmap)
1023 return -ENODEV;
1024 break;
1025
1026 default:
1027 return -EINVAL;
1028 }
1029 } else {
1030 switch (flags & MAP_TYPE) {
1031 case MAP_SHARED:
/* Anonymous shared mapping: the caller's pgoff is discarded. */
1032
1033
1034
1035 pgoff = 0;
1036 vm_flags |= VM_SHARED | VM_MAYSHARE;
1037 break;
1038 case MAP_PRIVATE:
/* Anonymous private mapping: pgoff is derived from the chosen address. */
1039
1040
1041
1042 pgoff = addr >> PAGE_SHIFT;
1043 break;
1044 default:
1045 return -EINVAL;
1046 }
1047 }
1048
1049 error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
1050 if (error)
1051 return error;
1052
1053 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1054}
1055EXPORT_SYMBOL(do_mmap_pgoff);
1056
1057
1058
1059
1060
1061
1062
1063int vma_wants_writenotify(struct vm_area_struct *vma)
1064{
1065 unsigned int vm_flags = vma->vm_flags;
1066
1067
1068 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1069 return 0;
1070
1071
1072 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1073 return 1;
1074
1075
1076 if (pgprot_val(vma->vm_page_prot) !=
1077 pgprot_val(vm_get_page_prot(vm_flags)))
1078 return 0;
1079
1080
1081 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1082 return 0;
1083
1084
1085 return vma->vm_file && vma->vm_file->f_mapping &&
1086 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1087}
1088
1089
1090
1091
1092
1093static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1094{
1095
1096
1097
1098
1099 if (file && is_file_hugepages(file))
1100 return 0;
1101
1102 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1103}
1104
/*
 * mmap_region - create (or merge into) the vma for an already validated
 * mapping request.
 *
 * @file:     backing file or NULL
 * @addr:     start address of the mapping
 * @len:      length in bytes
 * @flags:    MAP_* flags of the request
 * @vm_flags: VM_* flags computed by the caller
 * @pgoff:    page offset
 *
 * Returns the final start address on success or a negative errno value
 * (as unsigned long) on failure.
 */
1105unsigned long mmap_region(struct file *file, unsigned long addr,
1106 unsigned long len, unsigned long flags,
1107 unsigned int vm_flags, unsigned long pgoff)
1108{
1109 struct mm_struct *mm = current->mm;
1110 struct vm_area_struct *vma, *prev;
/* Set once deny_write_access() succeeded, so i_writecount gets re-raised. */
1111 int correct_wcount = 0;
1112 int error;
1113 struct rb_node **rb_link, *rb_parent;
/* Pages charged via security_vm_enough_memory(); uncharged on failure. */
1114 unsigned long charged = 0;
1115 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1116
1117
1118 error = -ENOMEM;
/* Unmap anything already in [addr, addr+len) and redo the lookup until it is clear. */
1119munmap_back:
1120 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1121 if (vma && vma->vm_start < addr + len) {
1122 if (do_munmap(mm, addr, len))
1123 return -ENOMEM;
1124 goto munmap_back;
1125 }
1126
1127
1128 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1129 return -ENOMEM;
1130
1131
1132
1133
1134
/*
 * MAP_NORESERVE becomes VM_NORESERVE unless the overcommit mode is
 * OVERCOMMIT_NEVER; for hugetlbfs files it is set in any mode.
 */
1135 if ((flags & MAP_NORESERVE)) {
1136
1137 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1138 vm_flags |= VM_NORESERVE;
1139
1140
1141 if (file && is_file_hugepages(file))
1142 vm_flags |= VM_NORESERVE;
1143 }
1144
1145
1146
1147
/* Charge accountable (private writable) mappings against the commit limit. */
1148 if (accountable_mapping(file, vm_flags)) {
1149 charged = len >> PAGE_SHIFT;
1150 if (security_vm_enough_memory(charged))
1151 return -ENOMEM;
1152 vm_flags |= VM_ACCOUNT;
1153 }
1154
1155
1156
1157
/* Try to extend a neighbouring vma first; on success no new vma is allocated. */
1158 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1159 if (vma)
1160 goto out;
1161
1162
1163
1164
1165
1166
1167 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1168 if (!vma) {
1169 error = -ENOMEM;
1170 goto unacct_error;
1171 }
1172
1173 vma->vm_mm = mm;
1174 vma->vm_start = addr;
1175 vma->vm_end = addr + len;
1176 vma->vm_flags = vm_flags;
1177 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1178 vma->vm_pgoff = pgoff;
1179
1180 if (file) {
1181 error = -EINVAL;
/* File mappings may not be growable in either direction. */
1182 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1183 goto free_vma;
1184 if (vm_flags & VM_DENYWRITE) {
1185 error = deny_write_access(file);
1186 if (error)
1187 goto free_vma;
1188 correct_wcount = 1;
1189 }
/* The vma takes its own reference on the file before ->mmap() runs. */
1190 vma->vm_file = file;
1191 get_file(file);
1192 error = file->f_op->mmap(file, vma);
1193 if (error)
1194 goto unmap_and_free_vma;
1195 if (vm_flags & VM_EXECUTABLE)
1196 added_exe_file_vma(mm);
1197 } else if (vm_flags & VM_SHARED) {
1198 error = shmem_zero_setup(vma);
1199 if (error)
1200 goto free_vma;
1201 }
1202
1203
1204
1205
1206
1207
/* Re-read these from the vma: the ->mmap()/setup call above was given the vma and may have changed them. */
1208 addr = vma->vm_start;
1209 pgoff = vma->vm_pgoff;
1210 vm_flags = vma->vm_flags;
1211
/* Write-notify vmas get the protections of the non-shared variant of their flags. */
1212 if (vma_wants_writenotify(vma))
1213 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1214
1215 vma_link(mm, vma, prev, rb_link, rb_parent);
1216 file = vma->vm_file;
1217
1218
/* Undo the temporary deny_write_access() taken above. */
1219 if (correct_wcount)
1220 atomic_inc(&inode->i_writecount);
/* Common success path for both a merged and a newly linked vma. */
1221out:
1222 mm->total_vm += len >> PAGE_SHIFT;
1223 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1224 if (vm_flags & VM_LOCKED) {
1225
1226
1227
/* A negative result is returned as-is; otherwise it is subtracted from the locked page count. */
1228 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
1229 if (nr_pages < 0)
1230 return nr_pages;
1231 mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
1232 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1233 make_pages_present(addr, addr + len);
1234 return addr;
1235
/* ->mmap() failed: restore i_writecount, drop the file reference, unmap the range. */
1236unmap_and_free_vma:
1237 if (correct_wcount)
1238 atomic_inc(&inode->i_writecount);
1239 vma->vm_file = NULL;
1240 fput(file);
1241
1242
1243 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
/* Clearing charged skips the vm_unacct_memory() call below on this path. */
1244 charged = 0;
1245free_vma:
1246 kmem_cache_free(vm_area_cachep, vma);
1247unacct_error:
1248 if (charged)
1249 vm_unacct_memory(charged);
1250 return error;
1251}
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264#ifndef HAVE_ARCH_UNMAPPED_AREA
1265unsigned long
1266arch_get_unmapped_area(struct file *filp, unsigned long addr,
1267 unsigned long len, unsigned long pgoff, unsigned long flags)
1268{
1269 struct mm_struct *mm = current->mm;
1270 struct vm_area_struct *vma;
1271 unsigned long start_addr;
1272
1273 if (len > TASK_SIZE)
1274 return -ENOMEM;
1275
1276 if (flags & MAP_FIXED)
1277 return addr;
1278
1279 if (addr) {
1280 addr = PAGE_ALIGN(addr);
1281 vma = find_vma(mm, addr);
1282 if (TASK_SIZE - len >= addr &&
1283 (!vma || addr + len <= vma->vm_start))
1284 return addr;
1285 }
1286 if (len > mm->cached_hole_size) {
1287 start_addr = addr = mm->free_area_cache;
1288 } else {
1289 start_addr = addr = TASK_UNMAPPED_BASE;
1290 mm->cached_hole_size = 0;
1291 }
1292
1293full_search:
1294 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1295
1296 if (TASK_SIZE - len < addr) {
1297
1298
1299
1300
1301 if (start_addr != TASK_UNMAPPED_BASE) {
1302 addr = TASK_UNMAPPED_BASE;
1303 start_addr = addr;
1304 mm->cached_hole_size = 0;
1305 goto full_search;
1306 }
1307 return -ENOMEM;
1308 }
1309 if (!vma || addr + len <= vma->vm_start) {
1310
1311
1312
1313 mm->free_area_cache = addr + len;
1314 return addr;
1315 }
1316 if (addr + mm->cached_hole_size < vma->vm_start)
1317 mm->cached_hole_size = vma->vm_start - addr;
1318 addr = vma->vm_end;
1319 }
1320}
1321#endif
1322
1323void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1324{
1325
1326
1327
1328 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1329 mm->free_area_cache = addr;
1330 mm->cached_hole_size = ~0UL;
1331 }
1332}
1333
1334
1335
1336
1337
1338#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1339unsigned long
1340arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1341 const unsigned long len, const unsigned long pgoff,
1342 const unsigned long flags)
1343{
1344 struct vm_area_struct *vma;
1345 struct mm_struct *mm = current->mm;
1346 unsigned long addr = addr0;
1347
1348
1349 if (len > TASK_SIZE)
1350 return -ENOMEM;
1351
1352 if (flags & MAP_FIXED)
1353 return addr;
1354
1355
1356 if (addr) {
1357 addr = PAGE_ALIGN(addr);
1358 vma = find_vma(mm, addr);
1359 if (TASK_SIZE - len >= addr &&
1360 (!vma || addr + len <= vma->vm_start))
1361 return addr;
1362 }
1363
1364
1365 if (len <= mm->cached_hole_size) {
1366 mm->cached_hole_size = 0;
1367 mm->free_area_cache = mm->mmap_base;
1368 }
1369
1370
1371 addr = mm->free_area_cache;
1372
1373
1374 if (addr > len) {
1375 vma = find_vma(mm, addr-len);
1376 if (!vma || addr <= vma->vm_start)
1377
1378 return (mm->free_area_cache = addr-len);
1379 }
1380
1381 if (mm->mmap_base < len)
1382 goto bottomup;
1383
1384 addr = mm->mmap_base-len;
1385
1386 do {
1387
1388
1389
1390
1391
1392 vma = find_vma(mm, addr);
1393 if (!vma || addr+len <= vma->vm_start)
1394
1395 return (mm->free_area_cache = addr);
1396
1397
1398 if (addr + mm->cached_hole_size < vma->vm_start)
1399 mm->cached_hole_size = vma->vm_start - addr;
1400
1401
1402 addr = vma->vm_start-len;
1403 } while (len < vma->vm_start);
1404
1405bottomup:
1406
1407
1408
1409
1410
1411
1412 mm->cached_hole_size = ~0UL;
1413 mm->free_area_cache = TASK_UNMAPPED_BASE;
1414 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1415
1416
1417
1418 mm->free_area_cache = mm->mmap_base;
1419 mm->cached_hole_size = ~0UL;
1420
1421 return addr;
1422}
1423#endif
1424
1425void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1426{
1427
1428
1429
1430 if (addr > mm->free_area_cache)
1431 mm->free_area_cache = addr;
1432
1433
1434 if (mm->free_area_cache > mm->mmap_base)
1435 mm->free_area_cache = mm->mmap_base;
1436}
1437
1438unsigned long
1439get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1440 unsigned long pgoff, unsigned long flags)
1441{
1442 unsigned long (*get_area)(struct file *, unsigned long,
1443 unsigned long, unsigned long, unsigned long);
1444
1445 get_area = current->mm->get_unmapped_area;
1446 if (file && file->f_op && file->f_op->get_unmapped_area)
1447 get_area = file->f_op->get_unmapped_area;
1448 addr = get_area(file, addr, len, pgoff, flags);
1449 if (IS_ERR_VALUE(addr))
1450 return addr;
1451
1452 if (addr > TASK_SIZE - len)
1453 return -ENOMEM;
1454 if (addr & ~PAGE_MASK)
1455 return -EINVAL;
1456
1457 return arch_rebalance_pgtables(addr, len);
1458}
1459
1460EXPORT_SYMBOL(get_unmapped_area);
1461
1462
1463struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1464{
1465 struct vm_area_struct *vma = NULL;
1466
1467 if (mm) {
1468
1469
1470 vma = mm->mmap_cache;
1471 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1472 struct rb_node * rb_node;
1473
1474 rb_node = mm->mm_rb.rb_node;
1475 vma = NULL;
1476
1477 while (rb_node) {
1478 struct vm_area_struct * vma_tmp;
1479
1480 vma_tmp = rb_entry(rb_node,
1481 struct vm_area_struct, vm_rb);
1482
1483 if (vma_tmp->vm_end > addr) {
1484 vma = vma_tmp;
1485 if (vma_tmp->vm_start <= addr)
1486 break;
1487 rb_node = rb_node->rb_left;
1488 } else
1489 rb_node = rb_node->rb_right;
1490 }
1491 if (vma)
1492 mm->mmap_cache = vma;
1493 }
1494 }
1495 return vma;
1496}
1497
1498EXPORT_SYMBOL(find_vma);
1499
1500
1501struct vm_area_struct *
1502find_vma_prev(struct mm_struct *mm, unsigned long addr,
1503 struct vm_area_struct **pprev)
1504{
1505 struct vm_area_struct *vma = NULL, *prev = NULL;
1506 struct rb_node *rb_node;
1507 if (!mm)
1508 goto out;
1509
1510
1511 vma = mm->mmap;
1512
1513
1514 rb_node = mm->mm_rb.rb_node;
1515
1516 while (rb_node) {
1517 struct vm_area_struct *vma_tmp;
1518 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1519
1520 if (addr < vma_tmp->vm_end) {
1521 rb_node = rb_node->rb_left;
1522 } else {
1523 prev = vma_tmp;
1524 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1525 break;
1526 rb_node = rb_node->rb_right;
1527 }
1528 }
1529
1530out:
1531 *pprev = prev;
1532 return prev ? prev->vm_next : vma;
1533}
1534
1535
1536
1537
1538
1539
1540static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1541{
1542 struct mm_struct *mm = vma->vm_mm;
1543 struct rlimit *rlim = current->signal->rlim;
1544 unsigned long new_start;
1545
1546
1547 if (!may_expand_vm(mm, grow))
1548 return -ENOMEM;
1549
1550
1551 if (size > rlim[RLIMIT_STACK].rlim_cur)
1552 return -ENOMEM;
1553
1554
1555 if (vma->vm_flags & VM_LOCKED) {
1556 unsigned long locked;
1557 unsigned long limit;
1558 locked = mm->locked_vm + grow;
1559 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1560 if (locked > limit && !capable(CAP_IPC_LOCK))
1561 return -ENOMEM;
1562 }
1563
1564
1565 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1566 vma->vm_end - size;
1567 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1568 return -EFAULT;
1569
1570
1571
1572
1573
1574 if (security_vm_enough_memory(grow))
1575 return -ENOMEM;
1576
1577
1578 mm->total_vm += grow;
1579 if (vma->vm_flags & VM_LOCKED)
1580 mm->locked_vm += grow;
1581 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1582 return 0;
1583}
1584
1585#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
1586
1587
1588
1589
1590#ifndef CONFIG_IA64
1591static
1592#endif
1593int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1594{
1595 int error;
1596
1597 if (!(vma->vm_flags & VM_GROWSUP))
1598 return -EFAULT;
1599
1600
1601
1602
1603
1604 if (unlikely(anon_vma_prepare(vma)))
1605 return -ENOMEM;
1606 anon_vma_lock(vma);
1607
1608
1609
1610
1611
1612
1613
1614 if (address < PAGE_ALIGN(address+4))
1615 address = PAGE_ALIGN(address+4);
1616 else {
1617 anon_vma_unlock(vma);
1618 return -ENOMEM;
1619 }
1620 error = 0;
1621
1622
1623 if (address > vma->vm_end) {
1624 unsigned long size, grow;
1625
1626 size = address - vma->vm_start;
1627 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1628
1629 error = acct_stack_growth(vma, size, grow);
1630 if (!error)
1631 vma->vm_end = address;
1632 }
1633 anon_vma_unlock(vma);
1634 return error;
1635}
1636#endif
1637
1638
1639
1640
1641static int expand_downwards(struct vm_area_struct *vma,
1642 unsigned long address)
1643{
1644 int error;
1645
1646
1647
1648
1649
1650 if (unlikely(anon_vma_prepare(vma)))
1651 return -ENOMEM;
1652
1653 address &= PAGE_MASK;
1654 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1655 if (error)
1656 return error;
1657
1658 anon_vma_lock(vma);
1659
1660
1661
1662
1663
1664
1665
1666
1667 if (address < vma->vm_start) {
1668 unsigned long size, grow;
1669
1670 size = vma->vm_end - address;
1671 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1672
1673 error = acct_stack_growth(vma, size, grow);
1674 if (!error) {
1675 vma->vm_start = address;
1676 vma->vm_pgoff -= grow;
1677 }
1678 }
1679 anon_vma_unlock(vma);
1680 return error;
1681}
1682
/*
 * Non-static entry point for growing @vma downwards to @address; simply
 * forwards to expand_downwards() and returns its result.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	int ret = expand_downwards(vma, address);

	return ret;
}
1687
1688#ifdef CONFIG_STACK_GROWSUP
/*
 * CONFIG_STACK_GROWSUP variant: expanding the stack means growing @vma
 * upwards.  Returns whatever expand_upwards() returns.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int ret = expand_upwards(vma, address);

	return ret;
}
1693
1694struct vm_area_struct *
1695find_extend_vma(struct mm_struct *mm, unsigned long addr)
1696{
1697 struct vm_area_struct *vma, *prev;
1698
1699 addr &= PAGE_MASK;
1700 vma = find_vma_prev(mm, addr, &prev);
1701 if (vma && (vma->vm_start <= addr))
1702 return vma;
1703 if (!prev || expand_stack(prev, addr))
1704 return NULL;
1705 if (prev->vm_flags & VM_LOCKED) {
1706 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
1707 return NULL;
1708 }
1709 return prev;
1710}
1711#else
/*
 * Default (downward-growing stack) variant: expanding the stack means
 * growing @vma downwards.  Returns whatever expand_downwards() returns.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int ret = expand_downwards(vma, address);

	return ret;
}
1716
1717struct vm_area_struct *
1718find_extend_vma(struct mm_struct * mm, unsigned long addr)
1719{
1720 struct vm_area_struct * vma;
1721 unsigned long start;
1722
1723 addr &= PAGE_MASK;
1724 vma = find_vma(mm,addr);
1725 if (!vma)
1726 return NULL;
1727 if (vma->vm_start <= addr)
1728 return vma;
1729 if (!(vma->vm_flags & VM_GROWSDOWN))
1730 return NULL;
1731 start = vma->vm_start;
1732 if (expand_stack(vma, addr))
1733 return NULL;
1734 if (vma->vm_flags & VM_LOCKED) {
1735 if (mlock_vma_pages_range(vma, addr, start) < 0)
1736 return NULL;
1737 }
1738 return vma;
1739}
1740#endif
1741
1742
1743
1744
1745
1746
1747
1748static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1749{
1750
1751 update_hiwater_vm(mm);
1752 do {
1753 long nrpages = vma_pages(vma);
1754
1755 mm->total_vm -= nrpages;
1756 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1757 vma = remove_vma(vma);
1758 } while (vma);
1759 validate_mm(mm);
1760}
1761
1762
1763
1764
1765
1766
1767static void unmap_region(struct mm_struct *mm,
1768 struct vm_area_struct *vma, struct vm_area_struct *prev,
1769 unsigned long start, unsigned long end)
1770{
1771 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1772 struct mmu_gather *tlb;
1773 unsigned long nr_accounted = 0;
1774
1775 lru_add_drain();
1776 tlb = tlb_gather_mmu(mm, 0);
1777 update_hiwater_rss(mm);
1778 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1779 vm_unacct_memory(nr_accounted);
1780 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1781 next? next->vm_start: 0);
1782 tlb_finish_mmu(tlb, start, end);
1783}
1784
1785
1786
1787
1788
1789static void
1790detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1791 struct vm_area_struct *prev, unsigned long end)
1792{
1793 struct vm_area_struct **insertion_point;
1794 struct vm_area_struct *tail_vma = NULL;
1795 unsigned long addr;
1796
1797 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1798 do {
1799 rb_erase(&vma->vm_rb, &mm->mm_rb);
1800 mm->map_count--;
1801 tail_vma = vma;
1802 vma = vma->vm_next;
1803 } while (vma && vma->vm_start < end);
1804 *insertion_point = vma;
1805 tail_vma->vm_next = NULL;
1806 if (mm->unmap_area == arch_unmap_area)
1807 addr = prev ? prev->vm_end : mm->mmap_base;
1808 else
1809 addr = vma ? vma->vm_start : mm->mmap_base;
1810 mm->unmap_area(mm, addr);
1811 mm->mmap_cache = NULL;
1812}
1813
1814
1815
1816
1817
1818int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1819 unsigned long addr, int new_below)
1820{
1821 struct mempolicy *pol;
1822 struct vm_area_struct *new;
1823
1824 if (is_vm_hugetlb_page(vma) && (addr &
1825 ~(huge_page_mask(hstate_vma(vma)))))
1826 return -EINVAL;
1827
1828 if (mm->map_count >= sysctl_max_map_count)
1829 return -ENOMEM;
1830
1831 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1832 if (!new)
1833 return -ENOMEM;
1834
1835
1836 *new = *vma;
1837
1838 if (new_below)
1839 new->vm_end = addr;
1840 else {
1841 new->vm_start = addr;
1842 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1843 }
1844
1845 pol = mpol_dup(vma_policy(vma));
1846 if (IS_ERR(pol)) {
1847 kmem_cache_free(vm_area_cachep, new);
1848 return PTR_ERR(pol);
1849 }
1850 vma_set_policy(new, pol);
1851
1852 if (new->vm_file) {
1853 get_file(new->vm_file);
1854 if (vma->vm_flags & VM_EXECUTABLE)
1855 added_exe_file_vma(mm);
1856 }
1857
1858 if (new->vm_ops && new->vm_ops->open)
1859 new->vm_ops->open(new);
1860
1861 if (new_below)
1862 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1863 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1864 else
1865 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1866
1867 return 0;
1868}
1869
1870
1871
1872
1873
1874
1875int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1876{
1877 unsigned long end;
1878 struct vm_area_struct *vma, *prev, *last;
1879
1880 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1881 return -EINVAL;
1882
1883 if ((len = PAGE_ALIGN(len)) == 0)
1884 return -EINVAL;
1885
1886
1887 vma = find_vma_prev(mm, start, &prev);
1888 if (!vma)
1889 return 0;
1890
1891
1892
1893 end = start + len;
1894 if (vma->vm_start >= end)
1895 return 0;
1896
1897
1898
1899
1900
1901
1902
1903
1904 if (start > vma->vm_start) {
1905 int error = split_vma(mm, vma, start, 0);
1906 if (error)
1907 return error;
1908 prev = vma;
1909 }
1910
1911
1912 last = find_vma(mm, end);
1913 if (last && end > last->vm_start) {
1914 int error = split_vma(mm, last, end, 1);
1915 if (error)
1916 return error;
1917 }
1918 vma = prev? prev->vm_next: mm->mmap;
1919
1920
1921
1922
1923 if (mm->locked_vm) {
1924 struct vm_area_struct *tmp = vma;
1925 while (tmp && tmp->vm_start < end) {
1926 if (tmp->vm_flags & VM_LOCKED) {
1927 mm->locked_vm -= vma_pages(tmp);
1928 munlock_vma_pages_all(tmp);
1929 }
1930 tmp = tmp->vm_next;
1931 }
1932 }
1933
1934
1935
1936
1937 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1938 unmap_region(mm, vma, prev, start, end);
1939
1940
1941 remove_vma_list(mm, vma);
1942
1943 return 0;
1944}
1945
1946EXPORT_SYMBOL(do_munmap);
1947
1948SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
1949{
1950 int ret;
1951 struct mm_struct *mm = current->mm;
1952
1953 profile_munmap(addr);
1954
1955 down_write(&mm->mmap_sem);
1956 ret = do_munmap(mm, addr, len);
1957 up_write(&mm->mmap_sem);
1958 return ret;
1959}
1960
/*
 * Debug aid (CONFIG_DEBUG_VM only): warn if mmap_sem can be taken for
 * reading, which would mean the caller is not holding it for writing.
 * Compiles to nothing otherwise.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	/* The read lock was obtained: complain and release it again. */
	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
1970
1971
1972
1973
1974
1975
1976unsigned long do_brk(unsigned long addr, unsigned long len)
1977{
1978 struct mm_struct * mm = current->mm;
1979 struct vm_area_struct * vma, * prev;
1980 unsigned long flags;
1981 struct rb_node ** rb_link, * rb_parent;
1982 pgoff_t pgoff = addr >> PAGE_SHIFT;
1983 int error;
1984
1985 len = PAGE_ALIGN(len);
1986 if (!len)
1987 return addr;
1988
1989 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
1990 return -EINVAL;
1991
1992 if (is_hugepage_only_range(mm, addr, len))
1993 return -EINVAL;
1994
1995 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
1996 if (error)
1997 return error;
1998
1999 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2000
2001 error = arch_mmap_check(addr, len, flags);
2002 if (error)
2003 return error;
2004
2005
2006
2007
2008 if (mm->def_flags & VM_LOCKED) {
2009 unsigned long locked, lock_limit;
2010 locked = len >> PAGE_SHIFT;
2011 locked += mm->locked_vm;
2012 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
2013 lock_limit >>= PAGE_SHIFT;
2014 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2015 return -EAGAIN;
2016 }
2017
2018
2019
2020
2021
2022 verify_mm_writelocked(mm);
2023
2024
2025
2026
2027 munmap_back:
2028 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2029 if (vma && vma->vm_start < addr + len) {
2030 if (do_munmap(mm, addr, len))
2031 return -ENOMEM;
2032 goto munmap_back;
2033 }
2034
2035
2036 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2037 return -ENOMEM;
2038
2039 if (mm->map_count > sysctl_max_map_count)
2040 return -ENOMEM;
2041
2042 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2043 return -ENOMEM;
2044
2045
2046 vma = vma_merge(mm, prev, addr, addr + len, flags,
2047 NULL, NULL, pgoff, NULL);
2048 if (vma)
2049 goto out;
2050
2051
2052
2053
2054 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2055 if (!vma) {
2056 vm_unacct_memory(len >> PAGE_SHIFT);
2057 return -ENOMEM;
2058 }
2059
2060 vma->vm_mm = mm;
2061 vma->vm_start = addr;
2062 vma->vm_end = addr + len;
2063 vma->vm_pgoff = pgoff;
2064 vma->vm_flags = flags;
2065 vma->vm_page_prot = vm_get_page_prot(flags);
2066 vma_link(mm, vma, prev, rb_link, rb_parent);
2067out:
2068 mm->total_vm += len >> PAGE_SHIFT;
2069 if (flags & VM_LOCKED) {
2070 if (!mlock_vma_pages_range(vma, addr, addr + len))
2071 mm->locked_vm += (len >> PAGE_SHIFT);
2072 }
2073 return addr;
2074}
2075
2076EXPORT_SYMBOL(do_brk);
2077
2078
2079void exit_mmap(struct mm_struct *mm)
2080{
2081 struct mmu_gather *tlb;
2082 struct vm_area_struct *vma;
2083 unsigned long nr_accounted = 0;
2084 unsigned long end;
2085
2086
2087 mmu_notifier_release(mm);
2088
2089 if (mm->locked_vm) {
2090 vma = mm->mmap;
2091 while (vma) {
2092 if (vma->vm_flags & VM_LOCKED)
2093 munlock_vma_pages_all(vma);
2094 vma = vma->vm_next;
2095 }
2096 }
2097
2098 arch_exit_mmap(mm);
2099
2100 vma = mm->mmap;
2101 if (!vma)
2102 return;
2103
2104 lru_add_drain();
2105 flush_cache_mm(mm);
2106 tlb = tlb_gather_mmu(mm, 1);
2107
2108
2109 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2110 vm_unacct_memory(nr_accounted);
2111 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2112 tlb_finish_mmu(tlb, 0, end);
2113
2114
2115
2116
2117
2118 while (vma)
2119 vma = remove_vma(vma);
2120
2121 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2122}
2123
2124
2125
2126
2127
2128int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2129{
2130 struct vm_area_struct * __vma, * prev;
2131 struct rb_node ** rb_link, * rb_parent;
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145 if (!vma->vm_file) {
2146 BUG_ON(vma->anon_vma);
2147 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2148 }
2149 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2150 if (__vma && __vma->vm_start < vma->vm_end)
2151 return -ENOMEM;
2152 if ((vma->vm_flags & VM_ACCOUNT) &&
2153 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2154 return -ENOMEM;
2155 vma_link(mm, vma, prev, rb_link, rb_parent);
2156 return 0;
2157}
2158
2159
2160
2161
2162
2163struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2164 unsigned long addr, unsigned long len, pgoff_t pgoff)
2165{
2166 struct vm_area_struct *vma = *vmap;
2167 unsigned long vma_start = vma->vm_start;
2168 struct mm_struct *mm = vma->vm_mm;
2169 struct vm_area_struct *new_vma, *prev;
2170 struct rb_node **rb_link, *rb_parent;
2171 struct mempolicy *pol;
2172
2173
2174
2175
2176
2177 if (!vma->vm_file && !vma->anon_vma)
2178 pgoff = addr >> PAGE_SHIFT;
2179
2180 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2181 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2182 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2183 if (new_vma) {
2184
2185
2186
2187 if (vma_start >= new_vma->vm_start &&
2188 vma_start < new_vma->vm_end)
2189 *vmap = new_vma;
2190 } else {
2191 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2192 if (new_vma) {
2193 *new_vma = *vma;
2194 pol = mpol_dup(vma_policy(vma));
2195 if (IS_ERR(pol)) {
2196 kmem_cache_free(vm_area_cachep, new_vma);
2197 return NULL;
2198 }
2199 vma_set_policy(new_vma, pol);
2200 new_vma->vm_start = addr;
2201 new_vma->vm_end = addr + len;
2202 new_vma->vm_pgoff = pgoff;
2203 if (new_vma->vm_file) {
2204 get_file(new_vma->vm_file);
2205 if (vma->vm_flags & VM_EXECUTABLE)
2206 added_exe_file_vma(mm);
2207 }
2208 if (new_vma->vm_ops && new_vma->vm_ops->open)
2209 new_vma->vm_ops->open(new_vma);
2210 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2211 }
2212 }
2213 return new_vma;
2214}
2215
2216
2217
2218
2219
2220int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2221{
2222 unsigned long cur = mm->total_vm;
2223 unsigned long lim;
2224
2225 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2226
2227 if (cur + npages > lim)
2228 return 0;
2229 return 1;
2230}
2231
2232
2233static int special_mapping_fault(struct vm_area_struct *vma,
2234 struct vm_fault *vmf)
2235{
2236 pgoff_t pgoff;
2237 struct page **pages;
2238
2239
2240
2241
2242
2243
2244
2245 pgoff = vmf->pgoff - vma->vm_pgoff;
2246
2247 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2248 pgoff--;
2249
2250 if (*pages) {
2251 struct page *page = *pages;
2252 get_page(page);
2253 vmf->page = page;
2254 return 0;
2255 }
2256
2257 return VM_FAULT_SIGBUS;
2258}
2259
2260
2261
2262
2263static void special_mapping_close(struct vm_area_struct *vma)
2264{
2265}
2266
2267static struct vm_operations_struct special_mapping_vmops = {
2268 .close = special_mapping_close,
2269 .fault = special_mapping_fault,
2270};
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281int install_special_mapping(struct mm_struct *mm,
2282 unsigned long addr, unsigned long len,
2283 unsigned long vm_flags, struct page **pages)
2284{
2285 struct vm_area_struct *vma;
2286
2287 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2288 if (unlikely(vma == NULL))
2289 return -ENOMEM;
2290
2291 vma->vm_mm = mm;
2292 vma->vm_start = addr;
2293 vma->vm_end = addr + len;
2294
2295 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2296 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2297
2298 vma->vm_ops = &special_mapping_vmops;
2299 vma->vm_private_data = pages;
2300
2301 if (unlikely(insert_vm_struct(mm, vma))) {
2302 kmem_cache_free(vm_area_cachep, vma);
2303 return -ENOMEM;
2304 }
2305
2306 mm->total_vm += len >> PAGE_SHIFT;
2307
2308 return 0;
2309}
2310
2311static DEFINE_MUTEX(mm_all_locks_mutex);
2312
2313static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2314{
2315 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2316
2317
2318
2319
2320 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330 if (__test_and_set_bit(0, (unsigned long *)
2331 &anon_vma->head.next))
2332 BUG();
2333 }
2334}
2335
2336static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2337{
2338 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2349 BUG();
2350 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2351 }
2352}
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386int mm_take_all_locks(struct mm_struct *mm)
2387{
2388 struct vm_area_struct *vma;
2389 int ret = -EINTR;
2390
2391 BUG_ON(down_read_trylock(&mm->mmap_sem));
2392
2393 mutex_lock(&mm_all_locks_mutex);
2394
2395 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2396 if (signal_pending(current))
2397 goto out_unlock;
2398 if (vma->vm_file && vma->vm_file->f_mapping)
2399 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2400 }
2401
2402 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2403 if (signal_pending(current))
2404 goto out_unlock;
2405 if (vma->anon_vma)
2406 vm_lock_anon_vma(mm, vma->anon_vma);
2407 }
2408
2409 ret = 0;
2410
2411out_unlock:
2412 if (ret)
2413 mm_drop_all_locks(mm);
2414
2415 return ret;
2416}
2417
2418static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2419{
2420 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433 if (!__test_and_clear_bit(0, (unsigned long *)
2434 &anon_vma->head.next))
2435 BUG();
2436 spin_unlock(&anon_vma->lock);
2437 }
2438}
2439
2440static void vm_unlock_mapping(struct address_space *mapping)
2441{
2442 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2443
2444
2445
2446
2447 spin_unlock(&mapping->i_mmap_lock);
2448 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2449 &mapping->flags))
2450 BUG();
2451 }
2452}
2453
2454
2455
2456
2457
2458void mm_drop_all_locks(struct mm_struct *mm)
2459{
2460 struct vm_area_struct *vma;
2461
2462 BUG_ON(down_read_trylock(&mm->mmap_sem));
2463 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2464
2465 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2466 if (vma->anon_vma)
2467 vm_unlock_anon_vma(vma->anon_vma);
2468 if (vma->vm_file && vma->vm_file->f_mapping)
2469 vm_unlock_mapping(vma->vm_file->f_mapping);
2470 }
2471
2472 mutex_unlock(&mm_all_locks_mutex);
2473}
2474
2475
2476
2477
2478void __init mmap_init(void)
2479{
2480 vm_area_cachep = kmem_cache_create("vm_area_struct",
2481 sizeof(struct vm_area_struct), 0,
2482 SLAB_PANIC, NULL);
2483}
2484