1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/module.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29
30#include <asm/uaccess.h>
31#include <asm/cacheflush.h>
32#include <asm/tlb.h>
33#include <asm/mmu_context.h>
34
/*
 * Fallback used when the architecture does not supply its own
 * arch_mmap_check(): the request is always accepted (evaluates to 0).
 */
#if !defined(arch_mmap_check)
# define arch_mmap_check(addr, len, flags)	(0)
#endif
38
/*
 * Fallback used when the architecture does not supply its own
 * arch_rebalance_pgtables(): the address is passed through untouched.
 */
#if !defined(arch_rebalance_pgtables)
# define arch_rebalance_pgtables(addr, len)	(addr)
#endif
42
/*
 * Forward declaration: mmap_region() calls unmap_region() on its error
 * path before the definition is visible (presumably it lives further down
 * in this file).
 */
static void unmap_region(struct mm_struct *mm,
			 struct vm_area_struct *vma,
			 struct vm_area_struct *prev,
			 unsigned long start,
			 unsigned long end);
46
47
48
49
50
51#undef DEBUG_MM_RB
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68pgprot_t protection_map[16] = {
69 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
70 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
71};
72
73pgprot_t vm_get_page_prot(unsigned long vm_flags)
74{
75 return protection_map[vm_flags &
76 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
77}
78EXPORT_SYMBOL(vm_get_page_prot);
79
80int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
81int sysctl_overcommit_ratio = 50;
82int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
83atomic_t vm_committed_space = ATOMIC_INIT(0);
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
102{
103 unsigned long free, allowed;
104
105 vm_acct_memory(pages);
106
107
108
109
110 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
111 return 0;
112
113 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
114 unsigned long n;
115
116 free = global_page_state(NR_FILE_PAGES);
117 free += nr_swap_pages;
118
119
120
121
122
123
124
125 free += global_page_state(NR_SLAB_RECLAIMABLE);
126
127
128
129
130 if (!cap_sys_admin)
131 free -= free / 32;
132
133 if (free > pages)
134 return 0;
135
136
137
138
139
140 n = nr_free_pages();
141
142
143
144
145 if (n <= totalreserve_pages)
146 goto error;
147 else
148 n -= totalreserve_pages;
149
150
151
152
153 if (!cap_sys_admin)
154 n -= n / 32;
155 free += n;
156
157 if (free > pages)
158 return 0;
159
160 goto error;
161 }
162
163 allowed = (totalram_pages - hugetlb_total_pages())
164 * sysctl_overcommit_ratio / 100;
165
166
167
168 if (!cap_sys_admin)
169 allowed -= allowed / 32;
170 allowed += total_swap_pages;
171
172
173
174 allowed -= mm->total_vm / 32;
175
176
177
178
179
180 if (atomic_read(&vm_committed_space) < (long)allowed)
181 return 0;
182error:
183 vm_unacct_memory(pages);
184
185 return -ENOMEM;
186}
187
188
189
190
191static void __remove_shared_vm_struct(struct vm_area_struct *vma,
192 struct file *file, struct address_space *mapping)
193{
194 if (vma->vm_flags & VM_DENYWRITE)
195 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
196 if (vma->vm_flags & VM_SHARED)
197 mapping->i_mmap_writable--;
198
199 flush_dcache_mmap_lock(mapping);
200 if (unlikely(vma->vm_flags & VM_NONLINEAR))
201 list_del_init(&vma->shared.vm_set.list);
202 else
203 vma_prio_tree_remove(vma, &mapping->i_mmap);
204 flush_dcache_mmap_unlock(mapping);
205}
206
207
208
209
210
211void unlink_file_vma(struct vm_area_struct *vma)
212{
213 struct file *file = vma->vm_file;
214
215 if (file) {
216 struct address_space *mapping = file->f_mapping;
217 spin_lock(&mapping->i_mmap_lock);
218 __remove_shared_vm_struct(vma, file, mapping);
219 spin_unlock(&mapping->i_mmap_lock);
220 }
221}
222
223
224
225
226static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
227{
228 struct vm_area_struct *next = vma->vm_next;
229
230 might_sleep();
231 if (vma->vm_ops && vma->vm_ops->close)
232 vma->vm_ops->close(vma);
233 if (vma->vm_file)
234 fput(vma->vm_file);
235 mpol_free(vma_policy(vma));
236 kmem_cache_free(vm_area_cachep, vma);
237 return next;
238}
239
240asmlinkage unsigned long sys_brk(unsigned long brk)
241{
242 unsigned long rlim, retval;
243 unsigned long newbrk, oldbrk;
244 struct mm_struct *mm = current->mm;
245 unsigned long min_brk;
246
247 down_write(&mm->mmap_sem);
248
249#ifdef CONFIG_COMPAT_BRK
250 min_brk = mm->end_code;
251#else
252 min_brk = mm->start_brk;
253#endif
254 if (brk < min_brk)
255 goto out;
256
257
258
259
260
261
262
263 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
264 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
265 (mm->end_data - mm->start_data) > rlim)
266 goto out;
267
268 newbrk = PAGE_ALIGN(brk);
269 oldbrk = PAGE_ALIGN(mm->brk);
270 if (oldbrk == newbrk)
271 goto set_brk;
272
273
274 if (brk <= mm->brk) {
275 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
276 goto set_brk;
277 goto out;
278 }
279
280
281 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
282 goto out;
283
284
285 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
286 goto out;
287set_brk:
288 mm->brk = brk;
289out:
290 retval = mm->brk;
291 up_write(&mm->mmap_sem);
292 return retval;
293}
294
#ifdef DEBUG_MM_RB
/*
 * Walk the rb tree under @root in order and report ordering problems.
 *
 * Returns the number of nodes visited.  The count restarts from -1 when a
 * vma starts before its predecessor, and 0 is returned when the forward
 * and backward walks disagree, so the caller's comparison against
 * map_count fails.
 */
static int browse_rb(struct rb_root *root)
{
	int forwards = 0, backwards = 0;
	struct rb_node *node, *last = NULL;
	unsigned long prev_start = 0, prev_end = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct vm_area_struct *vma =
			rb_entry(node, struct vm_area_struct, vm_rb);

		if (vma->vm_start < prev_start) {
			printk("vm_start %lx prev %lx\n",
			       vma->vm_start, prev_start);
			forwards = -1;
		}
		if (vma->vm_start < prev_end)
			printk("vm_start %lx pend %lx\n",
			       vma->vm_start, prev_end);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n",
			       vma->vm_end, vma->vm_start);

		forwards++;
		last = node;
		prev_start = vma->vm_start;
		prev_end = vma->vm_end;
	}

	for (node = last; node; node = rb_prev(node))
		backwards++;

	if (forwards != backwards) {
		printk("backwards %d, forwards %d\n", backwards, forwards);
		forwards = 0;
	}
	return forwards;
}

/*
 * Consistency check: the vm_next list and the rb tree of @mm must both
 * contain exactly mm->map_count entries.  BUGs on any mismatch.
 */
void validate_mm(struct mm_struct *mm)
{
	struct vm_area_struct *cursor;
	int broken = 0;
	int count = 0;

	for (cursor = mm->mmap; cursor; cursor = cursor->vm_next)
		count++;
	if (count != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, count);
		broken = 1;
	}

	count = browse_rb(&mm->mm_rb);
	if (count != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, count);
		broken = 1;
	}
	BUG_ON(broken);
}
#else
/* Debugging disabled: validate_mm() compiles away. */
#define validate_mm(mm) do { } while (0)
#endif
344
345static struct vm_area_struct *
346find_vma_prepare(struct mm_struct *mm, unsigned long addr,
347 struct vm_area_struct **pprev, struct rb_node ***rb_link,
348 struct rb_node ** rb_parent)
349{
350 struct vm_area_struct * vma;
351 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
352
353 __rb_link = &mm->mm_rb.rb_node;
354 rb_prev = __rb_parent = NULL;
355 vma = NULL;
356
357 while (*__rb_link) {
358 struct vm_area_struct *vma_tmp;
359
360 __rb_parent = *__rb_link;
361 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
362
363 if (vma_tmp->vm_end > addr) {
364 vma = vma_tmp;
365 if (vma_tmp->vm_start <= addr)
366 return vma;
367 __rb_link = &__rb_parent->rb_left;
368 } else {
369 rb_prev = __rb_parent;
370 __rb_link = &__rb_parent->rb_right;
371 }
372 }
373
374 *pprev = NULL;
375 if (rb_prev)
376 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
377 *rb_link = __rb_link;
378 *rb_parent = __rb_parent;
379 return vma;
380}
381
382static inline void
383__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
384 struct vm_area_struct *prev, struct rb_node *rb_parent)
385{
386 if (prev) {
387 vma->vm_next = prev->vm_next;
388 prev->vm_next = vma;
389 } else {
390 mm->mmap = vma;
391 if (rb_parent)
392 vma->vm_next = rb_entry(rb_parent,
393 struct vm_area_struct, vm_rb);
394 else
395 vma->vm_next = NULL;
396 }
397}
398
399void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
400 struct rb_node **rb_link, struct rb_node *rb_parent)
401{
402 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
403 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
404}
405
406static inline void __vma_link_file(struct vm_area_struct *vma)
407{
408 struct file * file;
409
410 file = vma->vm_file;
411 if (file) {
412 struct address_space *mapping = file->f_mapping;
413
414 if (vma->vm_flags & VM_DENYWRITE)
415 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
416 if (vma->vm_flags & VM_SHARED)
417 mapping->i_mmap_writable++;
418
419 flush_dcache_mmap_lock(mapping);
420 if (unlikely(vma->vm_flags & VM_NONLINEAR))
421 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
422 else
423 vma_prio_tree_insert(vma, &mapping->i_mmap);
424 flush_dcache_mmap_unlock(mapping);
425 }
426}
427
/*
 * Link @vma into the per-mm structures: first the vm_next list, then the
 * rb tree, then the anon_vma list.  File linkage and locking are left to
 * the caller (see vma_link()).
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	   struct vm_area_struct *prev, struct rb_node **rb_link,
	   struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);	/* ordered list  */
	__vma_link_rb(mm, vma, rb_link, rb_parent);	/* lookup tree   */
	__anon_vma_link(vma);				/* anon_vma list */
}
437
438static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
439 struct vm_area_struct *prev, struct rb_node **rb_link,
440 struct rb_node *rb_parent)
441{
442 struct address_space *mapping = NULL;
443
444 if (vma->vm_file)
445 mapping = vma->vm_file->f_mapping;
446
447 if (mapping) {
448 spin_lock(&mapping->i_mmap_lock);
449 vma->vm_truncate_count = mapping->truncate_count;
450 }
451 anon_vma_lock(vma);
452
453 __vma_link(mm, vma, prev, rb_link, rb_parent);
454 __vma_link_file(vma);
455
456 anon_vma_unlock(vma);
457 if (mapping)
458 spin_unlock(&mapping->i_mmap_lock);
459
460 mm->map_count++;
461 validate_mm(mm);
462}
463
464
465
466
467
468
469static void
470__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
471{
472 struct vm_area_struct * __vma, * prev;
473 struct rb_node ** rb_link, * rb_parent;
474
475 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
476 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
477 __vma_link(mm, vma, prev, rb_link, rb_parent);
478 mm->map_count++;
479}
480
481static inline void
482__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
483 struct vm_area_struct *prev)
484{
485 prev->vm_next = vma->vm_next;
486 rb_erase(&vma->vm_rb, &mm->mm_rb);
487 if (mm->mmap_cache == vma)
488 mm->mmap_cache = prev;
489}
490
491
492
493
494
495
496
497
/*
 * vma_adjust() - set @vma's range to [@start, @end) and its page offset to
 * @pgoff, fixing up whatever else has to change with it.
 *
 * @insert: optional new vma to insert as part of the same operation; when
 *          given, the neighbour analysis below is skipped entirely.
 *
 * Without @insert the vma following @vma ("next") is examined:
 *  - @end reaches or passes next->vm_end: next is swallowed and freed
 *    (remove_next = 1), and if @end lies beyond it the vma after that is
 *    handled by a second pass (remove_next = 2, "goto again");
 *  - @end lands inside next: next's start and pgoff move up by adjust_next
 *    pages;
 *  - @end is below vma->vm_end: adjust_next is negative and next grows
 *    downwards over the part @vma gives up.
 *
 * Locking done here: mapping->i_mmap_lock when @vma has a file, then the
 * lock of the anon_vma involved (if any).  Both are dropped before next is
 * freed.
 */
498void vma_adjust(struct vm_area_struct *vma, unsigned long start,
499 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
500{
501 struct mm_struct *mm = vma->vm_mm;
502 struct vm_area_struct *next = vma->vm_next;
503 struct vm_area_struct *importer = NULL;
504 struct address_space *mapping = NULL;
505 struct prio_tree_root *root = NULL;
506 struct file *file = vma->vm_file;
507 struct anon_vma *anon_vma = NULL;
508 long adjust_next = 0;
509 int remove_next = 0;
510
511 if (next && !insert) {
512 if (end >= next->vm_end) {
513 /* vma expands over the whole of next: next will be removed. */
514 /* remove_next is 2 when end also passes next->vm_end. */
515 /* end is clamped to next->vm_end for this pass. */
516 /* The "again" label is re-entered from the bottom of the function. */
517again: remove_next = 1 + (end > next->vm_end);
518 end = next->vm_end;
519 anon_vma = next->anon_vma;
520 importer = vma;
521 } else if (end > next->vm_start) {
522 /* vma expands into part of next: */
523 /* next's start moves up by adjust_next pages (positive). */
524
525
526 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
527 anon_vma = next->anon_vma;
528 importer = vma;
529 } else if (end < vma->vm_end) {
530 /* vma shrinks: next takes over the pages given up, */
531 /* so adjust_next is negative and next is the importer. */
532
533
534
535 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
536 anon_vma = next->anon_vma;
537 importer = next;
538 }
539 }
540
541 if (file) {
542 mapping = file->f_mapping;
	/* root stays NULL for nonlinear vmas, so the prio tree is left alone. */
543 if (!(vma->vm_flags & VM_NONLINEAR))
544 root = &mapping->i_mmap;
545 spin_lock(&mapping->i_mmap_lock);
546 if (importer &&
547 vma->vm_truncate_count != next->vm_truncate_count) {
548 /* vma and next disagree on their truncate counts: */
549 /* reset the importer's count to 0. */
550
551
552 importer->vm_truncate_count = 0;
553 }
554 if (insert) {
555 insert->vm_truncate_count = vma->vm_truncate_count;
556 /* Link the inserted vma to the file here, under i_mmap_lock; */
557 /* it is added to the mm further down by __insert_vm_struct(). */
558
559
560
561
562 __vma_link_file(insert);
563 }
564 }
565
566 /* Pick the anon_vma to lock: vma's own if it has one, */
567 /* otherwise the one taken from next above (may be NULL). */
568
569
570 if (vma->anon_vma)
571 anon_vma = vma->anon_vma;
572 if (anon_vma) {
573 spin_lock(&anon_vma->lock);
574 /* An importer that has no anon_vma yet adopts this one */
575 /* and is linked onto it. */
576
577
578
579 if (importer && !importer->anon_vma) {
580 importer->anon_vma = anon_vma;
581 __anon_vma_link(importer);
582 }
583 }
584
	/* Take the affected vmas out of the prio tree while their range changes. */
585 if (root) {
586 flush_dcache_mmap_lock(mapping);
587 vma_prio_tree_remove(vma, root);
588 if (adjust_next)
589 vma_prio_tree_remove(next, root);
590 }
591
592 vma->vm_start = start;
593 vma->vm_end = end;
594 vma->vm_pgoff = pgoff;
595 if (adjust_next) {
596 next->vm_start += adjust_next << PAGE_SHIFT;
597 next->vm_pgoff += adjust_next;
598 }
599
	/* Re-insert with the updated ranges. */
600 if (root) {
601 if (adjust_next)
602 vma_prio_tree_insert(next, root);
603 vma_prio_tree_insert(vma, root);
604 flush_dcache_mmap_unlock(mapping);
605 }
606
607 if (remove_next) {
608 /* Unlink next from the mm list and rb tree, from the file mapping, */
609 /* and fold its anon_vma linkage into vma. */
610
611
612 __vma_unlink(mm, next, vma);
613 if (file)
614 __remove_shared_vm_struct(next, file, mapping);
615 if (next->anon_vma)
616 __anon_vma_merge(vma, next);
617 } else if (insert) {
618 /* Add the new vma to the mm; its file linkage was done above. */
619
620
621
622
623 __insert_vm_struct(mm, insert);
624 }
625
626 if (anon_vma)
627 spin_unlock(&anon_vma->lock);
628 if (mapping)
629 spin_unlock(&mapping->i_mmap_lock);
630
	/* Locks are dropped: now release what the removed next was holding. */
631 if (remove_next) {
632 if (file)
633 fput(file);
634 mm->map_count--;
635 mpol_free(vma_policy(next));
636 kmem_cache_free(vm_area_cachep, next);
637 /* remove_next == 2: end went past the vma just freed, */
638 /* so repeat the removal for the vma that now follows. */
639
640
641
642 if (remove_next == 2) {
643 next = vma->vm_next;
644 goto again;
645 }
646 }
647
648 validate_mm(mm);
649}
650
651
652
653
654
/* Flags for which vma_merge() refuses to merge at all. */
#define VM_SPECIAL \
	(VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
656
657static inline int is_mergeable_vma(struct vm_area_struct *vma,
658 struct file *file, unsigned long vm_flags)
659{
660 if (vma->vm_flags != vm_flags)
661 return 0;
662 if (vma->vm_file != file)
663 return 0;
664 if (vma->vm_ops && vma->vm_ops->close)
665 return 0;
666 return 1;
667}
668
/*
 * Two anon_vma pointers are compatible for merging when at least one of
 * them is NULL or when both refer to the same anon_vma.
 * Returns 1 when compatible, 0 otherwise.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
674
675
676
677
678
679
680
681
682
683
684
685
686static int
687can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
688 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
689{
690 if (is_mergeable_vma(vma, file, vm_flags) &&
691 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
692 if (vma->vm_pgoff == vm_pgoff)
693 return 1;
694 }
695 return 0;
696}
697
698
699
700
701
702
703
704
705static int
706can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
707 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
708{
709 if (is_mergeable_vma(vma, file, vm_flags) &&
710 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
711 pgoff_t vm_pglen;
712 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
713 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
714 return 1;
715 }
716 return 0;
717}
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748struct vm_area_struct *vma_merge(struct mm_struct *mm,
749 struct vm_area_struct *prev, unsigned long addr,
750 unsigned long end, unsigned long vm_flags,
751 struct anon_vma *anon_vma, struct file *file,
752 pgoff_t pgoff, struct mempolicy *policy)
753{
754 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
755 struct vm_area_struct *area, *next;
756
757
758
759
760
761 if (vm_flags & VM_SPECIAL)
762 return NULL;
763
764 if (prev)
765 next = prev->vm_next;
766 else
767 next = mm->mmap;
768 area = next;
769 if (next && next->vm_end == end)
770 next = next->vm_next;
771
772
773
774
775 if (prev && prev->vm_end == addr &&
776 mpol_equal(vma_policy(prev), policy) &&
777 can_vma_merge_after(prev, vm_flags,
778 anon_vma, file, pgoff)) {
779
780
781
782 if (next && end == next->vm_start &&
783 mpol_equal(policy, vma_policy(next)) &&
784 can_vma_merge_before(next, vm_flags,
785 anon_vma, file, pgoff+pglen) &&
786 is_mergeable_anon_vma(prev->anon_vma,
787 next->anon_vma)) {
788
789 vma_adjust(prev, prev->vm_start,
790 next->vm_end, prev->vm_pgoff, NULL);
791 } else
792 vma_adjust(prev, prev->vm_start,
793 end, prev->vm_pgoff, NULL);
794 return prev;
795 }
796
797
798
799
800 if (next && end == next->vm_start &&
801 mpol_equal(policy, vma_policy(next)) &&
802 can_vma_merge_before(next, vm_flags,
803 anon_vma, file, pgoff+pglen)) {
804 if (prev && addr < prev->vm_end)
805 vma_adjust(prev, prev->vm_start,
806 addr, prev->vm_pgoff, NULL);
807 else
808 vma_adjust(area, addr, next->vm_end,
809 next->vm_pgoff - pglen, NULL);
810 return area;
811 }
812
813 return NULL;
814}
815
816
817
818
819
820
821
822
823
824struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
825{
826 struct vm_area_struct *near;
827 unsigned long vm_flags;
828
829 near = vma->vm_next;
830 if (!near)
831 goto try_prev;
832
833
834
835
836
837
838
839 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
840 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
841
842 if (near->anon_vma && vma->vm_end == near->vm_start &&
843 mpol_equal(vma_policy(vma), vma_policy(near)) &&
844 can_vma_merge_before(near, vm_flags,
845 NULL, vma->vm_file, vma->vm_pgoff +
846 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
847 return near->anon_vma;
848try_prev:
849
850
851
852
853
854
855
856 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
857 if (!near)
858 goto none;
859
860 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
861 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
862
863 if (near->anon_vma && near->vm_end == vma->vm_start &&
864 mpol_equal(vma_policy(near), vma_policy(vma)) &&
865 can_vma_merge_after(near, vm_flags,
866 NULL, vma->vm_file, vma->vm_pgoff))
867 return near->anon_vma;
868none:
869
870
871
872
873
874
875
876
877 return NULL;
878}
879
#ifdef CONFIG_PROC_FS
/*
 * Add @pages (may be negative) to the per-mm statistics counters selected
 * by @flags and @file:
 *  - file-backed: shared_vm, plus exec_vm when executable and not writable;
 *  - otherwise stack_vm when a stack growth flag is set;
 *  - reserved_vm additionally when VM_RESERVED or VM_IO is set.
 */
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
						struct file *file, long pages)
{
	const unsigned long stack_flags =
		VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN);
	const int exec_only = (flags & (VM_EXEC | VM_WRITE)) == VM_EXEC;

	if (file) {
		mm->shared_vm += pages;
		if (exec_only)
			mm->exec_vm += pages;
	} else if (flags & stack_flags) {
		mm->stack_vm += pages;
	}

	if (flags & (VM_RESERVED | VM_IO))
		mm->reserved_vm += pages;
}
#endif
897
898
899
900
901
902unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
903 unsigned long len, unsigned long prot,
904 unsigned long flags, unsigned long pgoff)
905{
906 struct mm_struct * mm = current->mm;
907 struct inode *inode;
908 unsigned int vm_flags;
909 int error;
910 int accountable = 1;
911 unsigned long reqprot = prot;
912
913
914
915
916
917
918
919 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
920 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
921 prot |= PROT_EXEC;
922
923 if (!len)
924 return -EINVAL;
925
926 if (!(flags & MAP_FIXED))
927 addr = round_hint_to_min(addr);
928
929 error = arch_mmap_check(addr, len, flags);
930 if (error)
931 return error;
932
933
934 len = PAGE_ALIGN(len);
935 if (!len || len > TASK_SIZE)
936 return -ENOMEM;
937
938
939 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
940 return -EOVERFLOW;
941
942
943 if (mm->map_count > sysctl_max_map_count)
944 return -ENOMEM;
945
946
947
948
949 addr = get_unmapped_area(file, addr, len, pgoff, flags);
950 if (addr & ~PAGE_MASK)
951 return addr;
952
953
954
955
956
957 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
958 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
959
960 if (flags & MAP_LOCKED) {
961 if (!can_do_mlock())
962 return -EPERM;
963 vm_flags |= VM_LOCKED;
964 }
965
966 if (vm_flags & VM_LOCKED) {
967 unsigned long locked, lock_limit;
968 locked = len >> PAGE_SHIFT;
969 locked += mm->locked_vm;
970 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
971 lock_limit >>= PAGE_SHIFT;
972 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
973 return -EAGAIN;
974 }
975
976 inode = file ? file->f_path.dentry->d_inode : NULL;
977
978 if (file) {
979 switch (flags & MAP_TYPE) {
980 case MAP_SHARED:
981 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
982 return -EACCES;
983
984
985
986
987
988 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
989 return -EACCES;
990
991
992
993
994 if (locks_verify_locked(inode))
995 return -EAGAIN;
996
997 vm_flags |= VM_SHARED | VM_MAYSHARE;
998 if (!(file->f_mode & FMODE_WRITE))
999 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1000
1001
1002 case MAP_PRIVATE:
1003 if (!(file->f_mode & FMODE_READ))
1004 return -EACCES;
1005 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1006 if (vm_flags & VM_EXEC)
1007 return -EPERM;
1008 vm_flags &= ~VM_MAYEXEC;
1009 }
1010 if (is_file_hugepages(file))
1011 accountable = 0;
1012
1013 if (!file->f_op || !file->f_op->mmap)
1014 return -ENODEV;
1015 break;
1016
1017 default:
1018 return -EINVAL;
1019 }
1020 } else {
1021 switch (flags & MAP_TYPE) {
1022 case MAP_SHARED:
1023 vm_flags |= VM_SHARED | VM_MAYSHARE;
1024 break;
1025 case MAP_PRIVATE:
1026
1027
1028
1029 pgoff = addr >> PAGE_SHIFT;
1030 break;
1031 default:
1032 return -EINVAL;
1033 }
1034 }
1035
1036 error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
1037 if (error)
1038 return error;
1039
1040 return mmap_region(file, addr, len, flags, vm_flags, pgoff,
1041 accountable);
1042}
1043EXPORT_SYMBOL(do_mmap_pgoff);
1044
1045
1046
1047
1048
1049
1050
1051int vma_wants_writenotify(struct vm_area_struct *vma)
1052{
1053 unsigned int vm_flags = vma->vm_flags;
1054
1055
1056 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1057 return 0;
1058
1059
1060 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1061 return 1;
1062
1063
1064 if (pgprot_val(vma->vm_page_prot) !=
1065 pgprot_val(vm_get_page_prot(vm_flags)))
1066 return 0;
1067
1068
1069 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1070 return 0;
1071
1072
1073 return vma->vm_file && vma->vm_file->f_mapping &&
1074 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1075}
1076
1077
/*
 * mmap_region() - create the mapping [@addr, @addr + @len) in the current
 * process after do_mmap_pgoff() has validated the request.
 *
 * @file:        backing file, or NULL for an anonymous mapping
 * @flags:       original MAP_* flags (consulted for MAP_NORESERVE,
 *               MAP_POPULATE and MAP_NONBLOCK)
 * @vm_flags:    VM_* flags for the new vma
 * @pgoff:       page offset for the new vma
 * @accountable: zero to skip commit accounting altogether
 *
 * Any existing mappings overlapping the range are unmapped first.
 *
 * Returns the address of the mapping, or a negative errno cast to
 * unsigned long.
 */
1078unsigned long mmap_region(struct file *file, unsigned long addr,
1079 unsigned long len, unsigned long flags,
1080 unsigned int vm_flags, unsigned long pgoff,
1081 int accountable)
1082{
1083 struct mm_struct *mm = current->mm;
1084 struct vm_area_struct *vma, *prev;
1085 int correct_wcount = 0;
1086 int error;
1087 struct rb_node **rb_link, *rb_parent;
1088 unsigned long charged = 0;
1089 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1090
1091 /* Clear old maps: unmap anything overlapping the range and look again. */
1092 error = -ENOMEM;
1093munmap_back:
1094 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1095 if (vma && vma->vm_start < addr + len) {
1096 if (do_munmap(mm, addr, len))
1097 return -ENOMEM;
1098 goto munmap_back;
1099 }
1100
1101 /* Refuse when may_expand_vm() does not allow this many more pages. */
1102 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1103 return -ENOMEM;
1104
	/* MAP_NORESERVE skips accounting unless overcommit is OVERCOMMIT_NEVER. */
1105 if (accountable && (!(flags & MAP_NORESERVE) ||
1106 sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
1107 if (vm_flags & VM_SHARED) {
1108 /* Shared: only flag it here; nothing is charged in this function. */
1109 vm_flags |= VM_ACCOUNT;
1110 } else if (vm_flags & VM_WRITE) {
1111 /* Private writable mapping: charge the pages now; */
1112 /* "charged" is given back on the error paths below. */
1113
1114 charged = len >> PAGE_SHIFT;
1115 if (security_vm_enough_memory(charged))
1116 return -ENOMEM;
1117 vm_flags |= VM_ACCOUNT;
1118 }
1119 }
1120
1121 /* Anonymous private mapping: try to extend a neighbouring vma */
1122 /* instead of allocating a new one. */
1123
1124
1125
1126 if (!file && !(vm_flags & VM_SHARED) &&
1127 vma_merge(mm, prev, addr, addr + len, vm_flags,
1128 NULL, NULL, pgoff, NULL))
1129 goto out;
1130
1131 /* No merge: allocate and fill in a new, zeroed vma. */
1132
1133
1134
1135
1136 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1137 if (!vma) {
1138 error = -ENOMEM;
1139 goto unacct_error;
1140 }
1141
1142 vma->vm_mm = mm;
1143 vma->vm_start = addr;
1144 vma->vm_end = addr + len;
1145 vma->vm_flags = vm_flags;
1146 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1147 vma->vm_pgoff = pgoff;
1148
1149 if (file) {
	/* File mappings may not carry the stack growth flags. */
1150 error = -EINVAL;
1151 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1152 goto free_vma;
1153 if (vm_flags & VM_DENYWRITE) {
1154 error = deny_write_access(file);
1155 if (error)
1156 goto free_vma;
	/* Remember to undo deny_write_access() via i_writecount later. */
1157 correct_wcount = 1;
1158 }
	/* Take a file reference for the vma, then call the file's ->mmap(). */
1159 vma->vm_file = file;
1160 get_file(file);
1161 error = file->f_op->mmap(file, vma);
1162 if (error)
1163 goto unmap_and_free_vma;
1164 } else if (vm_flags & VM_SHARED) {
	/* Anonymous shared mapping: set up through shmem_zero_setup(). */
1165 error = shmem_zero_setup(vma);
1166 if (error)
1167 goto free_vma;
1168 }
1169
1170 /* VM_ACCOUNT was set on shared mappings above only for the setup */
1171 /* calls; clear it on the vma again. */
1172
1173
1174
1175 if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
1176 vma->vm_flags &= ~VM_ACCOUNT;
1177
1178 /* Re-read addr, pgoff and vm_flags from the vma */
1179 /* NOTE(review): presumably because ->mmap() may have changed them -- confirm. */
1180
1181
1182
1183 addr = vma->vm_start;
1184 pgoff = vma->vm_pgoff;
1185 vm_flags = vma->vm_flags;
1186
	/* Write-notify wanted: use the protection the flags would get without VM_SHARED. */
1187 if (vma_wants_writenotify(vma))
1188 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1189
	/* File vmas get a merge attempt; link the new vma if that is not possible. */
1190 if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
1191 vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
1192 file = vma->vm_file;
1193 vma_link(mm, vma, prev, rb_link, rb_parent);
1194 if (correct_wcount)
1195 atomic_inc(&inode->i_writecount);
1196 } else {
	/* Merged into a neighbour: the freshly built vma is not needed. */
1197 if (file) {
1198 if (correct_wcount)
1199 atomic_inc(&inode->i_writecount);
1200 fput(file);
1201 }
1202 mpol_free(vma_policy(vma));
1203 kmem_cache_free(vm_area_cachep, vma);
1204 }
1205out:
	/* Success: update the statistics and pre-fault where requested. */
1206 mm->total_vm += len >> PAGE_SHIFT;
1207 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1208 if (vm_flags & VM_LOCKED) {
1209 mm->locked_vm += len >> PAGE_SHIFT;
1210 make_pages_present(addr, addr + len);
1211 }
1212 if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1213 make_pages_present(addr, addr + len);
1214 return addr;
1215
	/* Error unwinding; each label also runs the ones below it. */
1216unmap_and_free_vma:
1217 if (correct_wcount)
1218 atomic_inc(&inode->i_writecount);
1219 vma->vm_file = NULL;
1220 fput(file);
1221
1222 /* Undo any partial mapping done by the failed ->mmap() call. */
1223 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	/* charged is zeroed so the vm_unacct_memory() below is skipped on this path. */
1224 charged = 0;
1225free_vma:
1226 kmem_cache_free(vm_area_cachep, vma);
1227unacct_error:
1228 if (charged)
1229 vm_unacct_memory(charged);
1230 return error;
1231}
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244#ifndef HAVE_ARCH_UNMAPPED_AREA
1245unsigned long
1246arch_get_unmapped_area(struct file *filp, unsigned long addr,
1247 unsigned long len, unsigned long pgoff, unsigned long flags)
1248{
1249 struct mm_struct *mm = current->mm;
1250 struct vm_area_struct *vma;
1251 unsigned long start_addr;
1252
1253 if (len > TASK_SIZE)
1254 return -ENOMEM;
1255
1256 if (flags & MAP_FIXED)
1257 return addr;
1258
1259 if (addr) {
1260 addr = PAGE_ALIGN(addr);
1261 vma = find_vma(mm, addr);
1262 if (TASK_SIZE - len >= addr &&
1263 (!vma || addr + len <= vma->vm_start))
1264 return addr;
1265 }
1266 if (len > mm->cached_hole_size) {
1267 start_addr = addr = mm->free_area_cache;
1268 } else {
1269 start_addr = addr = TASK_UNMAPPED_BASE;
1270 mm->cached_hole_size = 0;
1271 }
1272
1273full_search:
1274 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1275
1276 if (TASK_SIZE - len < addr) {
1277
1278
1279
1280
1281 if (start_addr != TASK_UNMAPPED_BASE) {
1282 addr = TASK_UNMAPPED_BASE;
1283 start_addr = addr;
1284 mm->cached_hole_size = 0;
1285 goto full_search;
1286 }
1287 return -ENOMEM;
1288 }
1289 if (!vma || addr + len <= vma->vm_start) {
1290
1291
1292
1293 mm->free_area_cache = addr + len;
1294 return addr;
1295 }
1296 if (addr + mm->cached_hole_size < vma->vm_start)
1297 mm->cached_hole_size = vma->vm_start - addr;
1298 addr = vma->vm_end;
1299 }
1300}
1301#endif
1302
1303void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1304{
1305
1306
1307
1308 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1309 mm->free_area_cache = addr;
1310 mm->cached_hole_size = ~0UL;
1311 }
1312}
1313
1314
1315
1316
1317
1318#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1319unsigned long
1320arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1321 const unsigned long len, const unsigned long pgoff,
1322 const unsigned long flags)
1323{
1324 struct vm_area_struct *vma;
1325 struct mm_struct *mm = current->mm;
1326 unsigned long addr = addr0;
1327
1328
1329 if (len > TASK_SIZE)
1330 return -ENOMEM;
1331
1332 if (flags & MAP_FIXED)
1333 return addr;
1334
1335
1336 if (addr) {
1337 addr = PAGE_ALIGN(addr);
1338 vma = find_vma(mm, addr);
1339 if (TASK_SIZE - len >= addr &&
1340 (!vma || addr + len <= vma->vm_start))
1341 return addr;
1342 }
1343
1344
1345 if (len <= mm->cached_hole_size) {
1346 mm->cached_hole_size = 0;
1347 mm->free_area_cache = mm->mmap_base;
1348 }
1349
1350
1351 addr = mm->free_area_cache;
1352
1353
1354 if (addr > len) {
1355 vma = find_vma(mm, addr-len);
1356 if (!vma || addr <= vma->vm_start)
1357
1358 return (mm->free_area_cache = addr-len);
1359 }
1360
1361 if (mm->mmap_base < len)
1362 goto bottomup;
1363
1364 addr = mm->mmap_base-len;
1365
1366 do {
1367
1368
1369
1370
1371
1372 vma = find_vma(mm, addr);
1373 if (!vma || addr+len <= vma->vm_start)
1374
1375 return (mm->free_area_cache = addr);
1376
1377
1378 if (addr + mm->cached_hole_size < vma->vm_start)
1379 mm->cached_hole_size = vma->vm_start - addr;
1380
1381
1382 addr = vma->vm_start-len;
1383 } while (len < vma->vm_start);
1384
1385bottomup:
1386
1387
1388
1389
1390
1391
1392 mm->cached_hole_size = ~0UL;
1393 mm->free_area_cache = TASK_UNMAPPED_BASE;
1394 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1395
1396
1397
1398 mm->free_area_cache = mm->mmap_base;
1399 mm->cached_hole_size = ~0UL;
1400
1401 return addr;
1402}
1403#endif
1404
1405void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1406{
1407
1408
1409
1410 if (addr > mm->free_area_cache)
1411 mm->free_area_cache = addr;
1412
1413
1414 if (mm->free_area_cache > mm->mmap_base)
1415 mm->free_area_cache = mm->mmap_base;
1416}
1417
1418unsigned long
1419get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1420 unsigned long pgoff, unsigned long flags)
1421{
1422 unsigned long (*get_area)(struct file *, unsigned long,
1423 unsigned long, unsigned long, unsigned long);
1424
1425 get_area = current->mm->get_unmapped_area;
1426 if (file && file->f_op && file->f_op->get_unmapped_area)
1427 get_area = file->f_op->get_unmapped_area;
1428 addr = get_area(file, addr, len, pgoff, flags);
1429 if (IS_ERR_VALUE(addr))
1430 return addr;
1431
1432 if (addr > TASK_SIZE - len)
1433 return -ENOMEM;
1434 if (addr & ~PAGE_MASK)
1435 return -EINVAL;
1436
1437 return arch_rebalance_pgtables(addr, len);
1438}
1439
1440EXPORT_SYMBOL(get_unmapped_area);
1441
1442
1443struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
1444{
1445 struct vm_area_struct *vma = NULL;
1446
1447 if (mm) {
1448
1449
1450 vma = mm->mmap_cache;
1451 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1452 struct rb_node * rb_node;
1453
1454 rb_node = mm->mm_rb.rb_node;
1455 vma = NULL;
1456
1457 while (rb_node) {
1458 struct vm_area_struct * vma_tmp;
1459
1460 vma_tmp = rb_entry(rb_node,
1461 struct vm_area_struct, vm_rb);
1462
1463 if (vma_tmp->vm_end > addr) {
1464 vma = vma_tmp;
1465 if (vma_tmp->vm_start <= addr)
1466 break;
1467 rb_node = rb_node->rb_left;
1468 } else
1469 rb_node = rb_node->rb_right;
1470 }
1471 if (vma)
1472 mm->mmap_cache = vma;
1473 }
1474 }
1475 return vma;
1476}
1477
1478EXPORT_SYMBOL(find_vma);
1479
1480
1481struct vm_area_struct *
1482find_vma_prev(struct mm_struct *mm, unsigned long addr,
1483 struct vm_area_struct **pprev)
1484{
1485 struct vm_area_struct *vma = NULL, *prev = NULL;
1486 struct rb_node * rb_node;
1487 if (!mm)
1488 goto out;
1489
1490
1491 vma = mm->mmap;
1492
1493
1494 rb_node = mm->mm_rb.rb_node;
1495
1496 while (rb_node) {
1497 struct vm_area_struct *vma_tmp;
1498 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1499
1500 if (addr < vma_tmp->vm_end) {
1501 rb_node = rb_node->rb_left;
1502 } else {
1503 prev = vma_tmp;
1504 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1505 break;
1506 rb_node = rb_node->rb_right;
1507 }
1508 }
1509
1510out:
1511 *pprev = prev;
1512 return prev ? prev->vm_next : vma;
1513}
1514
1515
1516
1517
1518
1519
1520static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
1521{
1522 struct mm_struct *mm = vma->vm_mm;
1523 struct rlimit *rlim = current->signal->rlim;
1524 unsigned long new_start;
1525
1526
1527 if (!may_expand_vm(mm, grow))
1528 return -ENOMEM;
1529
1530
1531 if (size > rlim[RLIMIT_STACK].rlim_cur)
1532 return -ENOMEM;
1533
1534
1535 if (vma->vm_flags & VM_LOCKED) {
1536 unsigned long locked;
1537 unsigned long limit;
1538 locked = mm->locked_vm + grow;
1539 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1540 if (locked > limit && !capable(CAP_IPC_LOCK))
1541 return -ENOMEM;
1542 }
1543
1544
1545 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1546 vma->vm_end - size;
1547 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1548 return -EFAULT;
1549
1550
1551
1552
1553
1554 if (security_vm_enough_memory(grow))
1555 return -ENOMEM;
1556
1557
1558 mm->total_vm += grow;
1559 if (vma->vm_flags & VM_LOCKED)
1560 mm->locked_vm += grow;
1561 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1562 return 0;
1563}
1564
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * Grow an upward-growing VMA so that it covers @address.
 *
 * Returns 0 on success (including when the VMA already covers the address),
 * -EFAULT if the VMA is not VM_GROWSUP, or -ENOMEM if the anon_vma cannot
 * be prepared, the address is too close to the top of the address space,
 * or the growth is refused by acct_stack_growth().
 */
#ifndef CONFIG_IA64
static inline
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long new_end;
	int error = -ENOMEM;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* An anon_vma is needed so that anon_vma_lock() below can be taken. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;
	anon_vma_lock(vma);

	/*
	 * Round up to the page above address + 4.  If that wraps around
	 * (result not greater than address), refuse with -ENOMEM.
	 */
	new_end = PAGE_ALIGN(address + 4);
	if (address >= new_end)
		goto unlock;
	error = 0;

	/* Only act if the VMA does not already reach the new end. */
	if (new_end > vma->vm_end) {
		unsigned long span = new_end - vma->vm_start;
		unsigned long pages = (new_end - vma->vm_end) >> PAGE_SHIFT;

		error = acct_stack_growth(vma, span, pages);
		if (!error)
			vma->vm_end = new_end;
	}

unlock:
	anon_vma_unlock(vma);
	return error;
}
#endif
1617
1618
1619
1620
1621static inline int expand_downwards(struct vm_area_struct *vma,
1622 unsigned long address)
1623{
1624 int error;
1625
1626
1627
1628
1629
1630 if (unlikely(anon_vma_prepare(vma)))
1631 return -ENOMEM;
1632
1633 address &= PAGE_MASK;
1634 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1635 if (error)
1636 return error;
1637
1638 anon_vma_lock(vma);
1639
1640
1641
1642
1643
1644
1645
1646
1647 if (address < vma->vm_start) {
1648 unsigned long size, grow;
1649
1650 size = vma->vm_end - address;
1651 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1652
1653 error = acct_stack_growth(vma, size, grow);
1654 if (!error) {
1655 vma->vm_start = address;
1656 vma->vm_pgoff -= grow;
1657 }
1658 }
1659 anon_vma_unlock(vma);
1660 return error;
1661}
1662
/*
 * Non-inline entry point for growing a VMA downwards to cover @address;
 * simply forwards to expand_downwards() and returns its result.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
1667
1668#ifdef CONFIG_STACK_GROWSUP
/*
 * CONFIG_STACK_GROWSUP variant: the stack grows towards higher addresses,
 * so expanding it means calling expand_upwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_upwards(vma, address);

	return error;
}
1673
1674struct vm_area_struct *
1675find_extend_vma(struct mm_struct *mm, unsigned long addr)
1676{
1677 struct vm_area_struct *vma, *prev;
1678
1679 addr &= PAGE_MASK;
1680 vma = find_vma_prev(mm, addr, &prev);
1681 if (vma && (vma->vm_start <= addr))
1682 return vma;
1683 if (!prev || expand_stack(prev, addr))
1684 return NULL;
1685 if (prev->vm_flags & VM_LOCKED)
1686 make_pages_present(addr, prev->vm_end);
1687 return prev;
1688}
1689#else
/*
 * Default variant (no CONFIG_STACK_GROWSUP): the stack grows towards lower
 * addresses, so expanding it means calling expand_downwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
1694
1695struct vm_area_struct *
1696find_extend_vma(struct mm_struct * mm, unsigned long addr)
1697{
1698 struct vm_area_struct * vma;
1699 unsigned long start;
1700
1701 addr &= PAGE_MASK;
1702 vma = find_vma(mm,addr);
1703 if (!vma)
1704 return NULL;
1705 if (vma->vm_start <= addr)
1706 return vma;
1707 if (!(vma->vm_flags & VM_GROWSDOWN))
1708 return NULL;
1709 start = vma->vm_start;
1710 if (expand_stack(vma, addr))
1711 return NULL;
1712 if (vma->vm_flags & VM_LOCKED)
1713 make_pages_present(addr, start);
1714 return vma;
1715}
1716#endif
1717
1718
1719
1720
1721
1722
1723
1724static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1725{
1726
1727 update_hiwater_vm(mm);
1728 do {
1729 long nrpages = vma_pages(vma);
1730
1731 mm->total_vm -= nrpages;
1732 if (vma->vm_flags & VM_LOCKED)
1733 mm->locked_vm -= nrpages;
1734 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1735 vma = remove_vma(vma);
1736 } while (vma);
1737 validate_mm(mm);
1738}
1739
1740
1741
1742
1743
1744
1745static void unmap_region(struct mm_struct *mm,
1746 struct vm_area_struct *vma, struct vm_area_struct *prev,
1747 unsigned long start, unsigned long end)
1748{
1749 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1750 struct mmu_gather *tlb;
1751 unsigned long nr_accounted = 0;
1752
1753 lru_add_drain();
1754 tlb = tlb_gather_mmu(mm, 0);
1755 update_hiwater_rss(mm);
1756 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1757 vm_unacct_memory(nr_accounted);
1758 free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1759 next? next->vm_start: 0);
1760 tlb_finish_mmu(tlb, start, end);
1761}
1762
1763
1764
1765
1766
1767static void
1768detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1769 struct vm_area_struct *prev, unsigned long end)
1770{
1771 struct vm_area_struct **insertion_point;
1772 struct vm_area_struct *tail_vma = NULL;
1773 unsigned long addr;
1774
1775 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1776 do {
1777 rb_erase(&vma->vm_rb, &mm->mm_rb);
1778 mm->map_count--;
1779 tail_vma = vma;
1780 vma = vma->vm_next;
1781 } while (vma && vma->vm_start < end);
1782 *insertion_point = vma;
1783 tail_vma->vm_next = NULL;
1784 if (mm->unmap_area == arch_unmap_area)
1785 addr = prev ? prev->vm_end : mm->mmap_base;
1786 else
1787 addr = vma ? vma->vm_start : mm->mmap_base;
1788 mm->unmap_area(mm, addr);
1789 mm->mmap_cache = NULL;
1790}
1791
1792
1793
1794
1795
1796int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1797 unsigned long addr, int new_below)
1798{
1799 struct mempolicy *pol;
1800 struct vm_area_struct *new;
1801
1802 if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
1803 return -EINVAL;
1804
1805 if (mm->map_count >= sysctl_max_map_count)
1806 return -ENOMEM;
1807
1808 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1809 if (!new)
1810 return -ENOMEM;
1811
1812
1813 *new = *vma;
1814
1815 if (new_below)
1816 new->vm_end = addr;
1817 else {
1818 new->vm_start = addr;
1819 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1820 }
1821
1822 pol = mpol_copy(vma_policy(vma));
1823 if (IS_ERR(pol)) {
1824 kmem_cache_free(vm_area_cachep, new);
1825 return PTR_ERR(pol);
1826 }
1827 vma_set_policy(new, pol);
1828
1829 if (new->vm_file)
1830 get_file(new->vm_file);
1831
1832 if (new->vm_ops && new->vm_ops->open)
1833 new->vm_ops->open(new);
1834
1835 if (new_below)
1836 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1837 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1838 else
1839 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1840
1841 return 0;
1842}
1843
1844
1845
1846
1847
1848
1849int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1850{
1851 unsigned long end;
1852 struct vm_area_struct *vma, *prev, *last;
1853
1854 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1855 return -EINVAL;
1856
1857 if ((len = PAGE_ALIGN(len)) == 0)
1858 return -EINVAL;
1859
1860
1861 vma = find_vma_prev(mm, start, &prev);
1862 if (!vma)
1863 return 0;
1864
1865
1866
1867 end = start + len;
1868 if (vma->vm_start >= end)
1869 return 0;
1870
1871
1872
1873
1874
1875
1876
1877
1878 if (start > vma->vm_start) {
1879 int error = split_vma(mm, vma, start, 0);
1880 if (error)
1881 return error;
1882 prev = vma;
1883 }
1884
1885
1886 last = find_vma(mm, end);
1887 if (last && end > last->vm_start) {
1888 int error = split_vma(mm, last, end, 1);
1889 if (error)
1890 return error;
1891 }
1892 vma = prev? prev->vm_next: mm->mmap;
1893
1894
1895
1896
1897 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1898 unmap_region(mm, vma, prev, start, end);
1899
1900
1901 remove_vma_list(mm, vma);
1902
1903 return 0;
1904}
1905
1906EXPORT_SYMBOL(do_munmap);
1907
1908asmlinkage long sys_munmap(unsigned long addr, size_t len)
1909{
1910 int ret;
1911 struct mm_struct *mm = current->mm;
1912
1913 profile_munmap(addr);
1914
1915 down_write(&mm->mmap_sem);
1916 ret = do_munmap(mm, addr, len);
1917 up_write(&mm->mmap_sem);
1918 return ret;
1919}
1920
/*
 * Debug aid (CONFIG_DEBUG_VM only): warn if mm->mmap_sem can be taken for
 * reading, which means the caller does not hold it for writing.  Compiles
 * to nothing otherwise.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	/* Expected case: the trylock fails because a writer holds the lock. */
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
1930
1931
1932
1933
1934
1935
1936unsigned long do_brk(unsigned long addr, unsigned long len)
1937{
1938 struct mm_struct * mm = current->mm;
1939 struct vm_area_struct * vma, * prev;
1940 unsigned long flags;
1941 struct rb_node ** rb_link, * rb_parent;
1942 pgoff_t pgoff = addr >> PAGE_SHIFT;
1943 int error;
1944
1945 len = PAGE_ALIGN(len);
1946 if (!len)
1947 return addr;
1948
1949 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
1950 return -EINVAL;
1951
1952 if (is_hugepage_only_range(mm, addr, len))
1953 return -EINVAL;
1954
1955 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
1956 if (error)
1957 return error;
1958
1959 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
1960
1961 error = arch_mmap_check(addr, len, flags);
1962 if (error)
1963 return error;
1964
1965
1966
1967
1968 if (mm->def_flags & VM_LOCKED) {
1969 unsigned long locked, lock_limit;
1970 locked = len >> PAGE_SHIFT;
1971 locked += mm->locked_vm;
1972 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
1973 lock_limit >>= PAGE_SHIFT;
1974 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1975 return -EAGAIN;
1976 }
1977
1978
1979
1980
1981
1982 verify_mm_writelocked(mm);
1983
1984
1985
1986
1987 munmap_back:
1988 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1989 if (vma && vma->vm_start < addr + len) {
1990 if (do_munmap(mm, addr, len))
1991 return -ENOMEM;
1992 goto munmap_back;
1993 }
1994
1995
1996 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1997 return -ENOMEM;
1998
1999 if (mm->map_count > sysctl_max_map_count)
2000 return -ENOMEM;
2001
2002 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2003 return -ENOMEM;
2004
2005
2006 if (vma_merge(mm, prev, addr, addr + len, flags,
2007 NULL, NULL, pgoff, NULL))
2008 goto out;
2009
2010
2011
2012
2013 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2014 if (!vma) {
2015 vm_unacct_memory(len >> PAGE_SHIFT);
2016 return -ENOMEM;
2017 }
2018
2019 vma->vm_mm = mm;
2020 vma->vm_start = addr;
2021 vma->vm_end = addr + len;
2022 vma->vm_pgoff = pgoff;
2023 vma->vm_flags = flags;
2024 vma->vm_page_prot = vm_get_page_prot(flags);
2025 vma_link(mm, vma, prev, rb_link, rb_parent);
2026out:
2027 mm->total_vm += len >> PAGE_SHIFT;
2028 if (flags & VM_LOCKED) {
2029 mm->locked_vm += len >> PAGE_SHIFT;
2030 make_pages_present(addr, addr + len);
2031 }
2032 return addr;
2033}
2034
2035EXPORT_SYMBOL(do_brk);
2036
2037
2038void exit_mmap(struct mm_struct *mm)
2039{
2040 struct mmu_gather *tlb;
2041 struct vm_area_struct *vma = mm->mmap;
2042 unsigned long nr_accounted = 0;
2043 unsigned long end;
2044
2045
2046 arch_exit_mmap(mm);
2047
2048 lru_add_drain();
2049 flush_cache_mm(mm);
2050 tlb = tlb_gather_mmu(mm, 1);
2051
2052
2053 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2054 vm_unacct_memory(nr_accounted);
2055 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
2056 tlb_finish_mmu(tlb, 0, end);
2057
2058
2059
2060
2061
2062 while (vma)
2063 vma = remove_vma(vma);
2064
2065 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2066}
2067
2068
2069
2070
2071
2072int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2073{
2074 struct vm_area_struct * __vma, * prev;
2075 struct rb_node ** rb_link, * rb_parent;
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089 if (!vma->vm_file) {
2090 BUG_ON(vma->anon_vma);
2091 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2092 }
2093 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2094 if (__vma && __vma->vm_start < vma->vm_end)
2095 return -ENOMEM;
2096 if ((vma->vm_flags & VM_ACCOUNT) &&
2097 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2098 return -ENOMEM;
2099 vma_link(mm, vma, prev, rb_link, rb_parent);
2100 return 0;
2101}
2102
2103
2104
2105
2106
2107struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2108 unsigned long addr, unsigned long len, pgoff_t pgoff)
2109{
2110 struct vm_area_struct *vma = *vmap;
2111 unsigned long vma_start = vma->vm_start;
2112 struct mm_struct *mm = vma->vm_mm;
2113 struct vm_area_struct *new_vma, *prev;
2114 struct rb_node **rb_link, *rb_parent;
2115 struct mempolicy *pol;
2116
2117
2118
2119
2120
2121 if (!vma->vm_file && !vma->anon_vma)
2122 pgoff = addr >> PAGE_SHIFT;
2123
2124 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2125 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2126 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2127 if (new_vma) {
2128
2129
2130
2131 if (vma_start >= new_vma->vm_start &&
2132 vma_start < new_vma->vm_end)
2133 *vmap = new_vma;
2134 } else {
2135 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2136 if (new_vma) {
2137 *new_vma = *vma;
2138 pol = mpol_copy(vma_policy(vma));
2139 if (IS_ERR(pol)) {
2140 kmem_cache_free(vm_area_cachep, new_vma);
2141 return NULL;
2142 }
2143 vma_set_policy(new_vma, pol);
2144 new_vma->vm_start = addr;
2145 new_vma->vm_end = addr + len;
2146 new_vma->vm_pgoff = pgoff;
2147 if (new_vma->vm_file)
2148 get_file(new_vma->vm_file);
2149 if (new_vma->vm_ops && new_vma->vm_ops->open)
2150 new_vma->vm_ops->open(new_vma);
2151 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2152 }
2153 }
2154 return new_vma;
2155}
2156
2157
2158
2159
2160
2161int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2162{
2163 unsigned long cur = mm->total_vm;
2164 unsigned long lim;
2165
2166 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2167
2168 if (cur + npages > lim)
2169 return 0;
2170 return 1;
2171}
2172
2173
2174static int special_mapping_fault(struct vm_area_struct *vma,
2175 struct vm_fault *vmf)
2176{
2177 pgoff_t pgoff;
2178 struct page **pages;
2179
2180
2181
2182
2183
2184
2185
2186 pgoff = vmf->pgoff - vma->vm_pgoff;
2187
2188 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2189 pgoff--;
2190
2191 if (*pages) {
2192 struct page *page = *pages;
2193 get_page(page);
2194 vmf->page = page;
2195 return 0;
2196 }
2197
2198 return VM_FAULT_SIGBUS;
2199}
2200
2201
2202
2203
/*
 * close() hook for special mappings.  Intentionally does nothing; it only
 * exists so that special_mapping_vmops has a .close method.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
	/* Nothing to release. */
}
2207
2208static struct vm_operations_struct special_mapping_vmops = {
2209 .close = special_mapping_close,
2210 .fault = special_mapping_fault,
2211};
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222int install_special_mapping(struct mm_struct *mm,
2223 unsigned long addr, unsigned long len,
2224 unsigned long vm_flags, struct page **pages)
2225{
2226 struct vm_area_struct *vma;
2227
2228 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2229 if (unlikely(vma == NULL))
2230 return -ENOMEM;
2231
2232 vma->vm_mm = mm;
2233 vma->vm_start = addr;
2234 vma->vm_end = addr + len;
2235
2236 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2237 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2238
2239 vma->vm_ops = &special_mapping_vmops;
2240 vma->vm_private_data = pages;
2241
2242 if (unlikely(insert_vm_struct(mm, vma))) {
2243 kmem_cache_free(vm_area_cachep, vma);
2244 return -ENOMEM;
2245 }
2246
2247 mm->total_vm += len >> PAGE_SHIFT;
2248
2249 return 0;
2250}
2251