1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41#include <linux/kernel_stat.h>
42#include <linux/mm.h>
43#include <linux/hugetlb.h>
44#include <linux/mman.h>
45#include <linux/swap.h>
46#include <linux/highmem.h>
47#include <linux/pagemap.h>
48#include <linux/rmap.h>
49#include <linux/module.h>
50#include <linux/delayacct.h>
51#include <linux/init.h>
52#include <linux/writeback.h>
53#include <linux/memcontrol.h>
54
55#include <asm/pgalloc.h>
56#include <asm/uaccess.h>
57#include <asm/tlb.h>
58#include <asm/tlbflush.h>
59#include <asm/pgtable.h>
60
61#include <linux/swapops.h>
62#include <linux/elf.h>
63
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
 * Global page-map bookkeeping, compiled in only when
 * CONFIG_NEED_MULTIPLE_NODES is not set.  Nothing in this part of the
 * file assigns them.
 * NOTE(review): presumably mem_map is the array of page descriptors and
 * max_mapnr its number of entries - confirm against the boot code.
 */
unsigned long max_mapnr;
struct page *mem_map;

/* Exported so that modules can reference them. */
EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(mem_map);
#endif
72
/*
 * NOTE(review): by its name, the number of physical pages in the system;
 * it is not assigned anywhere in this part of the file - confirm.
 */
unsigned long num_physpages;
/*
 * NOTE(review): by its name, the upper bound of directly mapped (low)
 * memory; the assignment is not visible here, so confirm against the
 * architecture setup code before relying on that.
 */
void * high_memory;

/* Exported so that modules can reference them. */
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
85
86
87
88
89
90
91
/*
 * Address-space randomization level.  Defaults to 1 when
 * CONFIG_COMPAT_BRK is set and to 2 otherwise; the "norandmaps" boot
 * option handler below forces it to 0.
 * NOTE(review): what each level enables is decided by the users of this
 * variable, which are outside this part of the file.
 */
int randomize_va_space __read_mostly =
#ifdef CONFIG_COMPAT_BRK
					1;
#else
					2;
#endif
98
/*
 * Handler for the "norandmaps" kernel command-line option: sets
 * randomize_va_space to 0.  The option's argument string is ignored.
 * NOTE(review): the return value 1 presumably tells the __setup()
 * machinery that the option was consumed - confirm.
 */
static int __init disable_randmaps(char *s)
{
	randomize_va_space = 0;
	return 1;
}
__setup("norandmaps", disable_randmaps);
105
106
107
108
109
110
111
112
/*
 * Report a pgd entry through pgd_ERROR() and then reset it with
 * pgd_clear().  The report comes first so it still sees the old value.
 * NOTE(review): presumably reached from pgd_none_or_clear_bad() when an
 * entry fails the pgd_bad() test - that helper is defined elsewhere.
 */
void pgd_clear_bad(pgd_t *pgd)
{
	pgd_ERROR(*pgd);
	pgd_clear(pgd);
}
118
/*
 * Report a pud entry through pud_ERROR() and then reset it with
 * pud_clear().  The report comes first so it still sees the old value.
 * NOTE(review): presumably reached from pud_none_or_clear_bad() when an
 * entry fails the pud_bad() test - that helper is defined elsewhere.
 */
void pud_clear_bad(pud_t *pud)
{
	pud_ERROR(*pud);
	pud_clear(pud);
}
124
/*
 * Report a pmd entry through pmd_ERROR() and then reset it with
 * pmd_clear().  The report comes first so it still sees the old value.
 * NOTE(review): presumably reached from pmd_none_or_clear_bad() when an
 * entry fails the pmd_bad() test - that helper is defined elsewhere.
 */
void pmd_clear_bad(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	pmd_clear(pmd);
}
130
131
132
133
134
/*
 * Detach the pte table referenced by *pmd and queue it for freeing
 * through the mmu_gather.
 * NOTE(review): no lock is taken here; the locking expected of callers
 * is not visible in this function - confirm.
 */
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
	/* Fetch the table handle before the entry that holds it is wiped. */
	pgtable_t token = pmd_pgtable(*pmd);
	pmd_clear(pmd);
	pte_free_tlb(tlb, token);
	/* One pte table fewer is accounted to this mm. */
	tlb->mm->nr_ptes--;
}
142
/*
 * Free the pte tables referenced by the pmd entries covering
 * [addr, end), then free the pmd table itself when the floor/ceiling
 * limits show that nothing else can still be using it.
 *
 * @floor:   lowest address whose page tables may be freed.
 * @ceiling: upper limit; 0 is treated as "no upper limit" (see below).
 */
static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* Nothing to free for an entry the helper reports as empty. */
		if (pmd_none_or_clear_bad(pmd))
			continue;
		free_pte_range(tlb, pmd);
	} while (pmd++, addr = next, addr != end);

	/*
	 * The pmd table is only released when the PUD-aligned region that
	 * starts at the rounded-down start lies within [floor, ceiling).
	 */
	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		/* A non-zero ceiling that rounds down to 0 leaves no room. */
		if (!ceiling)
			return;
	}
	/* The "- 1" makes a ceiling of 0 wrap to the maximum value. */
	if (end - 1 > ceiling - 1)
		return;

	/* Look the table up before clearing the pud entry that points at it. */
	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}
175
/*
 * Walk the pud entries covering [addr, end), letting free_pmd_range()
 * release what hangs below each populated one, then free the pud table
 * itself when the floor/ceiling limits allow it.
 *
 * @floor:   lowest address whose page tables may be freed.
 * @ceiling: upper limit; 0 is treated as "no upper limit" (see below).
 */
static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		/* Nothing to free for an entry the helper reports as empty. */
		if (pud_none_or_clear_bad(pud))
			continue;
		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
	} while (pud++, addr = next, addr != end);

	/*
	 * The pud table is only released when the PGDIR-aligned region that
	 * starts at the rounded-down start lies within [floor, ceiling).
	 */
	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		/* A non-zero ceiling that rounds down to 0 leaves no room. */
		if (!ceiling)
			return;
	}
	/* The "- 1" makes a ceiling of 0 wrap to the maximum value. */
	if (end - 1 > ceiling - 1)
		return;

	/* Look the table up before clearing the pgd entry that points at it. */
	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud);
}
208
209
210
211
212
213
/*
 * Free the page tables that cover [addr, end) in (*tlb)->mm, without
 * touching tables that may still serve addresses below @floor or at and
 * above @ceiling.
 *
 * The range is first trimmed to whole PMD-sized units that lie entirely
 * inside the floor/ceiling window; the lower-level helpers then apply
 * the same test at pud and pgd granularity before freeing a table.
 * A @ceiling of 0 is treated as "no upper limit": every comparison is
 * done on "value - 1", so 0 wraps to the largest unsigned long.
 */
void free_pgd_range(struct mmu_gather **tlb,
			unsigned long addr, unsigned long end,
			unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start;

	/* Round the start down; if that dips below floor, skip one PMD. */
	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		/* Wrapped past the top of the address space: nothing left. */
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		/* A non-zero ceiling that rounds down to 0 leaves no room. */
		if (!ceiling)
			return;
	}
	/* Pull end back by one PMD when it reaches beyond the ceiling. */
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	/* After trimming there may be no whole PMD left to free. */
	if (addr > end - 1)
		return;

	/* NOTE(review): start is assigned here but never read afterwards. */
	start = addr;
	pgd = pgd_offset((*tlb)->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}
273
/*
 * Unlink every vma on the list starting at @vma from its anon_vma and
 * file mapping, and free the page tables that covered it.
 *
 * @floor/@ceiling bound the region whose page tables may be freed.  For
 * each batch the upper bound handed down is the start of the next vma
 * still on the list, or @ceiling when there is none.
 */
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
		unsigned long floor, unsigned long ceiling)
{
	while (vma) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long addr = vma->vm_start;

		/* The unlinking happens before the tables are freed. */
		anon_vma_unlink(vma);
		unlink_file_vma(vma);

		if (is_vm_hugetlb_page(vma)) {
			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		} else {
			/*
			 * Batch up following non-hugetlb vmas that start no
			 * more than PMD_SIZE past the end of the current one,
			 * so a single free_pgd_range() call covers them all.
			 */
			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
			       && !is_vm_hugetlb_page(next)) {
				vma = next;
				next = vma->vm_next;
				anon_vma_unlink(vma);
				unlink_file_vma(vma);
			}
			/* addr is still the start of the first vma in the batch. */
			free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		}
		vma = next;
	}
}
307
308int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
309{
310 pgtable_t new = pte_alloc_one(mm, address);
311 if (!new)
312 return -ENOMEM;
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327 smp_wmb();
328
329 spin_lock(&mm->page_table_lock);
330 if (!pmd_present(*pmd)) {
331 mm->nr_ptes++;
332 pmd_populate(mm, pmd, new);
333 new = NULL;
334 }
335 spin_unlock(&mm->page_table_lock);
336 if (new)
337 pte_free(mm, new);
338 return 0;
339}
340
341int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
342{
343 pte_t *new = pte_alloc_one_kernel(&init_mm, address);
344 if (!new)
345 return -ENOMEM;
346
347 smp_wmb();
348
349 spin_lock(&init_mm.page_table_lock);
350 if (!pmd_present(*pmd)) {
351 pmd_populate_kernel(&init_mm, pmd, new);
352 new = NULL;
353 }
354 spin_unlock(&init_mm.page_table_lock);
355 if (new)
356 pte_free_kernel(&init_mm, new);
357 return 0;
358}
359
/*
 * Add the given deltas to the file_rss and anon_rss counters of @mm.
 * A delta of 0 skips the corresponding update.
 *
 * In each add_mm_counter() call the first file_rss/anon_rss names the
 * counter and the second is this function's parameter.
 * NOTE(review): add_mm_counter() is presumably a macro that pastes its
 * second argument into a member name; if so, do not rename the
 * parameters - confirm against its definition.
 */
static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
{
	if (file_rss)
		add_mm_counter(mm, file_rss, file_rss);
	if (anon_rss)
		add_mm_counter(mm, anon_rss, anon_rss);
}
367
368
369
370
371
372
373
374
375void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
376{
377 printk(KERN_ERR "Bad pte = %08llx, process = %s, "
378 "vm_flags = %lx, vaddr = %lx\n",
379 (long long)pte_val(pte),
380 (vma->vm_mm == current->mm ? current->comm : "???"),
381 vma->vm_flags, vaddr);
382 dump_stack();
383}
384
385static inline int is_cow_mapping(unsigned int flags)
386{
387 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
388}
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432#ifdef __HAVE_ARCH_PTE_SPECIAL
433# define HAVE_PTE_SPECIAL 1
434#else
435# define HAVE_PTE_SPECIAL 0
436#endif
437struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
438 pte_t pte)
439{
440 unsigned long pfn;
441
442 if (HAVE_PTE_SPECIAL) {
443 if (likely(!pte_special(pte))) {
444 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
445 return pte_page(pte);
446 }
447 VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
448 return NULL;
449 }
450
451
452
453 pfn = pte_pfn(pte);
454
455 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
456 if (vma->vm_flags & VM_MIXEDMAP) {
457 if (!pfn_valid(pfn))
458 return NULL;
459 goto out;
460 } else {
461 unsigned long off;
462 off = (addr - vma->vm_start) >> PAGE_SHIFT;
463 if (pfn == vma->vm_pgoff + off)
464 return NULL;
465 if (!is_cow_mapping(vma->vm_flags))
466 return NULL;
467 }
468 }
469
470 VM_BUG_ON(!pfn_valid(pfn));
471
472
473
474
475
476
477out:
478 return pfn_to_page(pfn);
479}
480
481
482
483
484
485
486
/*
 * Copy one non-empty pte from @src_mm to @dst_mm at @addr.
 *
 * @rss is a two-element counter array: index 0 is bumped for a copied
 * non-anonymous page and index 1 for an anonymous one (the index is
 * !!PageAnon(page)).
 * NOTE(review): no locking is done here; the caller in this file holds
 * both pte locks around the call.
 */
static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
		unsigned long addr, int *rss)
{
	unsigned long vm_flags = vma->vm_flags;
	pte_t pte = *src_pte;
	struct page *page;

	/* Not-present ptes: a file pte or something encoded as a swap entry. */
	if (unlikely(!pte_present(pte))) {
		if (!pte_file(pte)) {
			swp_entry_t entry = pte_to_swp_entry(pte);

			/* NOTE(review): presumably takes another reference on the swap entry - confirm. */
			swap_duplicate(entry);
			/*
			 * Put dst_mm on the mmlist right after src_mm if it
			 * is not on a list yet; the emptiness test is redone
			 * under mmlist_lock.
			 */
			if (unlikely(list_empty(&dst_mm->mmlist))) {
				spin_lock(&mmlist_lock);
				if (list_empty(&dst_mm->mmlist))
					list_add(&dst_mm->mmlist,
						 &src_mm->mmlist);
				spin_unlock(&mmlist_lock);
			}
			if (is_write_migration_entry(entry) &&
					is_cow_mapping(vm_flags)) {
				/*
				 * A writable migration entry in a COW mapping
				 * is downgraded to a read entry, in the source
				 * pte as well as in the copy.
				 */
				make_migration_entry_read(&entry);
				pte = swp_entry_to_pte(entry);
				set_pte_at(src_mm, addr, src_pte, pte);
			}
		}
		goto out_set_pte;
	}

	/*
	 * For a COW mapping, write-protect the source pte and the copy
	 * alike.
	 */
	if (is_cow_mapping(vm_flags)) {
		ptep_set_wrprotect(src_mm, addr, src_pte);
		pte = pte_wrprotect(pte);
	}

	/*
	 * In a shared mapping the copy is marked clean; in every case it
	 * is marked old.
	 */
	if (vm_flags & VM_SHARED)
		pte = pte_mkclean(pte);
	pte = pte_mkold(pte);

	/* Only ptes that vm_normal_page() resolves get refcount/rmap/rss updates. */
	page = vm_normal_page(vma, addr, pte);
	if (page) {
		get_page(page);
		page_dup_rmap(page, vma, addr);
		rss[!!PageAnon(page)]++;
	}

out_set_pte:
	set_pte_at(dst_mm, addr, dst_pte, pte);
}
551
/*
 * Copy the ptes of [addr, end) under one pmd entry from @src_mm to
 * @dst_mm.
 *
 * The work is done in batches: both pte locks are taken, entries are
 * copied until the range is done or the loop decides to take a break,
 * then the locks are dropped, cond_resched() runs and, if anything is
 * left, the function starts over at the current address.
 *
 * Returns 0 on success, -ENOMEM if the destination pte table could not
 * be allocated.
 */
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
		unsigned long addr, unsigned long end)
{
	pte_t *src_pte, *dst_pte;
	spinlock_t *src_ptl, *dst_ptl;
	int progress = 0;
	int rss[2];	/* [0] non-anonymous pages, [1] anonymous pages */

again:
	rss[1] = rss[0] = 0;
	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
	if (!dst_pte)
		return -ENOMEM;
	src_pte = pte_offset_map_nested(src_pmd, addr);
	src_ptl = pte_lockptr(src_mm, src_pmd);
	/* The source lock nests inside the destination lock. */
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
	arch_enter_lazy_mmu_mode();

	do {
		/*
		 * progress grows by 1 per empty pte and by 8 per copied
		 * one; each time it reaches 32 it is reset and the loop
		 * breaks out if a reschedule or lock break is wanted.
		 */
		if (progress >= 32) {
			progress = 0;
			if (need_resched() ||
			    spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
				break;
		}
		if (pte_none(*src_pte)) {
			progress++;
			continue;
		}
		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
		progress += 8;
	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

	arch_leave_lazy_mmu_mode();
	spin_unlock(src_ptl);
	/*
	 * "- 1": the pointers were advanced past the last entry visited.
	 * A break cannot happen on the first iteration of a pass because
	 * progress is 0 whenever a pass starts.
	 */
	pte_unmap_nested(src_pte - 1);
	/* Credit this batch to dst_mm before its pte lock is dropped. */
	add_mm_rss(dst_mm, rss[0], rss[1]);
	pte_unmap_unlock(dst_pte - 1, dst_ptl);
	cond_resched();
	if (addr != end)
		goto again;
	return 0;
}
600
601static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
602 pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
603 unsigned long addr, unsigned long end)
604{
605 pmd_t *src_pmd, *dst_pmd;
606 unsigned long next;
607
608 dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
609 if (!dst_pmd)
610 return -ENOMEM;
611 src_pmd = pmd_offset(src_pud, addr);
612 do {
613 next = pmd_addr_end(addr, end);
614 if (pmd_none_or_clear_bad(src_pmd))
615 continue;
616 if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
617 vma, addr, next))
618 return -ENOMEM;
619 } while (dst_pmd++, src_pmd++, addr = next, addr != end);
620 return 0;
621}
622
623static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
624 pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
625 unsigned long addr, unsigned long end)
626{
627 pud_t *src_pud, *dst_pud;
628 unsigned long next;
629
630 dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
631 if (!dst_pud)
632 return -ENOMEM;
633 src_pud = pud_offset(src_pgd, addr);
634 do {
635 next = pud_addr_end(addr, end);
636 if (pud_none_or_clear_bad(src_pud))
637 continue;
638 if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
639 vma, addr, next))
640 return -ENOMEM;
641 } while (dst_pud++, src_pud++, addr = next, addr != end);
642 return 0;
643}
644
645int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
646 struct vm_area_struct *vma)
647{
648 pgd_t *src_pgd, *dst_pgd;
649 unsigned long next;
650 unsigned long addr = vma->vm_start;
651 unsigned long end = vma->vm_end;
652
653
654
655
656
657
658
659 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
660 if (!vma->anon_vma)
661 return 0;
662 }
663
664 if (is_vm_hugetlb_page(vma))
665 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
666
667 dst_pgd = pgd_offset(dst_mm, addr);
668 src_pgd = pgd_offset(src_mm, addr);
669 do {
670 next = pgd_addr_end(addr, end);
671 if (pgd_none_or_clear_bad(src_pgd))
672 continue;
673 if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
674 vma, addr, next))
675 return -ENOMEM;
676 } while (dst_pgd++, src_pgd++, addr = next, addr != end);
677 return 0;
678}
679
/*
 * Tear down the ptes of [addr, end) under one pmd entry.
 *
 * @zap_work is a budget shared with the callers: it drops by 1 for an
 * empty pte and by PAGE_SIZE for any other, and the loop stops once it
 * is no longer positive.  @details, when non-NULL, restricts which
 * entries are touched.
 *
 * Returns the address at which the walk stopped (@end when the whole
 * range was processed).
 */
static unsigned long zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	struct mm_struct *mm = tlb->mm;
	pte_t *pte;
	spinlock_t *ptl;
	int file_rss = 0;	/* both deltas only ever go negative here */
	int anon_rss = 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent = *pte;
		if (pte_none(ptent)) {
			(*zap_work)--;
			continue;
		}

		(*zap_work) -= PAGE_SIZE;

		if (pte_present(ptent)) {
			struct page *page;

			page = vm_normal_page(vma, addr, ptent);
			if (unlikely(details) && page) {
				/*
				 * With check_mapping set, leave alone pages
				 * that belong to a different mapping.
				 */
				if (details->check_mapping &&
				    details->check_mapping != page->mapping)
					continue;
				/*
				 * For a nonlinear vma, leave alone pages whose
				 * index is outside [first_index, last_index].
				 */
				if (details->nonlinear_vma &&
				    (page->index < details->first_index ||
				     page->index > details->last_index))
					continue;
			}
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			tlb_remove_tlb_entry(tlb, pte, addr);
			/* Nothing more to do when there is no struct page. */
			if (unlikely(!page))
				continue;
			/*
			 * In a nonlinear vma, a page that is not at its
			 * linear position leaves a file pte recording its
			 * index behind.
			 */
			if (unlikely(details) && details->nonlinear_vma
			    && linear_page_index(details->nonlinear_vma,
						addr) != page->index)
				set_pte_at(mm, addr, pte,
					   pgoff_to_pte(page->index));
			if (PageAnon(page))
				anon_rss--;
			else {
				/* Carry the pte's dirty/young bits over to the page. */
				if (pte_dirty(ptent))
					set_page_dirty(page);
				if (pte_young(ptent))
					SetPageReferenced(page);
				file_rss--;
			}
			page_remove_rmap(page, vma);
			tlb_remove_page(tlb, page);
			continue;
		}
		/*
		 * Not-present entries (swap or file ptes) are left alone
		 * whenever a details filter is in effect.
		 */
		if (unlikely(details))
			continue;
		if (!pte_file(ptent))
			free_swap_and_cache(pte_to_swp_entry(ptent));
		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

	add_mm_rss(mm, file_rss, anon_rss);
	arch_leave_lazy_mmu_mode();
	/* "- 1": pte was advanced past the last entry visited. */
	pte_unmap_unlock(pte - 1, ptl);

	return addr;
}
764
765static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
766 struct vm_area_struct *vma, pud_t *pud,
767 unsigned long addr, unsigned long end,
768 long *zap_work, struct zap_details *details)
769{
770 pmd_t *pmd;
771 unsigned long next;
772
773 pmd = pmd_offset(pud, addr);
774 do {
775 next = pmd_addr_end(addr, end);
776 if (pmd_none_or_clear_bad(pmd)) {
777 (*zap_work)--;
778 continue;
779 }
780 next = zap_pte_range(tlb, vma, pmd, addr, next,
781 zap_work, details);
782 } while (pmd++, addr = next, (addr != end && *zap_work > 0));
783
784 return addr;
785}
786
787static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
788 struct vm_area_struct *vma, pgd_t *pgd,
789 unsigned long addr, unsigned long end,
790 long *zap_work, struct zap_details *details)
791{
792 pud_t *pud;
793 unsigned long next;
794
795 pud = pud_offset(pgd, addr);
796 do {
797 next = pud_addr_end(addr, end);
798 if (pud_none_or_clear_bad(pud)) {
799 (*zap_work)--;
800 continue;
801 }
802 next = zap_pmd_range(tlb, vma, pud, addr, next,
803 zap_work, details);
804 } while (pud++, addr = next, (addr != end && *zap_work > 0));
805
806 return addr;
807}
808
809static unsigned long unmap_page_range(struct mmu_gather *tlb,
810 struct vm_area_struct *vma,
811 unsigned long addr, unsigned long end,
812 long *zap_work, struct zap_details *details)
813{
814 pgd_t *pgd;
815 unsigned long next;
816
817 if (details && !details->check_mapping && !details->nonlinear_vma)
818 details = NULL;
819
820 BUG_ON(addr >= end);
821 tlb_start_vma(tlb, vma);
822 pgd = pgd_offset(vma->vm_mm, addr);
823 do {
824 next = pgd_addr_end(addr, end);
825 if (pgd_none_or_clear_bad(pgd)) {
826 (*zap_work)--;
827 continue;
828 }
829 next = zap_pud_range(tlb, vma, pgd, addr, next,
830 zap_work, details);
831 } while (pgd++, addr = next, (addr != end && *zap_work > 0));
832 tlb_end_vma(tlb, vma);
833
834 return addr;
835}
836
/*
 * Initial zap_work budget used by unmap_vmas() for each batch: the
 * equivalent of 8 pages with CONFIG_PREEMPT, of 1024 pages without.
 */
#ifdef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
#else
/* No preempt: go for improved straight-line efficiency */
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#endif
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
/*
 * Unmap [start_addr, end_addr) across the vma list starting at @vma.
 *
 * @tlbp:        address of the caller's mmu_gather pointer.  The gather
 *               is finished and re-acquired between batches, so the
 *               pointer may change; it is set to NULL when the function
 *               bails out early (see below).
 * @nr_accounted: incremented by the number of pages unmapped from
 *               VM_ACCOUNT vmas.
 * @details:     optional filter passed down to the zap helpers; its
 *               i_mmap_lock, if set, is checked for contention.
 *
 * Work is done in batches of ZAP_BLOCK_SIZE units of zap_work.  After
 * each exhausted batch the gather is flushed; if a reschedule is needed
 * (or the i_mmap_lock is contended) and an i_mmap_lock was supplied,
 * the function returns early with *tlbp == NULL.
 * NOTE(review): presumably so the caller can drop that lock - confirm.
 *
 * Returns the address up to which unmapping has progressed.
 */
unsigned long unmap_vmas(struct mmu_gather **tlbp,
		struct vm_area_struct *vma, unsigned long start_addr,
		unsigned long end_addr, unsigned long *nr_accounted,
		struct zap_details *details)
{
	long zap_work = ZAP_BLOCK_SIZE;
	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
	int tlb_start_valid = 0;
	unsigned long start = start_addr;
	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
	int fullmm = (*tlbp)->fullmm;

	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
		unsigned long end;

		/* Clip the request to this vma; skip it if nothing is left. */
		start = max(vma->vm_start, start_addr);
		if (start >= vma->vm_end)
			continue;
		end = min(vma->vm_end, end_addr);
		if (end <= vma->vm_start)
			continue;

		if (vma->vm_flags & VM_ACCOUNT)
			*nr_accounted += (end - start) >> PAGE_SHIFT;

		while (start != end) {
			/* Remember where the current gather batch began. */
			if (!tlb_start_valid) {
				tlb_start = start;
				tlb_start_valid = 1;
			}

			if (unlikely(is_vm_hugetlb_page(vma))) {
				/* Hugetlb ranges are unmapped in one go. */
				unmap_hugepage_range(vma, start, end);
				zap_work -= (end - start) /
						(HPAGE_SIZE / PAGE_SIZE);
				start = end;
			} else
				start = unmap_page_range(*tlbp, vma,
						start, end, &zap_work, details);

			/* Budget left over means this vma is fully done. */
			if (zap_work > 0) {
				BUG_ON(start != end);
				break;
			}

			/* Budget exhausted: flush what was gathered so far. */
			tlb_finish_mmu(*tlbp, tlb_start, start);

			if (need_resched() ||
				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
				if (i_mmap_lock) {
					/* The gather was finished above and is not re-acquired. */
					*tlbp = NULL;
					goto out;
				}
				cond_resched();
			}

			/* Start a fresh gather and a fresh budget. */
			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
			tlb_start_valid = 0;
			zap_work = ZAP_BLOCK_SIZE;
		}
	}
out:
	return start;	/* which is now the end (or restart) address */
}
934
935
936
937
938
939
940
941
942unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
943 unsigned long size, struct zap_details *details)
944{
945 struct mm_struct *mm = vma->vm_mm;
946 struct mmu_gather *tlb;
947 unsigned long end = address + size;
948 unsigned long nr_accounted = 0;
949
950 lru_add_drain();
951 tlb = tlb_gather_mmu(mm, 0);
952 update_hiwater_rss(mm);
953 end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
954 if (tlb)
955 tlb_finish_mmu(tlb, address, end);
956 return end;
957}
958
959
960
961
/*
 * Look up the page mapped at user address @address in @vma.
 *
 * @flags is a mask of FOLL_* bits:
 *   FOLL_WRITE - the pte must be writable, otherwise NULL is returned;
 *   FOLL_GET   - take a reference on the returned page;
 *   FOLL_TOUCH - mark the page accessed (and dirty for a write lookup);
 *   FOLL_ANON  - return the zero page when no page table or pte exists.
 *
 * Returns the page, NULL when there is nothing suitable mapped, or
 * ERR_PTR(-EFAULT) when a pte is present but vm_normal_page() gives no
 * struct page for it.
 */
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	/* A non-error result here means the huge-page lookup answered. */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		goto out;
	}

	page = NULL;
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto no_page_table;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto no_page_table;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		goto no_page_table;

	/* The huge check comes before pmd_bad() below. */
	if (pmd_huge(*pmd)) {
		BUG_ON(flags & FOLL_GET);
		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
		goto out;
	}

	if (unlikely(pmd_bad(*pmd)))
		goto no_page_table;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);

	pte = *ptep;
	if (!pte_present(pte))
		goto no_page;
	/* Write lookup on a read-only pte: return NULL (page is still NULL). */
	if ((flags & FOLL_WRITE) && !pte_write(pte))
		goto unlock;
	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page))
		goto bad_page;

	if (flags & FOLL_GET)
		get_page(page);
	if (flags & FOLL_TOUCH) {
		/* Only dirty the page if neither pte nor page says so yet. */
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		mark_page_accessed(page);
	}
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return page;

bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);

no_page:
	pte_unmap_unlock(ptep, ptl);
	/* A non-empty, non-present pte: return NULL (page is NULL here). */
	if (!pte_none(pte))
		return page;
	/* An empty pte falls through and is handled like a missing table. */
no_page_table:
	/*
	 * With FOLL_ANON the zero page stands in for the missing mapping;
	 * combining that with FOLL_WRITE is a bug.
	 */
	if (flags & FOLL_ANON) {
		page = ZERO_PAGE(0);
		if (flags & FOLL_GET)
			get_page(page);
		BUG_ON(flags & FOLL_WRITE);
	}
	return page;
}
1047
1048
1049static inline int use_zero_page(struct vm_area_struct *vma)
1050{
1051
1052
1053
1054
1055
1056
1057
1058 if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
1059 return 0;
1060
1061
1062
1063
1064 return !vma->vm_ops ||
1065 (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
1066}
1067
/*
 * Look up (faulting in as needed) @len user pages starting at @start in
 * @mm.
 *
 * @tsk:   task whose fault counters are updated and whose TIF_MEMDIE
 *         flag and gate area are consulted.
 * @write: non-zero when the pages will be written to.
 * @force: non-zero to check the VM_MAY* permission bits instead of the
 *         current VM_READ/VM_WRITE bits.
 * @pages: if non-NULL, receives the page pointers; a reference is taken
 *         on each stored page.
 * @vmas:  if non-NULL, receives the vma for each page.
 *
 * Returns the number of pages processed, which may be fewer than @len
 * when an error stops the walk part-way; if no page was processed at
 * all, a negative errno is returned instead.  @len <= 0 returns 0.
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags;

	if (len <= 0)
		return 0;
	/*
	 * Reduce the required permission to a single bit:
	 *   write, !force -> VM_WRITE     write, force -> VM_MAYWRITE
	 *   read,  !force -> VM_READ      read,  force -> VM_MAYREAD
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_extend_vma(mm, start);
		/* No vma, but the address is in the gate area: walk by hand. */
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;
			/* The gate area is never handed out for writing. */
			if (write) /* user gate pages are read-only */
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				/* pages[i] may end up NULL if there is no struct page. */
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		/* Refuse missing vmas, IO/PFN mappings and permission misses. */
		if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
				|| !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		/* Hugetlb vmas are handled wholesale; start/len/i are updated. */
		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;
		/* Reads of untouched private memory can use the zero page. */
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;

		do {
			struct page *page;

			/*
			 * Give up when the task carries TIF_MEMDIE.
			 * NOTE(review): presumably set when the task was
			 * picked by the OOM killer - confirm.
			 */
			if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
				return i ? i : -ENOMEM;

			/* Re-armed for every page; it may be cleared below. */
			if (write)
				foll_flags |= FOLL_WRITE;

			cond_resched();
			/* Fault the page in until follow_page() can see it. */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					/* Any other error bit is unexpected. */
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * Once the fault handler reports
				 * VM_FAULT_WRITE, stop insisting on a
				 * writable pte in the retried lookup.
				 * NOTE(review): presumably the write fault
				 * was satisfied (e.g. by breaking COW) yet
				 * the pte may stay read-only - confirm.
				 */
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				cond_resched();
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				/* Cache flushes issued for the page that was just stored. */
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
EXPORT_SYMBOL(get_user_pages);
1206
1207pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
1208 spinlock_t **ptl)
1209{
1210 pgd_t * pgd = pgd_offset(mm, addr);
1211 pud_t * pud = pud_alloc(mm, pgd, addr);
1212 if (pud) {
1213 pmd_t * pmd = pmd_alloc(mm, pud, addr);
1214 if (pmd)
1215 return pte_alloc_map_lock(mm, pmd, addr, ptl);
1216 }
1217 return NULL;
1218}
1219
1220
1221
1222
1223
1224
1225
1226
1227static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1228 struct page *page, pgprot_t prot)
1229{
1230 struct mm_struct *mm = vma->vm_mm;
1231 int retval;
1232 pte_t *pte;
1233 spinlock_t *ptl;
1234
1235 retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
1236 if (retval)
1237 goto out;
1238
1239 retval = -EINVAL;
1240 if (PageAnon(page))
1241 goto out_uncharge;
1242 retval = -ENOMEM;
1243 flush_dcache_page(page);
1244 pte = get_locked_pte(mm, addr, &ptl);
1245 if (!pte)
1246 goto out_uncharge;
1247 retval = -EBUSY;
1248 if (!pte_none(*pte))
1249 goto out_unlock;
1250
1251
1252 get_page(page);
1253 inc_mm_counter(mm, file_rss);
1254 page_add_file_rmap(page);
1255 set_pte_at(mm, addr, pte, mk_pte(page, prot));
1256
1257 retval = 0;
1258 pte_unmap_unlock(pte, ptl);
1259 return retval;
1260out_unlock:
1261 pte_unmap_unlock(pte, ptl);
1262out_uncharge:
1263 mem_cgroup_uncharge_page(page);
1264out:
1265 return retval;
1266}
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
1291 struct page *page)
1292{
1293 if (addr < vma->vm_start || addr >= vma->vm_end)
1294 return -EFAULT;
1295 if (!page_count(page))
1296 return -EINVAL;
1297 vma->vm_flags |= VM_INSERTPAGE;
1298 return insert_page(vma, addr, page, vma->vm_page_prot);
1299}
1300EXPORT_SYMBOL(vm_insert_page);
1301
1302static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1303 unsigned long pfn, pgprot_t prot)
1304{
1305 struct mm_struct *mm = vma->vm_mm;
1306 int retval;
1307 pte_t *pte, entry;
1308 spinlock_t *ptl;
1309
1310 retval = -ENOMEM;
1311 pte = get_locked_pte(mm, addr, &ptl);
1312 if (!pte)
1313 goto out;
1314 retval = -EBUSY;
1315 if (!pte_none(*pte))
1316 goto out_unlock;
1317
1318
1319 entry = pte_mkspecial(pfn_pte(pfn, prot));
1320 set_pte_at(mm, addr, pte, entry);
1321 update_mmu_cache(vma, addr, entry);
1322
1323 retval = 0;
1324out_unlock:
1325 pte_unmap_unlock(pte, ptl);
1326out:
1327 return retval;
1328}
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1343 unsigned long pfn)
1344{
1345
1346
1347
1348
1349
1350
1351 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
1352 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
1353 (VM_PFNMAP|VM_MIXEDMAP));
1354 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
1355 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
1356
1357 if (addr < vma->vm_start || addr >= vma->vm_end)
1358 return -EFAULT;
1359 return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
1360}
1361EXPORT_SYMBOL(vm_insert_pfn);
1362
1363int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
1364 unsigned long pfn)
1365{
1366 BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
1367
1368 if (addr < vma->vm_start || addr >= vma->vm_end)
1369 return -EFAULT;
1370
1371
1372
1373
1374
1375
1376
1377 if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
1378 struct page *page;
1379
1380 page = pfn_to_page(pfn);
1381 return insert_page(vma, addr, page, vma->vm_page_prot);
1382 }
1383 return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
1384}
1385EXPORT_SYMBOL(vm_insert_mixed);
1386
1387
1388
1389
1390
1391
/*
 * Fill the ptes for [addr, end) under one pmd entry with special ptes
 * for consecutive page frames starting at @pfn.
 *
 * Every pte in the range must be empty beforehand; a used one triggers
 * BUG_ON.  Returns 0 on success, -ENOMEM if the pte table could not be
 * allocated.
 */
static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
			unsigned long addr, unsigned long end,
			unsigned long pfn, pgprot_t prot)
{
	pte_t *pte;
	spinlock_t *ptl;

	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return -ENOMEM;
	arch_enter_lazy_mmu_mode();
	do {
		BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	/* "- 1": pte was advanced past the last entry written. */
	pte_unmap_unlock(pte - 1, ptl);
	return 0;
}
1412
1413static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
1414 unsigned long addr, unsigned long end,
1415 unsigned long pfn, pgprot_t prot)
1416{
1417 pmd_t *pmd;
1418 unsigned long next;
1419
1420 pfn -= addr >> PAGE_SHIFT;
1421 pmd = pmd_alloc(mm, pud, addr);
1422 if (!pmd)
1423 return -ENOMEM;
1424 do {
1425 next = pmd_addr_end(addr, end);
1426 if (remap_pte_range(mm, pmd, addr, next,
1427 pfn + (addr >> PAGE_SHIFT), prot))
1428 return -ENOMEM;
1429 } while (pmd++, addr = next, addr != end);
1430 return 0;
1431}
1432
1433static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1434 unsigned long addr, unsigned long end,
1435 unsigned long pfn, pgprot_t prot)
1436{
1437 pud_t *pud;
1438 unsigned long next;
1439
1440 pfn -= addr >> PAGE_SHIFT;
1441 pud = pud_alloc(mm, pgd, addr);
1442 if (!pud)
1443 return -ENOMEM;
1444 do {
1445 next = pud_addr_end(addr, end);
1446 if (remap_pmd_range(mm, pud, addr, next,
1447 pfn + (addr >> PAGE_SHIFT), prot))
1448 return -ENOMEM;
1449 } while (pud++, addr = next, addr != end);
1450 return 0;
1451}
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
/*
 * Map a run of physical page frames into user space.
 *
 * @vma:  user vma to map into; its flags (and possibly vm_pgoff) are
 *        modified.
 * @addr: target user address to start at.
 * @pfn:  first page frame to map.
 * @size: size of the area in bytes; rounded up to whole pages.
 * @prot: page protection for the new ptes.
 *
 * Returns 0 on success, -EINVAL when a COW mapping is not remapped in
 * full, or the error from the lower levels (-ENOMEM).  On a lower-level
 * failure this function does not undo the ptes it has already set.
 */
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
		    unsigned long pfn, unsigned long size, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long end = addr + PAGE_ALIGN(size);
	struct mm_struct *mm = vma->vm_mm;
	int err;

	/*
	 * A COW mapping has to be remapped as a whole.  Its vm_pgoff is
	 * then set to the first pfn, which is what the VM_PFNMAP branch of
	 * vm_normal_page() compares against to recognise raw pfn ptes.
	 */
	if (is_cow_mapping(vma->vm_flags)) {
		if (addr != vma->vm_start || end != vma->vm_end)
			return -EINVAL;
		vma->vm_pgoff = pfn;
	}

	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

	BUG_ON(addr >= end);
	/*
	 * Bias pfn so that "pfn + (addr >> PAGE_SHIFT)" gives the frame
	 * for whatever address the walk has reached.
	 */
	pfn -= addr >> PAGE_SHIFT;
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		err = remap_pud_range(mm, pgd, addr, next,
				pfn + (addr >> PAGE_SHIFT), prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);
	return err;
}
EXPORT_SYMBOL(remap_pfn_range);
1512
1513static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1514 unsigned long addr, unsigned long end,
1515 pte_fn_t fn, void *data)
1516{
1517 pte_t *pte;
1518 int err;
1519 pgtable_t token;
1520 spinlock_t *uninitialized_var(ptl);
1521
1522 pte = (mm == &init_mm) ?
1523 pte_alloc_kernel(pmd, addr) :
1524 pte_alloc_map_lock(mm, pmd, addr, &ptl);
1525 if (!pte)
1526 return -ENOMEM;
1527
1528 BUG_ON(pmd_huge(*pmd));
1529
1530 token = pmd_pgtable(*pmd);
1531
1532 do {
1533 err = fn(pte, token, addr, data);
1534 if (err)
1535 break;
1536 } while (pte++, addr += PAGE_SIZE, addr != end);
1537
1538 if (mm != &init_mm)
1539 pte_unmap_unlock(pte-1, ptl);
1540 return err;
1541}
1542
1543static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
1544 unsigned long addr, unsigned long end,
1545 pte_fn_t fn, void *data)
1546{
1547 pmd_t *pmd;
1548 unsigned long next;
1549 int err;
1550
1551 pmd = pmd_alloc(mm, pud, addr);
1552 if (!pmd)
1553 return -ENOMEM;
1554 do {
1555 next = pmd_addr_end(addr, end);
1556 err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
1557 if (err)
1558 break;
1559 } while (pmd++, addr = next, addr != end);
1560 return err;
1561}
1562
1563static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
1564 unsigned long addr, unsigned long end,
1565 pte_fn_t fn, void *data)
1566{
1567 pud_t *pud;
1568 unsigned long next;
1569 int err;
1570
1571 pud = pud_alloc(mm, pgd, addr);
1572 if (!pud)
1573 return -ENOMEM;
1574 do {
1575 next = pud_addr_end(addr, end);
1576 err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
1577 if (err)
1578 break;
1579 } while (pud++, addr = next, addr != end);
1580 return err;
1581}
1582
1583
1584
1585
1586
1587int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1588 unsigned long size, pte_fn_t fn, void *data)
1589{
1590 pgd_t *pgd;
1591 unsigned long next;
1592 unsigned long end = addr + size;
1593 int err;
1594
1595 BUG_ON(addr >= end);
1596 pgd = pgd_offset(mm, addr);
1597 do {
1598 next = pgd_addr_end(addr, end);
1599 err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
1600 if (err)
1601 break;
1602 } while (pgd++, addr = next, addr != end);
1603 return err;
1604}
1605EXPORT_SYMBOL_GPL(apply_to_page_range);
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
1617 pte_t *page_table, pte_t orig_pte)
1618{
1619 int same = 1;
1620#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
1621 if (sizeof(pte_t) > sizeof(unsigned long)) {
1622 spinlock_t *ptl = pte_lockptr(mm, pmd);
1623 spin_lock(ptl);
1624 same = pte_same(*page_table, orig_pte);
1625 spin_unlock(ptl);
1626 }
1627#endif
1628 pte_unmap(page_table);
1629 return same;
1630}
1631
1632
1633
1634
1635
1636
1637
1638static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
1639{
1640 if (likely(vma->vm_flags & VM_WRITE))
1641 pte = pte_mkwrite(pte);
1642 return pte;
1643}
1644
1645static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
1646{
1647
1648
1649
1650
1651
1652
1653 if (unlikely(!src)) {
1654 void *kaddr = kmap_atomic(dst, KM_USER0);
1655 void __user *uaddr = (void __user *)(va & PAGE_MASK);
1656
1657
1658
1659
1660
1661
1662
1663 if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
1664 memset(kaddr, 0, PAGE_SIZE);
1665 kunmap_atomic(kaddr, KM_USER0);
1666 flush_dcache_page(dst);
1667 } else
1668 copy_user_highpage(dst, src, va, vma);
1669}
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
/*
 * Handle a write fault on a pte that is present but not writable.
 *
 * Entered with @page_table mapped and @ptl held (handle_pte_fault() takes
 * the lock and returns straight into this function; do_swap_page() does the
 * same).  Every return path below has unmapped the pte and released the
 * lock.
 *
 * Two outcomes are possible:
 *  - reuse:  the existing page is simply made writable in place;
 *  - copy:   a new page is allocated, filled from the old one and installed
 *            in the pte instead.
 *
 * Returns a VM_FAULT_* mask: VM_FAULT_WRITE when the pte was made writable
 * (either way), 0 when the pte changed under us and nothing was done,
 * VM_FAULT_OOM on allocation/charge failure, VM_FAULT_SIGBUS when
 * ->page_mkwrite() refused the write.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		spinlock_t *ptl, pte_t orig_pte)
{
	struct page *old_page, *new_page;
	pte_t entry;
	int reuse = 0, ret = 0;
	int page_mkwrite = 0;
	struct page *dirty_page = NULL;

	old_page = vm_normal_page(vma, address, orig_pte);
	if (!old_page) {
		/*
		 * No struct page behind this pte.  A shared writable mapping
		 * just gets the write bit turned on; anything else takes the
		 * copy path with a NULL source (see cow_user_page()).
		 */
		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
				     (VM_WRITE|VM_SHARED))
			goto reuse;
		goto gotten;
	}

	/*
	 * Anonymous page: reuse it only if can_share_swap_page() says so.
	 * The page lock is only tried, never waited for; if it is busy we
	 * fall through to the copy path.
	 */
	if (PageAnon(old_page)) {
		if (!TestSetPageLocked(old_page)) {
			reuse = can_share_swap_page(old_page);
			unlock_page(old_page);
		}
	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
		/*
		 * File page in a shared writable mapping: always reused, but
		 * the owner may want to be told first via ->page_mkwrite().
		 */
		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
			/*
			 * The callback is invoked without the page-table
			 * lock, so pin the page across the unlocked window.
			 */
			page_cache_get(old_page);
			pte_unmap_unlock(page_table, ptl);

			if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
				goto unwritable_page;

			/*
			 * Retake the lock and make sure the pte is still the
			 * one we faulted on; if not, someone else handled it
			 * and we just back out.
			 */
			page_table = pte_offset_map_lock(mm, pmd, address,
							 &ptl);
			page_cache_release(old_page);
			if (!pte_same(*page_table, orig_pte))
				goto unlock;

			page_mkwrite = 1;
		}
		/* Hold a reference for the dirty accounting done at 'unlock'. */
		dirty_page = old_page;
		get_page(dirty_page);
		reuse = 1;
	}

	if (reuse) {
reuse:
		/* Upgrade the existing pte in place: young, dirty, writable. */
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = pte_mkyoung(orig_pte);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		if (ptep_set_access_flags(vma, address, page_table, entry,1))
			update_mmu_cache(vma, address, entry);
		ret |= VM_FAULT_WRITE;
		goto unlock;
	}

	/*
	 * Copy path.  Pin the old page, then drop the lock: the allocation
	 * and copy below are done unlocked.
	 */
	page_cache_get(old_page);
gotten:
	pte_unmap_unlock(page_table, ptl);

	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	VM_BUG_ON(old_page == ZERO_PAGE(0));
	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
	if (!new_page)
		goto oom;
	cow_user_page(new_page, old_page, address, vma);
	__SetPageUptodate(new_page);

	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
		goto oom_free_new;

	/*
	 * Retake the lock and only install the copy if the pte has not
	 * changed while we were unlocked.
	 */
	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (likely(pte_same(*page_table, orig_pte))) {
		/* The slot now holds an anonymous page: fix the rss counters. */
		if (old_page) {
			if (!PageAnon(old_page)) {
				dec_mm_counter(mm, file_rss);
				inc_mm_counter(mm, anon_rss);
			}
		} else
			inc_mm_counter(mm, anon_rss);
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = mk_pte(new_page, vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		/*
		 * Clear and flush the old translation before the new pte is
		 * written, so the old and new entries are never both live.
		 */
		ptep_clear_flush(vma, address, page_table);
		set_pte_at(mm, address, page_table, entry);
		update_mmu_cache(vma, address, entry);
		lru_cache_add_active(new_page);
		page_add_new_anon_rmap(new_page, vma, address);

		if (old_page) {
			/*
			 * The old page is no longer mapped here; this is done
			 * only after the new pte is in place.
			 * NOTE(review): the ordering relative to the pte update
			 * looks deliberate - confirm before moving it.
			 */
			page_remove_rmap(old_page, vma);
		}

		/*
		 * Swap the pointer so the release just below drops a
		 * reference on the OLD page (presumably the one the replaced
		 * pte held) and keeps the new page's reference for the
		 * mapping.
		 */
		new_page = old_page;
		ret |= VM_FAULT_WRITE;
	} else
		mem_cgroup_uncharge_page(new_page);

	if (new_page)
		page_cache_release(new_page);
	/* Drop the pin taken before 'gotten'. */
	if (old_page)
		page_cache_release(old_page);
unlock:
	pte_unmap_unlock(page_table, ptl);
	if (dirty_page) {
		if (vma->vm_file)
			file_update_time(vma->vm_file);

		/*
		 * Wait for any current holder of the page lock to finish
		 * before dirtying the page.
		 * NOTE(review): the exact race this closes is not visible
		 * from this function - confirm against the commit history.
		 */
		wait_on_page_locked(dirty_page);
		set_page_dirty_balance(dirty_page, page_mkwrite);
		put_page(dirty_page);
	}
	return ret;
oom_free_new:
	page_cache_release(new_page);
oom:
	if (old_page)
		page_cache_release(old_page);
	return VM_FAULT_OOM;

unwritable_page:
	/* Lock already dropped; just release the pin taken for the callback. */
	page_cache_release(old_page);
	return VM_FAULT_SIGBUS;
}
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
/*
 * vm_truncate_count holds either a copy of the mapping's truncate_count
 * ("this vma is done for that pass") or a restart address stored by
 * unmap_mapping_range_vma().  The two are told apart by alignment: a value
 * whose offset-within-page bits are all zero is treated as a restart
 * address, which is why unmap_mapping_range() skips page-aligned values
 * when it advances mapping->truncate_count.
 */
#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
1921
1922static void reset_vma_truncate_counts(struct address_space *mapping)
1923{
1924 struct vm_area_struct *vma;
1925 struct prio_tree_iter iter;
1926
1927 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
1928 vma->vm_truncate_count = 0;
1929 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1930 vma->vm_truncate_count = 0;
1931}
1932
/*
 * Zap [start_addr, end_addr) of @vma on behalf of unmap_mapping_range().
 *
 * Called with details->i_mmap_lock held.  Progress is recorded in
 * vma->vm_truncate_count: a restart address while the vma is only partly
 * done, or details->truncate_count once it is finished.
 *
 * Returns 0 when the vma is finished and the lock was never dropped.
 * Returns -EINTR when the lock was dropped and retaken to reschedule; the
 * caller must then restart its walk, because the vma tree/list may have
 * changed while unlocked.
 */
static int unmap_mapping_range_vma(struct vm_area_struct *vma,
		unsigned long start_addr, unsigned long end_addr,
		struct zap_details *details)
{
	unsigned long restart_addr;
	int need_break;

	/*
	 * If an earlier, interrupted pass left a restart address in the vma,
	 * resume from there rather than re-zapping what is already done.
	 */
again:
	restart_addr = vma->vm_truncate_count;
	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
		start_addr = restart_addr;
		if (start_addr >= end_addr) {
			/* Nothing left in range: mark the vma as done. */
			vma->vm_truncate_count = details->truncate_count;
			return 0;
		}
	}

	restart_addr = zap_page_range(vma, start_addr,
					end_addr - start_addr, details);
	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);

	if (restart_addr >= end_addr) {
		/* Whole range zapped: mark the vma as done for this pass. */
		vma->vm_truncate_count = details->truncate_count;
		if (!need_break)
			return 0;
	} else {
		/* Stopped early: remember where to pick up again. */
		vma->vm_truncate_count = restart_addr;
		if (!need_break)
			goto again;
	}

	/* Give others a turn; the caller sees -EINTR and restarts its walk. */
	spin_unlock(details->i_mmap_lock);
	cond_resched();
	spin_lock(details->i_mmap_lock);
	return -EINTR;
}
1979
1980static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
1981 struct zap_details *details)
1982{
1983 struct vm_area_struct *vma;
1984 struct prio_tree_iter iter;
1985 pgoff_t vba, vea, zba, zea;
1986
1987restart:
1988 vma_prio_tree_foreach(vma, &iter, root,
1989 details->first_index, details->last_index) {
1990
1991 if (vma->vm_truncate_count == details->truncate_count)
1992 continue;
1993
1994 vba = vma->vm_pgoff;
1995 vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
1996
1997 zba = details->first_index;
1998 if (zba < vba)
1999 zba = vba;
2000 zea = details->last_index;
2001 if (zea > vea)
2002 zea = vea;
2003
2004 if (unmap_mapping_range_vma(vma,
2005 ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
2006 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
2007 details) < 0)
2008 goto restart;
2009 }
2010}
2011
2012static inline void unmap_mapping_range_list(struct list_head *head,
2013 struct zap_details *details)
2014{
2015 struct vm_area_struct *vma;
2016
2017
2018
2019
2020
2021
2022
2023restart:
2024 list_for_each_entry(vma, head, shared.vm_set.list) {
2025
2026 if (vma->vm_truncate_count == details->truncate_count)
2027 continue;
2028 details->nonlinear_vma = vma;
2029 if (unmap_mapping_range_vma(vma, vma->vm_start,
2030 vma->vm_end, details) < 0)
2031 goto restart;
2032 }
2033}
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
/*
 * unmap_mapping_range - unmap a byte range of a file from every mm that
 * maps it.
 * @mapping:   address space whose mappings are to be torn down
 * @holebegin: first byte of the range; converted to a page index by
 *             truncation (rounding down)
 * @holelen:   length of the range in bytes, rounded up to whole pages;
 *             0 yields a range that extends to the largest page index
 *             (via the wrap check below)
 * @even_cows: non-zero to also zap private COWed pages (check_mapping is
 *             left NULL); zero to restrict zapping to pages that belong to
 *             @mapping
 */
void unmap_mapping_range(struct address_space *mapping,
		loff_t const holebegin, loff_t const holelen, int even_cows)
{
	struct zap_details details;
	pgoff_t hba = holebegin >> PAGE_SHIFT;
	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/*
	 * When loff_t is wider than pgoff_t, the end of the hole may not fit
	 * in a page index; if so, clamp the length so the range runs to
	 * ULONG_MAX.
	 */
	if (sizeof(holelen) > sizeof(hlen)) {
		long long holeend =
			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (holeend & ~(long long)ULONG_MAX)
			hlen = ULONG_MAX - hba + 1;
	}

	details.check_mapping = even_cows? NULL: mapping;
	details.nonlinear_vma = NULL;
	details.first_index = hba;
	details.last_index = hba + hlen - 1;
	/* hlen == 0 (or overflow) wraps last_index below first_index. */
	if (details.last_index < details.first_index)
		details.last_index = ULONG_MAX;
	details.i_mmap_lock = &mapping->i_mmap_lock;

	spin_lock(&mapping->i_mmap_lock);

	/*
	 * Start a new pass.  Page-aligned counter values are reserved for
	 * restart addresses (see is_restart_addr()), so step past them; if
	 * the counter wrapped to 0, stale per-vma values are wiped first.
	 */
	mapping->truncate_count++;
	if (unlikely(is_restart_addr(mapping->truncate_count))) {
		if (mapping->truncate_count == 0)
			reset_vma_truncate_counts(mapping);
		mapping->truncate_count++;
	}
	details.truncate_count = mapping->truncate_count;

	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
		unmap_mapping_range_tree(&mapping->i_mmap, &details);
	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
	spin_unlock(&mapping->i_mmap_lock);
}
2089EXPORT_SYMBOL(unmap_mapping_range);
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100int vmtruncate(struct inode * inode, loff_t offset)
2101{
2102 if (inode->i_size < offset) {
2103 unsigned long limit;
2104
2105 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2106 if (limit != RLIM_INFINITY && offset > limit)
2107 goto out_sig;
2108 if (offset > inode->i_sb->s_maxbytes)
2109 goto out_big;
2110 i_size_write(inode, offset);
2111 } else {
2112 struct address_space *mapping = inode->i_mapping;
2113
2114
2115
2116
2117
2118
2119 if (IS_SWAPFILE(inode))
2120 return -ETXTBSY;
2121 i_size_write(inode, offset);
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2133 truncate_inode_pages(mapping, offset);
2134 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2135 }
2136
2137 if (inode->i_op && inode->i_op->truncate)
2138 inode->i_op->truncate(inode);
2139 return 0;
2140
2141out_sig:
2142 send_sig(SIGXFSZ, current, 0);
2143out_big:
2144 return -EFBIG;
2145}
2146EXPORT_SYMBOL(vmtruncate);
2147
2148int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2149{
2150 struct address_space *mapping = inode->i_mapping;
2151
2152
2153
2154
2155
2156
2157 if (!inode->i_op || !inode->i_op->truncate_range)
2158 return -ENOSYS;
2159
2160 mutex_lock(&inode->i_mutex);
2161 down_write(&inode->i_alloc_sem);
2162 unmap_mapping_range(mapping, offset, (end - offset), 1);
2163 truncate_inode_pages_range(mapping, offset, end);
2164 unmap_mapping_range(mapping, offset, (end - offset), 1);
2165 inode->i_op->truncate_range(inode, offset, end);
2166 up_write(&inode->i_alloc_sem);
2167 mutex_unlock(&inode->i_mutex);
2168
2169 return 0;
2170}
2171
2172
2173
2174
2175
2176
2177static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2178 unsigned long address, pte_t *page_table, pmd_t *pmd,
2179 int write_access, pte_t orig_pte)
2180{
2181 spinlock_t *ptl;
2182 struct page *page;
2183 swp_entry_t entry;
2184 pte_t pte;
2185 int ret = 0;
2186
2187 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2188 goto out;
2189
2190 entry = pte_to_swp_entry(orig_pte);
2191 if (is_migration_entry(entry)) {
2192 migration_entry_wait(mm, pmd, address);
2193 goto out;
2194 }
2195 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2196 page = lookup_swap_cache(entry);
2197 if (!page) {
2198 grab_swap_token();
2199 page = swapin_readahead(entry,
2200 GFP_HIGHUSER_MOVABLE, vma, address);
2201 if (!page) {
2202
2203
2204
2205
2206 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2207 if (likely(pte_same(*page_table, orig_pte)))
2208 ret = VM_FAULT_OOM;
2209 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2210 goto unlock;
2211 }
2212
2213
2214 ret = VM_FAULT_MAJOR;
2215 count_vm_event(PGMAJFAULT);
2216 }
2217
2218 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2219 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2220 ret = VM_FAULT_OOM;
2221 goto out;
2222 }
2223
2224 mark_page_accessed(page);
2225 lock_page(page);
2226 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2227
2228
2229
2230
2231 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2232 if (unlikely(!pte_same(*page_table, orig_pte)))
2233 goto out_nomap;
2234
2235 if (unlikely(!PageUptodate(page))) {
2236 ret = VM_FAULT_SIGBUS;
2237 goto out_nomap;
2238 }
2239
2240
2241
2242 inc_mm_counter(mm, anon_rss);
2243 pte = mk_pte(page, vma->vm_page_prot);
2244 if (write_access && can_share_swap_page(page)) {
2245 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
2246 write_access = 0;
2247 }
2248
2249 flush_icache_page(vma, page);
2250 set_pte_at(mm, address, page_table, pte);
2251 page_add_anon_rmap(page, vma, address);
2252
2253 swap_free(entry);
2254 if (vm_swap_full())
2255 remove_exclusive_swap_page(page);
2256 unlock_page(page);
2257
2258 if (write_access) {
2259 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
2260 if (ret & VM_FAULT_ERROR)
2261 ret &= VM_FAULT_ERROR;
2262 goto out;
2263 }
2264
2265
2266 update_mmu_cache(vma, address, pte);
2267unlock:
2268 pte_unmap_unlock(page_table, ptl);
2269out:
2270 return ret;
2271out_nomap:
2272 mem_cgroup_uncharge_page(page);
2273 pte_unmap_unlock(page_table, ptl);
2274 unlock_page(page);
2275 page_cache_release(page);
2276 return ret;
2277}
2278
2279
2280
2281
2282
2283
2284static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2285 unsigned long address, pte_t *page_table, pmd_t *pmd,
2286 int write_access)
2287{
2288 struct page *page;
2289 spinlock_t *ptl;
2290 pte_t entry;
2291
2292
2293 pte_unmap(page_table);
2294
2295 if (unlikely(anon_vma_prepare(vma)))
2296 goto oom;
2297 page = alloc_zeroed_user_highpage_movable(vma, address);
2298 if (!page)
2299 goto oom;
2300 __SetPageUptodate(page);
2301
2302 if (mem_cgroup_charge(page, mm, GFP_KERNEL))
2303 goto oom_free_page;
2304
2305 entry = mk_pte(page, vma->vm_page_prot);
2306 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2307
2308 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2309 if (!pte_none(*page_table))
2310 goto release;
2311 inc_mm_counter(mm, anon_rss);
2312 lru_cache_add_active(page);
2313 page_add_new_anon_rmap(page, vma, address);
2314 set_pte_at(mm, address, page_table, entry);
2315
2316
2317 update_mmu_cache(vma, address, entry);
2318unlock:
2319 pte_unmap_unlock(page_table, ptl);
2320 return 0;
2321release:
2322 mem_cgroup_uncharge_page(page);
2323 page_cache_release(page);
2324 goto unlock;
2325oom_free_page:
2326 page_cache_release(page);
2327oom:
2328 return VM_FAULT_OOM;
2329}
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2345 unsigned long address, pmd_t *pmd,
2346 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2347{
2348 pte_t *page_table;
2349 spinlock_t *ptl;
2350 struct page *page;
2351 pte_t entry;
2352 int anon = 0;
2353 struct page *dirty_page = NULL;
2354 struct vm_fault vmf;
2355 int ret;
2356 int page_mkwrite = 0;
2357
2358 vmf.virtual_address = (void __user *)(address & PAGE_MASK);
2359 vmf.pgoff = pgoff;
2360 vmf.flags = flags;
2361 vmf.page = NULL;
2362
2363 ret = vma->vm_ops->fault(vma, &vmf);
2364 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
2365 return ret;
2366
2367
2368
2369
2370
2371 if (unlikely(!(ret & VM_FAULT_LOCKED)))
2372 lock_page(vmf.page);
2373 else
2374 VM_BUG_ON(!PageLocked(vmf.page));
2375
2376
2377
2378
2379 page = vmf.page;
2380 if (flags & FAULT_FLAG_WRITE) {
2381 if (!(vma->vm_flags & VM_SHARED)) {
2382 anon = 1;
2383 if (unlikely(anon_vma_prepare(vma))) {
2384 ret = VM_FAULT_OOM;
2385 goto out;
2386 }
2387 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
2388 vma, address);
2389 if (!page) {
2390 ret = VM_FAULT_OOM;
2391 goto out;
2392 }
2393 copy_user_highpage(page, vmf.page, address, vma);
2394 __SetPageUptodate(page);
2395 } else {
2396
2397
2398
2399
2400
2401 if (vma->vm_ops->page_mkwrite) {
2402 unlock_page(page);
2403 if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
2404 ret = VM_FAULT_SIGBUS;
2405 anon = 1;
2406 goto out_unlocked;
2407 }
2408 lock_page(page);
2409
2410
2411
2412
2413
2414
2415
2416 if (!page->mapping) {
2417 ret = 0;
2418 anon = 1;
2419 goto out;
2420 }
2421 page_mkwrite = 1;
2422 }
2423 }
2424
2425 }
2426
2427 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2428 ret = VM_FAULT_OOM;
2429 goto out;
2430 }
2431
2432 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445 if (likely(pte_same(*page_table, orig_pte))) {
2446 flush_icache_page(vma, page);
2447 entry = mk_pte(page, vma->vm_page_prot);
2448 if (flags & FAULT_FLAG_WRITE)
2449 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2450 set_pte_at(mm, address, page_table, entry);
2451 if (anon) {
2452 inc_mm_counter(mm, anon_rss);
2453 lru_cache_add_active(page);
2454 page_add_new_anon_rmap(page, vma, address);
2455 } else {
2456 inc_mm_counter(mm, file_rss);
2457 page_add_file_rmap(page);
2458 if (flags & FAULT_FLAG_WRITE) {
2459 dirty_page = page;
2460 get_page(dirty_page);
2461 }
2462 }
2463
2464
2465 update_mmu_cache(vma, address, entry);
2466 } else {
2467 mem_cgroup_uncharge_page(page);
2468 if (anon)
2469 page_cache_release(page);
2470 else
2471 anon = 1;
2472 }
2473
2474 pte_unmap_unlock(page_table, ptl);
2475
2476out:
2477 unlock_page(vmf.page);
2478out_unlocked:
2479 if (anon)
2480 page_cache_release(vmf.page);
2481 else if (dirty_page) {
2482 if (vma->vm_file)
2483 file_update_time(vma->vm_file);
2484
2485 set_page_dirty_balance(dirty_page, page_mkwrite);
2486 put_page(dirty_page);
2487 }
2488
2489 return ret;
2490}
2491
2492static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2493 unsigned long address, pte_t *page_table, pmd_t *pmd,
2494 int write_access, pte_t orig_pte)
2495{
2496 pgoff_t pgoff = (((address & PAGE_MASK)
2497 - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2498 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
2499
2500 pte_unmap(page_table);
2501 return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
2502}
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2522 unsigned long address, pte_t *page_table, pmd_t *pmd,
2523 int write_access)
2524{
2525 spinlock_t *ptl;
2526 pte_t entry;
2527 unsigned long pfn;
2528
2529 pte_unmap(page_table);
2530 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
2531 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
2532
2533 pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
2534
2535 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
2536
2537 if (unlikely(pfn == NOPFN_OOM))
2538 return VM_FAULT_OOM;
2539 else if (unlikely(pfn == NOPFN_SIGBUS))
2540 return VM_FAULT_SIGBUS;
2541 else if (unlikely(pfn == NOPFN_REFAULT))
2542 return 0;
2543
2544 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2545
2546
2547 if (pte_none(*page_table)) {
2548 entry = pfn_pte(pfn, vma->vm_page_prot);
2549 if (write_access)
2550 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2551 set_pte_at(mm, address, page_table, entry);
2552 }
2553 pte_unmap_unlock(page_table, ptl);
2554 return 0;
2555}
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2567 unsigned long address, pte_t *page_table, pmd_t *pmd,
2568 int write_access, pte_t orig_pte)
2569{
2570 unsigned int flags = FAULT_FLAG_NONLINEAR |
2571 (write_access ? FAULT_FLAG_WRITE : 0);
2572 pgoff_t pgoff;
2573
2574 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2575 return 0;
2576
2577 if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
2578 !(vma->vm_flags & VM_CAN_NONLINEAR))) {
2579
2580
2581
2582 print_bad_pte(vma, orig_pte, address);
2583 return VM_FAULT_OOM;
2584 }
2585
2586 pgoff = pte_to_pgoff(orig_pte);
2587 return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
2588}
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
/*
 * Dispatch a fault according to the current state of the pte.
 *
 * Entered with @pte mapped but the page-table lock not held.  The pte is
 * read once into 'entry'; each handler (or the locked section below)
 * re-validates it, since it may change at any time until the lock is
 * taken.
 *
 * Every path unmaps @pte before returning, either here or inside the
 * handler it was passed to.  Returns a VM_FAULT_* mask (0 on success).
 */
static inline int handle_pte_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, pmd_t *pmd, int write_access)
{
	pte_t entry;
	spinlock_t *ptl;

	entry = *pte;
	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			/* Nothing mapped yet: file-backed, pfn-backed or anonymous. */
			if (vma->vm_ops) {
				if (likely(vma->vm_ops->fault))
					return do_linear_fault(mm, vma, address,
						pte, pmd, write_access, entry);
				if (unlikely(vma->vm_ops->nopfn))
					return do_no_pfn(mm, vma, address, pte,
							 pmd, write_access);
			}
			return do_anonymous_page(mm, vma, address,
						 pte, pmd, write_access);
		}
		/* Non-present but not empty: a file-offset pte or a swap entry. */
		if (pte_file(entry))
			return do_nonlinear_fault(mm, vma, address,
					pte, pmd, write_access, entry);
		return do_swap_page(mm, vma, address,
					pte, pmd, write_access, entry);
	}

	/* Present pte: from here on everything happens under the lock. */
	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (unlikely(!pte_same(*pte, entry)))
		goto unlock;
	if (write_access) {
		/*
		 * Write to a read-only pte: do_wp_page() takes over the
		 * mapped pte and the lock and releases both itself.
		 */
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address,
					pte, pmd, ptl, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	if (ptep_set_access_flags(vma, address, pte, entry, write_access)) {
		update_mmu_cache(vma, address, entry);
	} else {
		/*
		 * The pte already had the wanted flags, yet a write fault
		 * occurred: flush the TLB entry for this address.
		 * NOTE(review): presumably a stale TLB entry caused the
		 * fault - confirm against architecture notes.
		 */
		if (write_access)
			flush_tlb_page(vma, address);
	}
unlock:
	pte_unmap_unlock(pte, ptl);
	return 0;
}
2658
2659
2660
2661
2662int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2663 unsigned long address, int write_access)
2664{
2665 pgd_t *pgd;
2666 pud_t *pud;
2667 pmd_t *pmd;
2668 pte_t *pte;
2669
2670 __set_current_state(TASK_RUNNING);
2671
2672 count_vm_event(PGFAULT);
2673
2674 if (unlikely(is_vm_hugetlb_page(vma)))
2675 return hugetlb_fault(mm, vma, address, write_access);
2676
2677 pgd = pgd_offset(mm, address);
2678 pud = pud_alloc(mm, pgd, address);
2679 if (!pud)
2680 return VM_FAULT_OOM;
2681 pmd = pmd_alloc(mm, pud, address);
2682 if (!pmd)
2683 return VM_FAULT_OOM;
2684 pte = pte_alloc_map(mm, pmd, address);
2685 if (!pte)
2686 return VM_FAULT_OOM;
2687
2688 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
2689}
2690
2691#ifndef __PAGETABLE_PUD_FOLDED
2692
2693
2694
2695
2696int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
2697{
2698 pud_t *new = pud_alloc_one(mm, address);
2699 if (!new)
2700 return -ENOMEM;
2701
2702 smp_wmb();
2703
2704 spin_lock(&mm->page_table_lock);
2705 if (pgd_present(*pgd))
2706 pud_free(mm, new);
2707 else
2708 pgd_populate(mm, pgd, new);
2709 spin_unlock(&mm->page_table_lock);
2710 return 0;
2711}
2712#endif
2713
2714#ifndef __PAGETABLE_PMD_FOLDED
2715
2716
2717
2718
2719int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
2720{
2721 pmd_t *new = pmd_alloc_one(mm, address);
2722 if (!new)
2723 return -ENOMEM;
2724
2725 smp_wmb();
2726
2727 spin_lock(&mm->page_table_lock);
2728#ifndef __ARCH_HAS_4LEVEL_HACK
2729 if (pud_present(*pud))
2730 pmd_free(mm, new);
2731 else
2732 pud_populate(mm, pud, new);
2733#else
2734 if (pgd_present(*pud))
2735 pmd_free(mm, new);
2736 else
2737 pgd_populate(mm, pud, new);
2738#endif
2739 spin_unlock(&mm->page_table_lock);
2740 return 0;
2741}
2742#endif
2743
2744int make_pages_present(unsigned long addr, unsigned long end)
2745{
2746 int ret, len, write;
2747 struct vm_area_struct * vma;
2748
2749 vma = find_vma(current->mm, addr);
2750 if (!vma)
2751 return -ENOMEM;
2752 write = (vma->vm_flags & VM_WRITE) != 0;
2753 BUG_ON(addr >= end);
2754 BUG_ON(end > vma->vm_end);
2755 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
2756 ret = get_user_pages(current, current->mm, addr,
2757 len, write, 0, NULL, NULL);
2758 if (ret < 0) {
2759
2760
2761
2762
2763
2764 if (ret == -EFAULT)
2765 ret = -ENOMEM;
2766 else if (ret == -ENOMEM)
2767 ret = -EAGAIN;
2768 return ret;
2769 }
2770 return ret == len ? 0 : -ENOMEM;
2771}
2772
2773#if !defined(__HAVE_ARCH_GATE_AREA)
2774
2775#if defined(AT_SYSINFO_EHDR)
2776static struct vm_area_struct gate_vma;
2777
2778static int __init gate_vma_init(void)
2779{
2780 gate_vma.vm_mm = NULL;
2781 gate_vma.vm_start = FIXADDR_USER_START;
2782 gate_vma.vm_end = FIXADDR_USER_END;
2783 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
2784 gate_vma.vm_page_prot = __P101;
2785
2786
2787
2788
2789
2790
2791 gate_vma.vm_flags |= VM_ALWAYSDUMP;
2792 return 0;
2793}
2794__initcall(gate_vma_init);
2795#endif
2796
/*
 * Return the gate vma, or NULL when the build has no AT_SYSINFO_EHDR.
 * @tsk is not used by this generic implementation.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifndef AT_SYSINFO_EHDR
	return NULL;
#else
	return &gate_vma;
#endif
}
2805
/*
 * Return 1 if @addr lies inside [FIXADDR_USER_START, FIXADDR_USER_END),
 * 0 otherwise.  Always 0 when the build has no AT_SYSINFO_EHDR.
 */
int in_gate_area_no_task(unsigned long addr)
{
	int inside = 0;

#ifdef AT_SYSINFO_EHDR
	inside = (addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END);
#endif
	return inside;
}
2814
2815#endif
2816
2817
2818
2819
2820
2821
2822int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
2823{
2824 struct mm_struct *mm;
2825 struct vm_area_struct *vma;
2826 struct page *page;
2827 void *old_buf = buf;
2828
2829 mm = get_task_mm(tsk);
2830 if (!mm)
2831 return 0;
2832
2833 down_read(&mm->mmap_sem);
2834
2835 while (len) {
2836 int bytes, ret, offset;
2837 void *maddr;
2838
2839 ret = get_user_pages(tsk, mm, addr, 1,
2840 write, 1, &page, &vma);
2841 if (ret <= 0)
2842 break;
2843
2844 bytes = len;
2845 offset = addr & (PAGE_SIZE-1);
2846 if (bytes > PAGE_SIZE-offset)
2847 bytes = PAGE_SIZE-offset;
2848
2849 maddr = kmap(page);
2850 if (write) {
2851 copy_to_user_page(vma, page, addr,
2852 maddr + offset, buf, bytes);
2853 set_page_dirty_lock(page);
2854 } else {
2855 copy_from_user_page(vma, page, addr,
2856 buf, maddr + offset, bytes);
2857 }
2858 kunmap(page);
2859 page_cache_release(page);
2860 len -= bytes;
2861 buf += bytes;
2862 addr += bytes;
2863 }
2864 up_read(&mm->mmap_sem);
2865 mmput(mm);
2866
2867 return buf - old_buf;
2868}
2869
2870
2871
2872
2873void print_vma_addr(char *prefix, unsigned long ip)
2874{
2875 struct mm_struct *mm = current->mm;
2876 struct vm_area_struct *vma;
2877
2878
2879
2880
2881
2882 if (preempt_count())
2883 return;
2884
2885 down_read(&mm->mmap_sem);
2886 vma = find_vma(mm, ip);
2887 if (vma && vma->vm_file) {
2888 struct file *f = vma->vm_file;
2889 char *buf = (char *)__get_free_page(GFP_KERNEL);
2890 if (buf) {
2891 char *p, *s;
2892
2893 p = d_path(&f->f_path, buf, PAGE_SIZE);
2894 if (IS_ERR(p))
2895 p = "?";
2896 s = strrchr(p, '/');
2897 if (s)
2898 p = s+1;
2899 printk("%s%s[%lx+%lx]", prefix, p,
2900 vma->vm_start,
2901 vma->vm_end - vma->vm_start);
2902 free_page((unsigned long)buf);
2903 }
2904 }
2905 up_read(¤t->mm->mmap_sem);
2906}
2907