1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41#include <linux/kernel_stat.h>
42#include <linux/mm.h>
43#include <linux/hugetlb.h>
44#include <linux/mman.h>
45#include <linux/swap.h>
46#include <linux/highmem.h>
47#include <linux/pagemap.h>
48#include <linux/ksm.h>
49#include <linux/rmap.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/init.h>
53#include <linux/writeback.h>
54#include <linux/memcontrol.h>
55#include <linux/mmu_notifier.h>
56#include <linux/kallsyms.h>
57#include <linux/swapops.h>
58#include <linux/elf.h>
59#include <linux/gfp.h>
60
61#include <asm/io.h>
62#include <asm/pgalloc.h>
63#include <asm/uaccess.h>
64#include <asm/tlb.h>
65#include <asm/tlbflush.h>
66#include <asm/pgtable.h>
67
68#include "internal.h"
69
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
 * Global page-map bookkeeping, only defined when
 * CONFIG_NEED_MULTIPLE_NODES is not set.  Both symbols are exported so
 * modules can reference them.
 * NOTE(review): presumably mem_map is the flat array of struct page and
 * max_mapnr bounds the valid indices into it; they are assigned outside
 * this file -- confirm against the architecture setup code.
 */
unsigned long max_mapnr;
struct page *mem_map;

EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(mem_map);
#endif
78
/* NOTE(review): presumably the count of physical page frames; assigned elsewhere. */
unsigned long num_physpages;
/*
 * NOTE(review): presumably the upper bound of the kernel's directly mapped
 * memory; it is assigned outside this file -- confirm before relying on it.
 */
void * high_memory;

/* Both symbols are exported for module use. */
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
91
92
93
94
95
96
97
/*
 * Address-space randomization level.  The default is 1 when
 * CONFIG_COMPAT_BRK is set and 2 otherwise; the "norandmaps" boot option
 * handler in this file sets it to 0.
 * NOTE(review): the exact meaning of levels 1 and 2 is defined by the
 * consumers of this variable, not here -- confirm before documenting further.
 */
int randomize_va_space __read_mostly =
#ifdef CONFIG_COMPAT_BRK
					1;
#else
					2;
#endif
104
/*
 * Boot-option handler registered for "norandmaps": switches address space
 * randomization off by zeroing randomize_va_space.  The option argument @s
 * is ignored.  Always returns 1.
 */
static int __init disable_randmaps(char *s)
{
	randomize_va_space = 0;
	return 1;
}
__setup("norandmaps", disable_randmaps);
111
/* Page frame number of ZERO_PAGE(0); filled in by init_zero_pfn(). */
unsigned long zero_pfn __read_mostly;
/*
 * Largest pfn that vm_normal_page() accepts before reporting a bad pte.
 * It is assigned outside the code visible here.
 */
unsigned long highest_memmap_pfn __read_mostly;
114
115
116
117
/*
 * Record the pfn of ZERO_PAGE(0) in zero_pfn.  Registered as a
 * core_initcall; always returns 0.
 */
static int __init init_zero_pfn(void)
{
	zero_pfn = page_to_pfn(ZERO_PAGE(0));
	return 0;
}
core_initcall(init_zero_pfn);
124
125
126#if defined(SPLIT_RSS_COUNTING)
127
128static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
129{
130 int i;
131
132 for (i = 0; i < NR_MM_COUNTERS; i++) {
133 if (task->rss_stat.count[i]) {
134 add_mm_counter(mm, i, task->rss_stat.count[i]);
135 task->rss_stat.count[i] = 0;
136 }
137 }
138 task->rss_stat.events = 0;
139}
140
141static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
142{
143 struct task_struct *task = current;
144
145 if (likely(task->mm == mm))
146 task->rss_stat.count[member] += val;
147 else
148 add_mm_counter(mm, member, val);
149}
150#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
151#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
152
153
154#define TASK_RSS_EVENTS_THRESH (64)
155static void check_sync_rss_stat(struct task_struct *task)
156{
157 if (unlikely(task != current))
158 return;
159 if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
160 __sync_task_rss_stat(task, task->mm);
161}
162
163unsigned long get_mm_counter(struct mm_struct *mm, int member)
164{
165 long val = 0;
166
167
168
169
170
171 val = atomic_long_read(&mm->rss_stat.count[member]);
172
173
174
175
176 if (val < 0)
177 return 0;
178 return (unsigned long)val;
179}
180
/*
 * Externally visible entry point for flushing @task's cached RSS deltas
 * into @mm; a thin wrapper around __sync_task_rss_stat().
 */
void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
{
	__sync_task_rss_stat(task, mm);
}
185#else
186
/*
 * Without SPLIT_RSS_COUNTING there is no per-task cache: the "fast"
 * helpers map straight onto the plain counter operations.
 */
#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)

/* Nothing to flush in this configuration; intentionally empty. */
static void check_sync_rss_stat(struct task_struct *task)
{
}
193
194#endif
195
196
197
198
199
200
201
/*
 * Report the contents of a pgd entry through pgd_ERROR() and then clear
 * the entry.  The report is made first so it shows the offending value.
 * NOTE(review): presumably reached from pgd_none_or_clear_bad() when the
 * entry fails the "bad" test -- that helper is defined outside this file.
 */
void pgd_clear_bad(pgd_t *pgd)
{
	pgd_ERROR(*pgd);
	pgd_clear(pgd);
}
207
/*
 * Report the contents of a pud entry through pud_ERROR() and then clear
 * the entry.  The report is made first so it shows the offending value.
 */
void pud_clear_bad(pud_t *pud)
{
	pud_ERROR(*pud);
	pud_clear(pud);
}
213
/*
 * Report the contents of a pmd entry through pmd_ERROR() and then clear
 * the entry.  The report is made first so it shows the offending value.
 */
void pmd_clear_bad(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	pmd_clear(pmd);
}
219
220
221
222
223
224static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
225 unsigned long addr)
226{
227 pgtable_t token = pmd_pgtable(*pmd);
228 pmd_clear(pmd);
229 pte_free_tlb(tlb, token, addr);
230 tlb->mm->nr_ptes--;
231}
232
233static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
234 unsigned long addr, unsigned long end,
235 unsigned long floor, unsigned long ceiling)
236{
237 pmd_t *pmd;
238 unsigned long next;
239 unsigned long start;
240
241 start = addr;
242 pmd = pmd_offset(pud, addr);
243 do {
244 next = pmd_addr_end(addr, end);
245 if (pmd_none_or_clear_bad(pmd))
246 continue;
247 free_pte_range(tlb, pmd, addr);
248 } while (pmd++, addr = next, addr != end);
249
250 start &= PUD_MASK;
251 if (start < floor)
252 return;
253 if (ceiling) {
254 ceiling &= PUD_MASK;
255 if (!ceiling)
256 return;
257 }
258 if (end - 1 > ceiling - 1)
259 return;
260
261 pmd = pmd_offset(pud, start);
262 pud_clear(pud);
263 pmd_free_tlb(tlb, pmd, start);
264}
265
266static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
267 unsigned long addr, unsigned long end,
268 unsigned long floor, unsigned long ceiling)
269{
270 pud_t *pud;
271 unsigned long next;
272 unsigned long start;
273
274 start = addr;
275 pud = pud_offset(pgd, addr);
276 do {
277 next = pud_addr_end(addr, end);
278 if (pud_none_or_clear_bad(pud))
279 continue;
280 free_pmd_range(tlb, pud, addr, next, floor, ceiling);
281 } while (pud++, addr = next, addr != end);
282
283 start &= PGDIR_MASK;
284 if (start < floor)
285 return;
286 if (ceiling) {
287 ceiling &= PGDIR_MASK;
288 if (!ceiling)
289 return;
290 }
291 if (end - 1 > ceiling - 1)
292 return;
293
294 pud = pud_offset(pgd, start);
295 pgd_clear(pgd);
296 pud_free_tlb(tlb, pud, start);
297}
298
299
300
301
302
303
/*
 * Free the page-table pages that cover [addr, end) in tlb->mm, without
 * freeing any table that also serves addresses below @floor or at/above
 * @ceiling.
 *
 * @tlb:     mmu_gather collecting the table pages to free
 * @addr:    start of the range
 * @end:     end of the range (exclusive)
 * @floor:   lowest address whose tables may be freed
 * @ceiling: upper limit; 0 means "no limit"
 */
void free_pgd_range(struct mmu_gather *tlb,
			unsigned long addr, unsigned long end,
			unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Trim the range to whole PMD-sized units here, at the top level,
	 * so that a range with nothing to free is rejected without walking
	 * down the tables.
	 *
	 * The comparisons use "end - 1" and "ceiling - 1" because an end or
	 * ceiling of 0 stands for the top of the address space: subtracting
	 * one wraps it to the highest address so it compares as the largest
	 * value.
	 */
	addr &= PMD_MASK;
	if (addr < floor) {
		/* first PMD unit is shared with something below floor: skip it */
		addr += PMD_SIZE;
		if (!addr)
			return;		/* wrapped past the top: nothing left */
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;		/* ceiling lies inside the very first unit */
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;	/* last PMD unit extends past ceiling: keep it */
	if (addr > end - 1)
		return;			/* trimmed range is empty */

	pgd = pgd_offset(tlb->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}
361
362void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
363 unsigned long floor, unsigned long ceiling)
364{
365 while (vma) {
366 struct vm_area_struct *next = vma->vm_next;
367 unsigned long addr = vma->vm_start;
368
369
370
371
372
373 unlink_anon_vmas(vma);
374 unlink_file_vma(vma);
375
376 if (is_vm_hugetlb_page(vma)) {
377 hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
378 floor, next? next->vm_start: ceiling);
379 } else {
380
381
382
383 while (next && next->vm_start <= vma->vm_end + PMD_SIZE
384 && !is_vm_hugetlb_page(next)) {
385 vma = next;
386 next = vma->vm_next;
387 unlink_anon_vmas(vma);
388 unlink_file_vma(vma);
389 }
390 free_pgd_range(tlb, addr, vma->vm_end,
391 floor, next? next->vm_start: ceiling);
392 }
393 vma = next;
394 }
395}
396
397int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
398{
399 pgtable_t new = pte_alloc_one(mm, address);
400 if (!new)
401 return -ENOMEM;
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416 smp_wmb();
417
418 spin_lock(&mm->page_table_lock);
419 if (!pmd_present(*pmd)) {
420 mm->nr_ptes++;
421 pmd_populate(mm, pmd, new);
422 new = NULL;
423 }
424 spin_unlock(&mm->page_table_lock);
425 if (new)
426 pte_free(mm, new);
427 return 0;
428}
429
430int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
431{
432 pte_t *new = pte_alloc_one_kernel(&init_mm, address);
433 if (!new)
434 return -ENOMEM;
435
436 smp_wmb();
437
438 spin_lock(&init_mm.page_table_lock);
439 if (!pmd_present(*pmd)) {
440 pmd_populate_kernel(&init_mm, pmd, new);
441 new = NULL;
442 }
443 spin_unlock(&init_mm.page_table_lock);
444 if (new)
445 pte_free_kernel(&init_mm, new);
446 return 0;
447}
448
449static inline void init_rss_vec(int *rss)
450{
451 memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
452}
453
454static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
455{
456 int i;
457
458 if (current->mm == mm)
459 sync_mm_rss(current, mm);
460 for (i = 0; i < NR_MM_COUNTERS; i++)
461 if (rss[i])
462 add_mm_counter(mm, i, rss[i]);
463}
464
465
466
467
468
469
470
471
/*
 * Report a page-table entry that the caller considers invalid.
 *
 * Logs the pte and pmd values, dumps @page if one is given, prints vma
 * details (flags, anon_vma, file mapping, linear page index) and the
 * vma's fault / file mmap handlers, dumps the stack and taints the kernel
 * with TAINT_BAD_PAGE.
 *
 * Output is rate limited through function-static state: once 60 reports
 * have been shown, further calls are only counted until the deadline set
 * by the first report of the burst (60 * HZ jiffies later) has passed.
 * The next report after that first prints how many were suppressed.
 * NOTE(review): the static counters are updated without any locking
 * visible here -- presumably tolerated for a diagnostic path; confirm.
 */
static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
			  pte_t pte, struct page *page)
{
	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);
	struct address_space *mapping;
	pgoff_t index;
	static unsigned long resume;		/* jiffies at which output may resume */
	static unsigned long nr_shown;		/* reports printed in this burst */
	static unsigned long nr_unshown;	/* reports suppressed since then */

	/* burst limit reached: stay quiet until the deadline passes */
	if (nr_shown == 60) {
		if (time_before(jiffies, resume)) {
			nr_unshown++;
			return;
		}
		if (nr_unshown) {
			printk(KERN_ALERT
				"BUG: Bad page map: %lu messages suppressed\n",
				nr_unshown);
			nr_unshown = 0;
		}
		nr_shown = 0;
	}
	/* the first report of a burst arms the deadline */
	if (nr_shown++ == 0)
		resume = jiffies + 60 * HZ;

	mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
	index = linear_page_index(vma, addr);

	printk(KERN_ALERT
		"BUG: Bad page map in process %s  pte:%08llx pmd:%08llx\n",
		current->comm,
		(long long)pte_val(pte), (long long)pmd_val(*pmd));
	if (page)
		dump_page(page);
	printk(KERN_ALERT
		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
	/* name the handlers that could have set up this mapping */
	if (vma->vm_ops)
		print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n",
				(unsigned long)vma->vm_ops->fault);
	if (vma->vm_file && vma->vm_file->f_op)
		print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",
				(unsigned long)vma->vm_file->f_op->mmap);
	dump_stack();
	add_taint(TAINT_BAD_PAGE);
}
528
529static inline int is_cow_mapping(unsigned int flags)
530{
531 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
532}
533
/*
 * Default test for "is this pfn the zero page?": compares against the
 * single global zero_pfn.  Only built when is_zero_pfn is not already
 * defined as a macro (the #ifndef lets another definition take over).
 */
#ifndef is_zero_pfn
static inline int is_zero_pfn(unsigned long pfn)
{
	return pfn == zero_pfn;
}
#endif
540
/*
 * Default choice of zero-page pfn for @addr: always the global zero_pfn,
 * so @addr is ignored.  Only built when my_zero_pfn is not already defined
 * as a macro.
 */
#ifndef my_zero_pfn
static inline unsigned long my_zero_pfn(unsigned long addr)
{
	return zero_pfn;
}
#endif
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
/*
 * HAVE_PTE_SPECIAL is a compile-time constant so the code below can use a
 * plain if () and let the compiler drop the unused path.
 */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif
/*
 * Return the struct page that @pte maps at @addr in @vma, or NULL when the
 * pte does not refer to a page callers should treat as a normal one.
 *
 * With HAVE_PTE_SPECIAL:
 *  - a pte not marked special only has to pass the pfn range check;
 *  - a special pte always yields NULL, and is additionally reported via
 *    print_bad_pte() unless the vma is VM_PFNMAP/VM_MIXEDMAP or the pfn is
 *    the zero pfn.
 *
 * Without HAVE_PTE_SPECIAL:
 *  - VM_MIXEDMAP: NULL if the pfn is not valid, otherwise the page is
 *    returned directly (the range check is skipped);
 *  - VM_PFNMAP: NULL if the pfn is exactly the one implied by the linear
 *    layout (vm_pgoff + page offset into the vma), or if the mapping is
 *    not copy-on-write;
 *  - the zero pfn yields NULL.
 *
 * In the remaining cases a pfn above highest_memmap_pfn is reported as a
 * bad pte and yields NULL.
 */
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
				pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	if (HAVE_PTE_SPECIAL) {
		if (likely(!pte_special(pte)))
			goto check_pfn;
		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
			return NULL;
		if (!is_zero_pfn(pfn))
			print_bad_pte(vma, addr, pte, NULL);
		return NULL;
	}

	/* the !HAVE_PTE_SPECIAL case follows */

	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
		if (vma->vm_flags & VM_MIXEDMAP) {
			if (!pfn_valid(pfn))
				return NULL;
			goto out;
		} else {
			unsigned long off;
			/* page offset of addr within the vma */
			off = (addr - vma->vm_start) >> PAGE_SHIFT;
			if (pfn == vma->vm_pgoff + off)
				return NULL;
			if (!is_cow_mapping(vma->vm_flags))
				return NULL;
		}
	}

	if (is_zero_pfn(pfn))
		return NULL;
check_pfn:
	if (unlikely(pfn > highest_memmap_pfn)) {
		print_bad_pte(vma, addr, pte, NULL);
		return NULL;
	}

	/* the pfn passed every check: hand back its struct page */
out:
	return pfn_to_page(pfn);
}
642
643
644
645
646
647
648
/*
 * Copy a single pte from @src_mm to @dst_mm at @addr, updating the local
 * RSS delta vector @rss.
 *
 * Returns 0 on success.  If the source pte is a swap entry whose
 * swap_duplicate() fails, nothing is copied and the raw entry value is
 * returned so the caller can deal with it and retry.
 *
 * NOTE(review): no locking is done here; presumably the caller holds the
 * page-table locks for both ptes (copy_pte_range() does) -- confirm for
 * any new caller.
 */
static inline unsigned long
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
		unsigned long addr, int *rss)
{
	unsigned long vm_flags = vma->vm_flags;
	pte_t pte = *src_pte;
	struct page *page;

	/* not present: the pte encodes a swap or file position, copy as is */
	if (unlikely(!pte_present(pte))) {
		if (!pte_file(pte)) {
			swp_entry_t entry = pte_to_swp_entry(pte);

			if (swap_duplicate(entry) < 0)
				return entry.val;

			/* put dst_mm on the mmlist next to src_mm if it is not listed yet */
			if (unlikely(list_empty(&dst_mm->mmlist))) {
				spin_lock(&mmlist_lock);
				/* re-check under the lock */
				if (list_empty(&dst_mm->mmlist))
					list_add(&dst_mm->mmlist,
						 &src_mm->mmlist);
				spin_unlock(&mmlist_lock);
			}
			if (likely(!non_swap_entry(entry)))
				rss[MM_SWAPENTS]++;
			else if (is_write_migration_entry(entry) &&
					is_cow_mapping(vm_flags)) {
				/*
				 * Writable migration entry in a COW mapping:
				 * downgrade it to a read entry in the source
				 * too, so both sides end up with the read form.
				 */
				make_migration_entry_read(&entry);
				pte = swp_entry_to_pte(entry);
				set_pte_at(src_mm, addr, src_pte, pte);
			}
		}
		goto out_set_pte;
	}

	/*
	 * COW mapping: write protect the pte in the source as well as the
	 * copy that goes into the destination.
	 */
	if (is_cow_mapping(vm_flags)) {
		ptep_set_wrprotect(src_mm, addr, src_pte);
		pte = pte_wrprotect(pte);
	}

	/*
	 * Shared mapping: the destination copy is marked clean.  The copy is
	 * always marked old.
	 */
	if (vm_flags & VM_SHARED)
		pte = pte_mkclean(pte);
	pte = pte_mkold(pte);

	page = vm_normal_page(vma, addr, pte);
	if (page) {
		/* the new mapping takes its own page reference and rmap count */
		get_page(page);
		page_dup_rmap(page);
		if (PageAnon(page))
			rss[MM_ANONPAGES]++;
		else
			rss[MM_FILEPAGES]++;
	}

out_set_pte:
	set_pte_at(dst_mm, addr, dst_pte, pte);
	return 0;
}
721
/*
 * Copy the ptes covering [addr, end) under @src_pmd into @dst_pmd,
 * allocating the destination pte page if needed.
 *
 * The work is done in batches under both page-table locks.  A batch ends
 * early when rescheduling or lock contention is detected, or when
 * copy_one_pte() reports a swap entry it could not duplicate.  In that
 * case add_swap_count_continuation() is called with the locks dropped and
 * the same pte is retried.
 *
 * Returns 0 on success, -ENOMEM if the pte page or the swap count
 * continuation could not be allocated.
 */
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
		   unsigned long addr, unsigned long end)
{
	pte_t *orig_src_pte, *orig_dst_pte;
	pte_t *src_pte, *dst_pte;
	spinlock_t *src_ptl, *dst_ptl;
	int progress = 0;
	int rss[NR_MM_COUNTERS];
	swp_entry_t entry = (swp_entry_t){0};

again:
	/* each batch accumulates its own RSS deltas */
	init_rss_vec(rss);

	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
	if (!dst_pte)
		return -ENOMEM;
	src_pte = pte_offset_map(src_pmd, addr);
	src_ptl = pte_lockptr(src_mm, src_pmd);
	/* second page-table lock: tell lockdep this nesting is intended */
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
	/* remember the mapped bases; the cursors move but unmap needs these */
	orig_src_pte = src_pte;
	orig_dst_pte = dst_pte;
	arch_enter_lazy_mmu_mode();

	do {
		/*
		 * progress is a weighted work count: an empty pte costs 1, a
		 * copied pte 8.  Once it reaches 32, check whether the locks
		 * should be released.
		 */
		if (progress >= 32) {
			progress = 0;
			if (need_resched() ||
			    spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
				break;
		}
		if (pte_none(*src_pte)) {
			progress++;
			continue;
		}
		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
							vma, addr, rss);
		/* non-zero: swap_duplicate() failed; break without advancing */
		if (entry.val)
			break;
		progress += 8;
	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

	arch_leave_lazy_mmu_mode();
	spin_unlock(src_ptl);
	pte_unmap(orig_src_pte);
	add_mm_rss_vec(dst_mm, rss);
	pte_unmap_unlock(orig_dst_pte, dst_ptl);
	cond_resched();

	if (entry.val) {
		/* may sleep (GFP_KERNEL), hence done with the locks dropped */
		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
			return -ENOMEM;
		progress = 0;
	}
	/* stopped early for any reason: pick up again at addr */
	if (addr != end)
		goto again;
	return 0;
}
784
785static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
786 pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
787 unsigned long addr, unsigned long end)
788{
789 pmd_t *src_pmd, *dst_pmd;
790 unsigned long next;
791
792 dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
793 if (!dst_pmd)
794 return -ENOMEM;
795 src_pmd = pmd_offset(src_pud, addr);
796 do {
797 next = pmd_addr_end(addr, end);
798 if (pmd_none_or_clear_bad(src_pmd))
799 continue;
800 if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
801 vma, addr, next))
802 return -ENOMEM;
803 } while (dst_pmd++, src_pmd++, addr = next, addr != end);
804 return 0;
805}
806
807static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
808 pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
809 unsigned long addr, unsigned long end)
810{
811 pud_t *src_pud, *dst_pud;
812 unsigned long next;
813
814 dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
815 if (!dst_pud)
816 return -ENOMEM;
817 src_pud = pud_offset(src_pgd, addr);
818 do {
819 next = pud_addr_end(addr, end);
820 if (pud_none_or_clear_bad(src_pud))
821 continue;
822 if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
823 vma, addr, next))
824 return -ENOMEM;
825 } while (dst_pud++, src_pud++, addr = next, addr != end);
826 return 0;
827}
828
829int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
830 struct vm_area_struct *vma)
831{
832 pgd_t *src_pgd, *dst_pgd;
833 unsigned long next;
834 unsigned long addr = vma->vm_start;
835 unsigned long end = vma->vm_end;
836 int ret;
837
838
839
840
841
842
843
844 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
845 if (!vma->anon_vma)
846 return 0;
847 }
848
849 if (is_vm_hugetlb_page(vma))
850 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
851
852 if (unlikely(is_pfn_mapping(vma))) {
853
854
855
856
857 ret = track_pfn_vma_copy(vma);
858 if (ret)
859 return ret;
860 }
861
862
863
864
865
866
867
868 if (is_cow_mapping(vma->vm_flags))
869 mmu_notifier_invalidate_range_start(src_mm, addr, end);
870
871 ret = 0;
872 dst_pgd = pgd_offset(dst_mm, addr);
873 src_pgd = pgd_offset(src_mm, addr);
874 do {
875 next = pgd_addr_end(addr, end);
876 if (pgd_none_or_clear_bad(src_pgd))
877 continue;
878 if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
879 vma, addr, next))) {
880 ret = -ENOMEM;
881 break;
882 }
883 } while (dst_pgd++, src_pgd++, addr = next, addr != end);
884
885 if (is_cow_mapping(vma->vm_flags))
886 mmu_notifier_invalidate_range_end(src_mm,
887 vma->vm_start, end);
888 return ret;
889}
890
/*
 * Tear down the ptes covering [addr, end) under @pmd, within the work
 * budget *@zap_work.
 *
 * The budget is charged 1 for an empty pte and PAGE_SIZE for any other.
 * The loop stops when the range is done or the budget is used up.  When
 * @details is given, only pages matching its mapping / index filters are
 * unmapped and non-present entries are left untouched.
 *
 * Returns the address at which zapping stopped (== @end if finished).
 */
static unsigned long zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	struct mm_struct *mm = tlb->mm;
	pte_t *pte;
	spinlock_t *ptl;
	int rss[NR_MM_COUNTERS];

	init_rss_vec(rss);

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent = *pte;
		if (pte_none(ptent)) {
			(*zap_work)--;
			continue;
		}

		(*zap_work) -= PAGE_SIZE;

		if (pte_present(ptent)) {
			struct page *page;

			page = vm_normal_page(vma, addr, ptent);
			if (unlikely(details) && page) {
				/*
				 * A mapping filter is set: leave pages that
				 * belong to a different mapping alone.
				 */
				if (details->check_mapping &&
				    details->check_mapping != page->mapping)
					continue;
				/*
				 * Nonlinear vma: leave pages whose index lies
				 * outside [first_index, last_index] alone.
				 */
				if (details->nonlinear_vma &&
				    (page->index < details->first_index ||
				     page->index > details->last_index))
					continue;
			}
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			tlb_remove_tlb_entry(tlb, pte, addr);
			/* no normal page behind the pte: clearing it was all */
			if (unlikely(!page))
				continue;
			/*
			 * Nonlinear vma and the page is not at its linear
			 * position: leave a file pte recording the offset.
			 */
			if (unlikely(details) && details->nonlinear_vma
			    && linear_page_index(details->nonlinear_vma,
						addr) != page->index)
				set_pte_at(mm, addr, pte,
					   pgoff_to_pte(page->index));
			if (PageAnon(page))
				rss[MM_ANONPAGES]--;
			else {
				/* carry dirty/young state from the pte over to the page */
				if (pte_dirty(ptent))
					set_page_dirty(page);
				if (pte_young(ptent) &&
				    likely(!VM_SequentialReadHint(vma)))
					mark_page_accessed(page);
				rss[MM_FILEPAGES]--;
			}
			page_remove_rmap(page);
			if (unlikely(page_mapcount(page) < 0))
				print_bad_pte(vma, addr, ptent, page);
			tlb_remove_page(tlb, page);
			continue;
		}
		/*
		 * Not present.  With details filtering active, swap and file
		 * entries are left in place.
		 */
		if (unlikely(details))
			continue;
		if (pte_file(ptent)) {
			/* file ptes are only expected in VM_NONLINEAR vmas */
			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
				print_bad_pte(vma, addr, ptent, NULL);
		} else {
			swp_entry_t entry = pte_to_swp_entry(ptent);

			if (!non_swap_entry(entry))
				rss[MM_SWAPENTS]--;
			if (unlikely(!free_swap_and_cache(entry)))
				print_bad_pte(vma, addr, ptent, NULL);
		}
		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

	add_mm_rss_vec(mm, rss);
	arch_leave_lazy_mmu_mode();
	/* pte was advanced past the last entry handled; step back to unmap */
	pte_unmap_unlock(pte - 1, ptl);

	return addr;
}
988
989static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
990 struct vm_area_struct *vma, pud_t *pud,
991 unsigned long addr, unsigned long end,
992 long *zap_work, struct zap_details *details)
993{
994 pmd_t *pmd;
995 unsigned long next;
996
997 pmd = pmd_offset(pud, addr);
998 do {
999 next = pmd_addr_end(addr, end);
1000 if (pmd_none_or_clear_bad(pmd)) {
1001 (*zap_work)--;
1002 continue;
1003 }
1004 next = zap_pte_range(tlb, vma, pmd, addr, next,
1005 zap_work, details);
1006 } while (pmd++, addr = next, (addr != end && *zap_work > 0));
1007
1008 return addr;
1009}
1010
1011static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
1012 struct vm_area_struct *vma, pgd_t *pgd,
1013 unsigned long addr, unsigned long end,
1014 long *zap_work, struct zap_details *details)
1015{
1016 pud_t *pud;
1017 unsigned long next;
1018
1019 pud = pud_offset(pgd, addr);
1020 do {
1021 next = pud_addr_end(addr, end);
1022 if (pud_none_or_clear_bad(pud)) {
1023 (*zap_work)--;
1024 continue;
1025 }
1026 next = zap_pmd_range(tlb, vma, pud, addr, next,
1027 zap_work, details);
1028 } while (pud++, addr = next, (addr != end && *zap_work > 0));
1029
1030 return addr;
1031}
1032
1033static unsigned long unmap_page_range(struct mmu_gather *tlb,
1034 struct vm_area_struct *vma,
1035 unsigned long addr, unsigned long end,
1036 long *zap_work, struct zap_details *details)
1037{
1038 pgd_t *pgd;
1039 unsigned long next;
1040
1041 if (details && !details->check_mapping && !details->nonlinear_vma)
1042 details = NULL;
1043
1044 BUG_ON(addr >= end);
1045 mem_cgroup_uncharge_start();
1046 tlb_start_vma(tlb, vma);
1047 pgd = pgd_offset(vma->vm_mm, addr);
1048 do {
1049 next = pgd_addr_end(addr, end);
1050 if (pgd_none_or_clear_bad(pgd)) {
1051 (*zap_work)--;
1052 continue;
1053 }
1054 next = zap_pud_range(tlb, vma, pgd, addr, next,
1055 zap_work, details);
1056 } while (pgd++, addr = next, (addr != end && *zap_work > 0));
1057 tlb_end_vma(tlb, vma);
1058 mem_cgroup_uncharge_end();
1059
1060 return addr;
1061}
1062
/*
 * Work budget handed to each zap batch in unmap_vmas(); the budget is
 * charged PAGE_SIZE per populated pte, so this is roughly the number of
 * bytes unmapped between TLB flushes / reschedule checks.  The batch is
 * kept small (8 pages) with CONFIG_PREEMPT.
 */
#ifdef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
#else
/* Without CONFIG_PREEMPT a much larger batch (1024 pages) is used. */
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#endif
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
/*
 * Unmap [start_addr, end_addr) across the vma list starting at @vma.
 *
 * @tlbp:         in/out mmu_gather; finished and re-gathered between
 *                batches, and set to NULL if the function bails out early
 * @vma:          first vma to consider (dereferenced unconditionally)
 * @start_addr:   start of the range
 * @end_addr:     end of the range (exclusive)
 * @nr_accounted: incremented by the number of pages unmapped from
 *                VM_ACCOUNT vmas
 * @details:      optional filter / lock information, may be NULL
 *
 * Work proceeds in batches of ZAP_BLOCK_SIZE.  After each exhausted batch
 * the TLB gather is finished.  If details->i_mmap_lock was supplied and
 * either a reschedule or a break of that lock is wanted, the function
 * returns early with *tlbp == NULL; without that lock it calls
 * cond_resched() and continues.
 * NOTE(review): presumably the caller then drops i_mmap_lock and restarts
 * from the returned address -- confirm against the callers.
 *
 * Returns the address reached: the end of the last vma or chunk handled.
 */
unsigned long unmap_vmas(struct mmu_gather **tlbp,
		struct vm_area_struct *vma, unsigned long start_addr,
		unsigned long end_addr, unsigned long *nr_accounted,
		struct zap_details *details)
{
	long zap_work = ZAP_BLOCK_SIZE;
	unsigned long tlb_start = 0;	/* start of the range not yet flushed */
	int tlb_start_valid = 0;
	unsigned long start = start_addr;
	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
	int fullmm = (*tlbp)->fullmm;
	struct mm_struct *mm = vma->vm_mm;

	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
		unsigned long end;

		/* clamp the requested range to this vma; skip if empty */
		start = max(vma->vm_start, start_addr);
		if (start >= vma->vm_end)
			continue;
		end = min(vma->vm_end, end_addr);
		if (end <= vma->vm_start)
			continue;

		if (vma->vm_flags & VM_ACCOUNT)
			*nr_accounted += (end - start) >> PAGE_SHIFT;

		if (unlikely(is_pfn_mapping(vma)))
			untrack_pfn_vma(vma, 0, 0);

		while (start != end) {
			if (!tlb_start_valid) {
				tlb_start = start;
				tlb_start_valid = 1;
			}

			if (unlikely(is_vm_hugetlb_page(vma))) {
				/*
				 * Hugetlb vmas are unmapped in one go, and only
				 * when they still have a backing file.
				 * NOTE(review): presumably a NULL vm_file marks
				 * a vma whose teardown is handled elsewhere --
				 * confirm before changing this test.
				 */
				if (vma->vm_file) {
					unmap_hugepage_range(vma, start, end, NULL);
					zap_work -= (end - start) /
					pages_per_huge_page(hstate_vma(vma));
				}

				start = end;
			} else
				start = unmap_page_range(*tlbp, vma,
						start, end, &zap_work, details);

			/* budget left over means this vma must be finished */
			if (zap_work > 0) {
				BUG_ON(start != end);
				break;
			}

			/* budget exhausted: flush what was gathered so far */
			tlb_finish_mmu(*tlbp, tlb_start, start);

			if (need_resched() ||
				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
				if (i_mmap_lock) {
					/* tell the caller the gather is gone */
					*tlbp = NULL;
					goto out;
				}
				cond_resched();
			}

			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
			tlb_start_valid = 0;
			zap_work = ZAP_BLOCK_SIZE;
		}
	}
out:
	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
	return start;	/* end of last vma (or last chunk) */
}
1180
1181
1182
1183
1184
1185
1186
1187
1188unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
1189 unsigned long size, struct zap_details *details)
1190{
1191 struct mm_struct *mm = vma->vm_mm;
1192 struct mmu_gather *tlb;
1193 unsigned long end = address + size;
1194 unsigned long nr_accounted = 0;
1195
1196 lru_add_drain();
1197 tlb = tlb_gather_mmu(mm, 0);
1198 update_hiwater_rss(mm);
1199 end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
1200 if (tlb)
1201 tlb_finish_mmu(tlb, address, end);
1202 return end;
1203}
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
1218 unsigned long size)
1219{
1220 if (address < vma->vm_start || address + size > vma->vm_end ||
1221 !(vma->vm_flags & VM_PFNMAP))
1222 return -1;
1223 zap_page_range(vma, address, size, NULL);
1224 return 0;
1225}
1226EXPORT_SYMBOL_GPL(zap_vma_ptes);
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
/*
 * Look up the page mapped at user address @address in @vma.
 *
 * @flags is a mask of FOLL_* bits:
 *   FOLL_WRITE - the pte must be writable, otherwise NULL is returned
 *   FOLL_GET   - take a reference on the returned page
 *   FOLL_TOUCH - mark the page accessed (and dirty for FOLL_WRITE)
 *   FOLL_DUMP  - never hand back the zero page; report holes as errors
 *
 * Returns the page, NULL when nothing usable is mapped (callers such as
 * __get_user_pages() then fault the page in), or ERR_PTR(-EFAULT) when
 * the pte maps something that has no normal struct page, or for
 * FOLL_DUMP on an unpopulated range of a vma without a fault handler.
 *
 * Huge mappings are resolved via the follow_huge_*() helpers and must not
 * be combined with FOLL_GET (BUG_ON).
 */
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	/* an IS_ERR() result means "not handled here": do the normal walk */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		goto out;
	}

	page = NULL;
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto no_page_table;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		goto no_page_table;
	if (pud_huge(*pud)) {
		BUG_ON(flags & FOLL_GET);
		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
		goto out;
	}
	if (unlikely(pud_bad(*pud)))
		goto no_page_table;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		goto no_page_table;
	if (pmd_huge(*pmd)) {
		BUG_ON(flags & FOLL_GET);
		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
		goto out;
	}
	if (unlikely(pmd_bad(*pmd)))
		goto no_page_table;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);

	pte = *ptep;
	if (!pte_present(pte))
		goto no_page;
	/* write wanted but pte read-only: return NULL (page is still NULL) */
	if ((flags & FOLL_WRITE) && !pte_write(pte))
		goto unlock;

	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page)) {
		/* only the zero page is acceptable here, and not for dumps */
		if ((flags & FOLL_DUMP) ||
		    !is_zero_pfn(pte_pfn(pte)))
			goto bad_page;
		page = pte_page(pte);
	}

	if (flags & FOLL_GET)
		get_page(page);
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * Only the struct page is updated here; the pte itself is
		 * left as it is.
		 */
		mark_page_accessed(page);
	}
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return page;

bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);

no_page:
	pte_unmap_unlock(ptep, ptl);
	/* a non-present but non-empty pte (e.g. swap): return NULL */
	if (!pte_none(pte))
		return page;

no_page_table:
	/*
	 * Nothing is mapped here.  For FOLL_DUMP on a vma that has no fault
	 * handler, return an error instead of NULL so the caller does not
	 * try to fault the range in; in every other case return NULL (page
	 * is still NULL here).
	 */
	if ((flags & FOLL_DUMP) &&
	    (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return page;
}
1341
/*
 * Walk @nr_pages user pages of @mm starting at @start, faulting them in
 * as needed, and optionally collect the pages and their vmas.
 *
 * @tsk:       task whose fault counters (maj_flt/min_flt) are charged
 * @mm:        address space to walk
 * @start:     first user address
 * @nr_pages:  number of pages wanted; <= 0 returns 0 immediately
 * @gup_flags: FOLL_* flags; FOLL_GET must be set exactly when @pages is
 *             non-NULL (VM_BUG_ON)
 * @pages:     optional array receiving the pages
 * @vmas:      optional array receiving the vma of each page
 *
 * Returns the number of pages processed.  If none could be processed, a
 * negative errno is returned instead: -EFAULT, -ENOMEM or -ERESTARTSYS,
 * or whatever error follow_page() encoded.
 * NOTE(review): no locking is taken here; presumably the caller holds
 * mm->mmap_sem -- confirm.
 */
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long start, int nr_pages, unsigned int gup_flags,
		     struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Build the set of vma flags of which at least one must be present:
	 * the read or write pair according to FOLL_WRITE, narrowed to the
	 * VM_MAY* bits when FOLL_FORCE is set and to the plain VM_READ /
	 * VM_WRITE bits otherwise.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		/* no vma, but the address is in the gate area: walk by hand */
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* writes to the gate area are refused */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page;

				page = vm_normal_page(gate_vma, start, *pte);
				if (!page) {
					/* the zero page is acceptable unless dumping */
					if (!(gup_flags & FOLL_DUMP) &&
					     is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
			continue;
		}

		/* no vma, an IO/PFN mapping, or insufficient permissions */
		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			/* the helper advances start, nr_pages and the count */
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * Stop once a fatal signal is pending rather than
			 * continuing to fault pages in.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			cond_resched();
			/* NULL from follow_page(): fault the page in and retry */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;

				ret = handle_mm_fault(mm, vma, start,
					(foll_flags & FOLL_WRITE) ?
					FAULT_FLAG_WRITE : 0);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret &
					    (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE|
					     VM_FAULT_SIGBUS))
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The fault reported a completed write
				 * (VM_FAULT_WRITE) but the vma is not
				 * VM_WRITE: the pte will stay read-only, so
				 * drop FOLL_WRITE for the retry or
				 * follow_page() would keep returning NULL.
				 * NOTE(review): presumably the forced-write
				 * (FOLL_FORCE) COW case -- confirm.
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				cond_resched();
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);
	return i;
}
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1550 unsigned long start, int nr_pages, int write, int force,
1551 struct page **pages, struct vm_area_struct **vmas)
1552{
1553 int flags = FOLL_TOUCH;
1554
1555 if (pages)
1556 flags |= FOLL_GET;
1557 if (write)
1558 flags |= FOLL_WRITE;
1559 if (force)
1560 flags |= FOLL_FORCE;
1561
1562 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
1563}
1564EXPORT_SYMBOL(get_user_pages);
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580#ifdef CONFIG_ELF_CORE
1581struct page *get_dump_page(unsigned long addr)
1582{
1583 struct vm_area_struct *vma;
1584 struct page *page;
1585
1586 if (__get_user_pages(current, current->mm, addr, 1,
1587 FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
1588 return NULL;
1589 flush_cache_page(vma, addr, page_to_pfn(page));
1590 return page;
1591}
1592#endif
1593
1594pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
1595 spinlock_t **ptl)
1596{
1597 pgd_t * pgd = pgd_offset(mm, addr);
1598 pud_t * pud = pud_alloc(mm, pgd, addr);
1599 if (pud) {
1600 pmd_t * pmd = pmd_alloc(mm, pud, addr);
1601 if (pmd)
1602 return pte_alloc_map_lock(mm, pmd, addr, ptl);
1603 }
1604 return NULL;
1605}
1606
1607
1608
1609
1610
1611
1612
1613
1614static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1615 struct page *page, pgprot_t prot)
1616{
1617 struct mm_struct *mm = vma->vm_mm;
1618 int retval;
1619 pte_t *pte;
1620 spinlock_t *ptl;
1621
1622 retval = -EINVAL;
1623 if (PageAnon(page))
1624 goto out;
1625 retval = -ENOMEM;
1626 flush_dcache_page(page);
1627 pte = get_locked_pte(mm, addr, &ptl);
1628 if (!pte)
1629 goto out;
1630 retval = -EBUSY;
1631 if (!pte_none(*pte))
1632 goto out_unlock;
1633
1634
1635 get_page(page);
1636 inc_mm_counter_fast(mm, MM_FILEPAGES);
1637 page_add_file_rmap(page);
1638 set_pte_at(mm, addr, pte, mk_pte(page, prot));
1639
1640 retval = 0;
1641 pte_unmap_unlock(pte, ptl);
1642 return retval;
1643out_unlock:
1644 pte_unmap_unlock(pte, ptl);
1645out:
1646 return retval;
1647}
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
1672 struct page *page)
1673{
1674 if (addr < vma->vm_start || addr >= vma->vm_end)
1675 return -EFAULT;
1676 if (!page_count(page))
1677 return -EINVAL;
1678 vma->vm_flags |= VM_INSERTPAGE;
1679 return insert_page(vma, addr, page, vma->vm_page_prot);
1680}
1681EXPORT_SYMBOL(vm_insert_page);
1682
1683static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1684 unsigned long pfn, pgprot_t prot)
1685{
1686 struct mm_struct *mm = vma->vm_mm;
1687 int retval;
1688 pte_t *pte, entry;
1689 spinlock_t *ptl;
1690
1691 retval = -ENOMEM;
1692 pte = get_locked_pte(mm, addr, &ptl);
1693 if (!pte)
1694 goto out;
1695 retval = -EBUSY;
1696 if (!pte_none(*pte))
1697 goto out_unlock;
1698
1699
1700 entry = pte_mkspecial(pfn_pte(pfn, prot));
1701 set_pte_at(mm, addr, pte, entry);
1702 update_mmu_cache(vma, addr, pte);
1703
1704 retval = 0;
1705out_unlock:
1706 pte_unmap_unlock(pte, ptl);
1707out:
1708 return retval;
1709}
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1729 unsigned long pfn)
1730{
1731 int ret;
1732 pgprot_t pgprot = vma->vm_page_prot;
1733
1734
1735
1736
1737
1738
1739 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
1740 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
1741 (VM_PFNMAP|VM_MIXEDMAP));
1742 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
1743 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
1744
1745 if (addr < vma->vm_start || addr >= vma->vm_end)
1746 return -EFAULT;
1747 if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
1748 return -EINVAL;
1749
1750 ret = insert_pfn(vma, addr, pfn, pgprot);
1751
1752 if (ret)
1753 untrack_pfn_vma(vma, pfn, PAGE_SIZE);
1754
1755 return ret;
1756}
1757EXPORT_SYMBOL(vm_insert_pfn);
1758
1759int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
1760 unsigned long pfn)
1761{
1762 BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
1763
1764 if (addr < vma->vm_start || addr >= vma->vm_end)
1765 return -EFAULT;
1766
1767
1768
1769
1770
1771
1772
1773
1774 if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
1775 struct page *page;
1776
1777 page = pfn_to_page(pfn);
1778 return insert_page(vma, addr, page, vma->vm_page_prot);
1779 }
1780 return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
1781}
1782EXPORT_SYMBOL(vm_insert_mixed);
1783
1784
1785
1786
1787
1788
1789static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1790 unsigned long addr, unsigned long end,
1791 unsigned long pfn, pgprot_t prot)
1792{
1793 pte_t *pte;
1794 spinlock_t *ptl;
1795
1796 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1797 if (!pte)
1798 return -ENOMEM;
1799 arch_enter_lazy_mmu_mode();
1800 do {
1801 BUG_ON(!pte_none(*pte));
1802 set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
1803 pfn++;
1804 } while (pte++, addr += PAGE_SIZE, addr != end);
1805 arch_leave_lazy_mmu_mode();
1806 pte_unmap_unlock(pte - 1, ptl);
1807 return 0;
1808}
1809
1810static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
1811 unsigned long addr, unsigned long end,
1812 unsigned long pfn, pgprot_t prot)
1813{
1814 pmd_t *pmd;
1815 unsigned long next;
1816
1817 pfn -= addr >> PAGE_SHIFT;
1818 pmd = pmd_alloc(mm, pud, addr);
1819 if (!pmd)
1820 return -ENOMEM;
1821 do {
1822 next = pmd_addr_end(addr, end);
1823 if (remap_pte_range(mm, pmd, addr, next,
1824 pfn + (addr >> PAGE_SHIFT), prot))
1825 return -ENOMEM;
1826 } while (pmd++, addr = next, addr != end);
1827 return 0;
1828}
1829
1830static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1831 unsigned long addr, unsigned long end,
1832 unsigned long pfn, pgprot_t prot)
1833{
1834 pud_t *pud;
1835 unsigned long next;
1836
1837 pfn -= addr >> PAGE_SHIFT;
1838 pud = pud_alloc(mm, pgd, addr);
1839 if (!pud)
1840 return -ENOMEM;
1841 do {
1842 next = pud_addr_end(addr, end);
1843 if (remap_pmd_range(mm, pud, addr, next,
1844 pfn + (addr >> PAGE_SHIFT), prot))
1845 return -ENOMEM;
1846 } while (pud++, addr = next, addr != end);
1847 return 0;
1848}
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1861 unsigned long pfn, unsigned long size, pgprot_t prot)
1862{
1863 pgd_t *pgd;
1864 unsigned long next;
1865 unsigned long end = addr + PAGE_ALIGN(size);
1866 struct mm_struct *mm = vma->vm_mm;
1867 int err;
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887 if (addr == vma->vm_start && end == vma->vm_end) {
1888 vma->vm_pgoff = pfn;
1889 vma->vm_flags |= VM_PFN_AT_MMAP;
1890 } else if (is_cow_mapping(vma->vm_flags))
1891 return -EINVAL;
1892
1893 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
1894
1895 err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
1896 if (err) {
1897
1898
1899
1900
1901 vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
1902 vma->vm_flags &= ~VM_PFN_AT_MMAP;
1903 return -EINVAL;
1904 }
1905
1906 BUG_ON(addr >= end);
1907 pfn -= addr >> PAGE_SHIFT;
1908 pgd = pgd_offset(mm, addr);
1909 flush_cache_range(vma, addr, end);
1910 do {
1911 next = pgd_addr_end(addr, end);
1912 err = remap_pud_range(mm, pgd, addr, next,
1913 pfn + (addr >> PAGE_SHIFT), prot);
1914 if (err)
1915 break;
1916 } while (pgd++, addr = next, addr != end);
1917
1918 if (err)
1919 untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
1920
1921 return err;
1922}
1923EXPORT_SYMBOL(remap_pfn_range);
1924
1925static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1926 unsigned long addr, unsigned long end,
1927 pte_fn_t fn, void *data)
1928{
1929 pte_t *pte;
1930 int err;
1931 pgtable_t token;
1932 spinlock_t *uninitialized_var(ptl);
1933
1934 pte = (mm == &init_mm) ?
1935 pte_alloc_kernel(pmd, addr) :
1936 pte_alloc_map_lock(mm, pmd, addr, &ptl);
1937 if (!pte)
1938 return -ENOMEM;
1939
1940 BUG_ON(pmd_huge(*pmd));
1941
1942 arch_enter_lazy_mmu_mode();
1943
1944 token = pmd_pgtable(*pmd);
1945
1946 do {
1947 err = fn(pte++, token, addr, data);
1948 if (err)
1949 break;
1950 } while (addr += PAGE_SIZE, addr != end);
1951
1952 arch_leave_lazy_mmu_mode();
1953
1954 if (mm != &init_mm)
1955 pte_unmap_unlock(pte-1, ptl);
1956 return err;
1957}
1958
1959static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
1960 unsigned long addr, unsigned long end,
1961 pte_fn_t fn, void *data)
1962{
1963 pmd_t *pmd;
1964 unsigned long next;
1965 int err;
1966
1967 BUG_ON(pud_huge(*pud));
1968
1969 pmd = pmd_alloc(mm, pud, addr);
1970 if (!pmd)
1971 return -ENOMEM;
1972 do {
1973 next = pmd_addr_end(addr, end);
1974 err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
1975 if (err)
1976 break;
1977 } while (pmd++, addr = next, addr != end);
1978 return err;
1979}
1980
1981static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
1982 unsigned long addr, unsigned long end,
1983 pte_fn_t fn, void *data)
1984{
1985 pud_t *pud;
1986 unsigned long next;
1987 int err;
1988
1989 pud = pud_alloc(mm, pgd, addr);
1990 if (!pud)
1991 return -ENOMEM;
1992 do {
1993 next = pud_addr_end(addr, end);
1994 err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
1995 if (err)
1996 break;
1997 } while (pud++, addr = next, addr != end);
1998 return err;
1999}
2000
2001
2002
2003
2004
2005int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
2006 unsigned long size, pte_fn_t fn, void *data)
2007{
2008 pgd_t *pgd;
2009 unsigned long next;
2010 unsigned long end = addr + size;
2011 int err;
2012
2013 BUG_ON(addr >= end);
2014 pgd = pgd_offset(mm, addr);
2015 do {
2016 next = pgd_addr_end(addr, end);
2017 err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
2018 if (err)
2019 break;
2020 } while (pgd++, addr = next, addr != end);
2021
2022 return err;
2023}
2024EXPORT_SYMBOL_GPL(apply_to_page_range);
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
2036 pte_t *page_table, pte_t orig_pte)
2037{
2038 int same = 1;
2039#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
2040 if (sizeof(pte_t) > sizeof(unsigned long)) {
2041 spinlock_t *ptl = pte_lockptr(mm, pmd);
2042 spin_lock(ptl);
2043 same = pte_same(*page_table, orig_pte);
2044 spin_unlock(ptl);
2045 }
2046#endif
2047 pte_unmap(page_table);
2048 return same;
2049}
2050
2051
2052
2053
2054
2055
2056
2057static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
2058{
2059 if (likely(vma->vm_flags & VM_WRITE))
2060 pte = pte_mkwrite(pte);
2061 return pte;
2062}
2063
2064static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
2065{
2066
2067
2068
2069
2070
2071
2072 if (unlikely(!src)) {
2073 void *kaddr = kmap_atomic(dst, KM_USER0);
2074 void __user *uaddr = (void __user *)(va & PAGE_MASK);
2075
2076
2077
2078
2079
2080
2081
2082 if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
2083 clear_page(kaddr);
2084 kunmap_atomic(kaddr, KM_USER0);
2085 flush_dcache_page(dst);
2086 } else
2087 copy_user_highpage(dst, src, va, vma);
2088}
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2109 unsigned long address, pte_t *page_table, pmd_t *pmd,
2110 spinlock_t *ptl, pte_t orig_pte)
2111 __releases(ptl)
2112{
2113 struct page *old_page, *new_page;
2114 pte_t entry;
2115 int reuse = 0, ret = 0;
2116 int page_mkwrite = 0;
2117 struct page *dirty_page = NULL;
2118
2119 old_page = vm_normal_page(vma, address, orig_pte);
2120 if (!old_page) {
2121
2122
2123
2124
2125
2126
2127
2128 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2129 (VM_WRITE|VM_SHARED))
2130 goto reuse;
2131 goto gotten;
2132 }
2133
2134
2135
2136
2137
2138 if (PageAnon(old_page) && !PageKsm(old_page)) {
2139 if (!trylock_page(old_page)) {
2140 page_cache_get(old_page);
2141 pte_unmap_unlock(page_table, ptl);
2142 lock_page(old_page);
2143 page_table = pte_offset_map_lock(mm, pmd, address,
2144 &ptl);
2145 if (!pte_same(*page_table, orig_pte)) {
2146 unlock_page(old_page);
2147 page_cache_release(old_page);
2148 goto unlock;
2149 }
2150 page_cache_release(old_page);
2151 }
2152 reuse = reuse_swap_page(old_page);
2153 if (reuse)
2154
2155
2156
2157
2158
2159 page_move_anon_rmap(old_page, vma, address);
2160 unlock_page(old_page);
2161 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2162 (VM_WRITE|VM_SHARED))) {
2163
2164
2165
2166
2167
2168 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
2169 struct vm_fault vmf;
2170 int tmp;
2171
2172 vmf.virtual_address = (void __user *)(address &
2173 PAGE_MASK);
2174 vmf.pgoff = old_page->index;
2175 vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
2176 vmf.page = old_page;
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186 page_cache_get(old_page);
2187 pte_unmap_unlock(page_table, ptl);
2188
2189 tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
2190 if (unlikely(tmp &
2191 (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
2192 ret = tmp;
2193 goto unwritable_page;
2194 }
2195 if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
2196 lock_page(old_page);
2197 if (!old_page->mapping) {
2198 ret = 0;
2199 unlock_page(old_page);
2200 goto unwritable_page;
2201 }
2202 } else
2203 VM_BUG_ON(!PageLocked(old_page));
2204
2205
2206
2207
2208
2209
2210
2211 page_table = pte_offset_map_lock(mm, pmd, address,
2212 &ptl);
2213 if (!pte_same(*page_table, orig_pte)) {
2214 unlock_page(old_page);
2215 page_cache_release(old_page);
2216 goto unlock;
2217 }
2218
2219 page_mkwrite = 1;
2220 }
2221 dirty_page = old_page;
2222 get_page(dirty_page);
2223 reuse = 1;
2224 }
2225
2226 if (reuse) {
2227reuse:
2228 flush_cache_page(vma, address, pte_pfn(orig_pte));
2229 entry = pte_mkyoung(orig_pte);
2230 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2231 if (ptep_set_access_flags(vma, address, page_table, entry,1))
2232 update_mmu_cache(vma, address, page_table);
2233 ret |= VM_FAULT_WRITE;
2234 goto unlock;
2235 }
2236
2237
2238
2239
2240 page_cache_get(old_page);
2241gotten:
2242 pte_unmap_unlock(page_table, ptl);
2243
2244 if (unlikely(anon_vma_prepare(vma)))
2245 goto oom;
2246
2247 if (is_zero_pfn(pte_pfn(orig_pte))) {
2248 new_page = alloc_zeroed_user_highpage_movable(vma, address);
2249 if (!new_page)
2250 goto oom;
2251 } else {
2252 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2253 if (!new_page)
2254 goto oom;
2255 cow_user_page(new_page, old_page, address, vma);
2256 }
2257 __SetPageUptodate(new_page);
2258
2259
2260
2261
2262
2263 if ((vma->vm_flags & VM_LOCKED) && old_page) {
2264 lock_page(old_page);
2265 clear_page_mlock(old_page);
2266 unlock_page(old_page);
2267 }
2268
2269 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2270 goto oom_free_new;
2271
2272
2273
2274
2275 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2276 if (likely(pte_same(*page_table, orig_pte))) {
2277 if (old_page) {
2278 if (!PageAnon(old_page)) {
2279 dec_mm_counter_fast(mm, MM_FILEPAGES);
2280 inc_mm_counter_fast(mm, MM_ANONPAGES);
2281 }
2282 } else
2283 inc_mm_counter_fast(mm, MM_ANONPAGES);
2284 flush_cache_page(vma, address, pte_pfn(orig_pte));
2285 entry = mk_pte(new_page, vma->vm_page_prot);
2286 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2287
2288
2289
2290
2291
2292
2293 ptep_clear_flush(vma, address, page_table);
2294 page_add_new_anon_rmap(new_page, vma, address);
2295
2296
2297
2298
2299
2300 set_pte_at_notify(mm, address, page_table, entry);
2301 update_mmu_cache(vma, address, page_table);
2302 if (old_page) {
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325 page_remove_rmap(old_page);
2326 }
2327
2328
2329 new_page = old_page;
2330 ret |= VM_FAULT_WRITE;
2331 } else
2332 mem_cgroup_uncharge_page(new_page);
2333
2334 if (new_page)
2335 page_cache_release(new_page);
2336 if (old_page)
2337 page_cache_release(old_page);
2338unlock:
2339 pte_unmap_unlock(page_table, ptl);
2340 if (dirty_page) {
2341
2342
2343
2344
2345
2346
2347
2348
2349 if (!page_mkwrite) {
2350 wait_on_page_locked(dirty_page);
2351 set_page_dirty_balance(dirty_page, page_mkwrite);
2352 }
2353 put_page(dirty_page);
2354 if (page_mkwrite) {
2355 struct address_space *mapping = dirty_page->mapping;
2356
2357 set_page_dirty(dirty_page);
2358 unlock_page(dirty_page);
2359 page_cache_release(dirty_page);
2360 if (mapping) {
2361
2362
2363
2364
2365 balance_dirty_pages_ratelimited(mapping);
2366 }
2367 }
2368
2369
2370 if (vma->vm_file)
2371 file_update_time(vma->vm_file);
2372 }
2373 return ret;
2374oom_free_new:
2375 page_cache_release(new_page);
2376oom:
2377 if (old_page) {
2378 if (page_mkwrite) {
2379 unlock_page(old_page);
2380 page_cache_release(old_page);
2381 }
2382 page_cache_release(old_page);
2383 }
2384 return VM_FAULT_OOM;
2385
2386unwritable_page:
2387 page_cache_release(old_page);
2388 return ret;
2389}
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
/* True when @truncate_count has no bits set below the page boundary. */
#define is_restart_addr(truncate_count) ((((truncate_count) & ~PAGE_MASK)) == 0)
2424
2425static void reset_vma_truncate_counts(struct address_space *mapping)
2426{
2427 struct vm_area_struct *vma;
2428 struct prio_tree_iter iter;
2429
2430 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
2431 vma->vm_truncate_count = 0;
2432 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
2433 vma->vm_truncate_count = 0;
2434}
2435
2436static int unmap_mapping_range_vma(struct vm_area_struct *vma,
2437 unsigned long start_addr, unsigned long end_addr,
2438 struct zap_details *details)
2439{
2440 unsigned long restart_addr;
2441 int need_break;
2442
2443
2444
2445
2446
2447
2448
2449
2450again:
2451 restart_addr = vma->vm_truncate_count;
2452 if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
2453 start_addr = restart_addr;
2454 if (start_addr >= end_addr) {
2455
2456 vma->vm_truncate_count = details->truncate_count;
2457 return 0;
2458 }
2459 }
2460
2461 restart_addr = zap_page_range(vma, start_addr,
2462 end_addr - start_addr, details);
2463 need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
2464
2465 if (restart_addr >= end_addr) {
2466
2467 vma->vm_truncate_count = details->truncate_count;
2468 if (!need_break)
2469 return 0;
2470 } else {
2471
2472 vma->vm_truncate_count = restart_addr;
2473 if (!need_break)
2474 goto again;
2475 }
2476
2477 spin_unlock(details->i_mmap_lock);
2478 cond_resched();
2479 spin_lock(details->i_mmap_lock);
2480 return -EINTR;
2481}
2482
2483static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
2484 struct zap_details *details)
2485{
2486 struct vm_area_struct *vma;
2487 struct prio_tree_iter iter;
2488 pgoff_t vba, vea, zba, zea;
2489
2490restart:
2491 vma_prio_tree_foreach(vma, &iter, root,
2492 details->first_index, details->last_index) {
2493
2494 if (vma->vm_truncate_count == details->truncate_count)
2495 continue;
2496
2497 vba = vma->vm_pgoff;
2498 vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
2499
2500 zba = details->first_index;
2501 if (zba < vba)
2502 zba = vba;
2503 zea = details->last_index;
2504 if (zea > vea)
2505 zea = vea;
2506
2507 if (unmap_mapping_range_vma(vma,
2508 ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
2509 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
2510 details) < 0)
2511 goto restart;
2512 }
2513}
2514
2515static inline void unmap_mapping_range_list(struct list_head *head,
2516 struct zap_details *details)
2517{
2518 struct vm_area_struct *vma;
2519
2520
2521
2522
2523
2524
2525
2526restart:
2527 list_for_each_entry(vma, head, shared.vm_set.list) {
2528
2529 if (vma->vm_truncate_count == details->truncate_count)
2530 continue;
2531 details->nonlinear_vma = vma;
2532 if (unmap_mapping_range_vma(vma, vma->vm_start,
2533 vma->vm_end, details) < 0)
2534 goto restart;
2535 }
2536}
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552void unmap_mapping_range(struct address_space *mapping,
2553 loff_t const holebegin, loff_t const holelen, int even_cows)
2554{
2555 struct zap_details details;
2556 pgoff_t hba = holebegin >> PAGE_SHIFT;
2557 pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
2558
2559
2560 if (sizeof(holelen) > sizeof(hlen)) {
2561 long long holeend =
2562 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
2563 if (holeend & ~(long long)ULONG_MAX)
2564 hlen = ULONG_MAX - hba + 1;
2565 }
2566
2567 details.check_mapping = even_cows? NULL: mapping;
2568 details.nonlinear_vma = NULL;
2569 details.first_index = hba;
2570 details.last_index = hba + hlen - 1;
2571 if (details.last_index < details.first_index)
2572 details.last_index = ULONG_MAX;
2573 details.i_mmap_lock = &mapping->i_mmap_lock;
2574
2575 mutex_lock(&mapping->unmap_mutex);
2576 spin_lock(&mapping->i_mmap_lock);
2577
2578
2579 mapping->truncate_count++;
2580 if (unlikely(is_restart_addr(mapping->truncate_count))) {
2581 if (mapping->truncate_count == 0)
2582 reset_vma_truncate_counts(mapping);
2583 mapping->truncate_count++;
2584 }
2585 details.truncate_count = mapping->truncate_count;
2586
2587 if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
2588 unmap_mapping_range_tree(&mapping->i_mmap, &details);
2589 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2590 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
2591 spin_unlock(&mapping->i_mmap_lock);
2592 mutex_unlock(&mapping->unmap_mutex);
2593}
2594EXPORT_SYMBOL(unmap_mapping_range);
2595
2596int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2597{
2598 struct address_space *mapping = inode->i_mapping;
2599
2600
2601
2602
2603
2604
2605 if (!inode->i_op->truncate_range)
2606 return -ENOSYS;
2607
2608 mutex_lock(&inode->i_mutex);
2609 down_write(&inode->i_alloc_sem);
2610 unmap_mapping_range(mapping, offset, (end - offset), 1);
2611 truncate_inode_pages_range(mapping, offset, end);
2612 unmap_mapping_range(mapping, offset, (end - offset), 1);
2613 inode->i_op->truncate_range(inode, offset, end);
2614 up_write(&inode->i_alloc_sem);
2615 mutex_unlock(&inode->i_mutex);
2616
2617 return 0;
2618}
2619
2620
2621
2622
2623
2624
2625static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2626 unsigned long address, pte_t *page_table, pmd_t *pmd,
2627 unsigned int flags, pte_t orig_pte)
2628{
2629 spinlock_t *ptl;
2630 struct page *page, *swapcache = NULL;
2631 swp_entry_t entry;
2632 pte_t pte;
2633 int locked;
2634 struct mem_cgroup *ptr = NULL;
2635 int exclusive = 0;
2636 int ret = 0;
2637
2638 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2639 goto out;
2640
2641 entry = pte_to_swp_entry(orig_pte);
2642 if (unlikely(non_swap_entry(entry))) {
2643 if (is_migration_entry(entry)) {
2644 migration_entry_wait(mm, pmd, address);
2645 } else if (is_hwpoison_entry(entry)) {
2646 ret = VM_FAULT_HWPOISON;
2647 } else {
2648 print_bad_pte(vma, address, orig_pte, NULL);
2649 ret = VM_FAULT_SIGBUS;
2650 }
2651 goto out;
2652 }
2653 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2654 page = lookup_swap_cache(entry);
2655 if (!page) {
2656 grab_swap_token(mm);
2657 page = swapin_readahead(entry,
2658 GFP_HIGHUSER_MOVABLE, vma, address);
2659 if (!page) {
2660
2661
2662
2663
2664 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2665 if (likely(pte_same(*page_table, orig_pte)))
2666 ret = VM_FAULT_OOM;
2667 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2668 goto unlock;
2669 }
2670
2671
2672 ret = VM_FAULT_MAJOR;
2673 count_vm_event(PGMAJFAULT);
2674 } else if (PageHWPoison(page)) {
2675
2676
2677
2678
2679 ret = VM_FAULT_HWPOISON;
2680 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2681 goto out_release;
2682 }
2683
2684 locked = lock_page_or_retry(page, mm, flags);
2685 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2686 if (!locked) {
2687 ret |= VM_FAULT_RETRY;
2688 goto out_release;
2689 }
2690
2691
2692
2693
2694
2695
2696
2697 if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
2698 goto out_page;
2699
2700 if (ksm_might_need_to_copy(page, vma, address)) {
2701 swapcache = page;
2702 page = ksm_does_need_to_copy(page, vma, address);
2703
2704 if (unlikely(!page)) {
2705 ret = VM_FAULT_OOM;
2706 page = swapcache;
2707 swapcache = NULL;
2708 goto out_page;
2709 }
2710 }
2711
2712 if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
2713 ret = VM_FAULT_OOM;
2714 goto out_page;
2715 }
2716
2717
2718
2719
2720 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2721 if (unlikely(!pte_same(*page_table, orig_pte)))
2722 goto out_nomap;
2723
2724 if (unlikely(!PageUptodate(page))) {
2725 ret = VM_FAULT_SIGBUS;
2726 goto out_nomap;
2727 }
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743 inc_mm_counter_fast(mm, MM_ANONPAGES);
2744 dec_mm_counter_fast(mm, MM_SWAPENTS);
2745 pte = mk_pte(page, vma->vm_page_prot);
2746 if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
2747 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
2748 flags &= ~FAULT_FLAG_WRITE;
2749 ret |= VM_FAULT_WRITE;
2750 exclusive = 1;
2751 }
2752 flush_icache_page(vma, page);
2753 set_pte_at(mm, address, page_table, pte);
2754 do_page_add_anon_rmap(page, vma, address, exclusive);
2755
2756 mem_cgroup_commit_charge_swapin(page, ptr);
2757
2758 swap_free(entry);
2759 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
2760 try_to_free_swap(page);
2761 unlock_page(page);
2762 if (swapcache) {
2763
2764
2765
2766
2767
2768
2769
2770
2771 unlock_page(swapcache);
2772 page_cache_release(swapcache);
2773 }
2774
2775 if (flags & FAULT_FLAG_WRITE) {
2776 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
2777 if (ret & VM_FAULT_ERROR)
2778 ret &= VM_FAULT_ERROR;
2779 goto out;
2780 }
2781
2782
2783 update_mmu_cache(vma, address, page_table);
2784unlock:
2785 pte_unmap_unlock(page_table, ptl);
2786out:
2787 return ret;
2788out_nomap:
2789 mem_cgroup_cancel_charge_swapin(ptr);
2790 pte_unmap_unlock(page_table, ptl);
2791out_page:
2792 unlock_page(page);
2793out_release:
2794 page_cache_release(page);
2795 if (swapcache) {
2796 unlock_page(swapcache);
2797 page_cache_release(swapcache);
2798 }
2799 return ret;
2800}
2801
2802
2803
2804
2805
2806
2807static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
2808{
2809 address &= PAGE_MASK;
2810 if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
2811 struct vm_area_struct *prev = vma->vm_prev;
2812
2813
2814
2815
2816
2817
2818
2819 if (prev && prev->vm_end == address)
2820 return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
2821
2822 expand_stack(vma, address - PAGE_SIZE);
2823 }
2824 if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
2825 struct vm_area_struct *next = vma->vm_next;
2826
2827
2828 if (next && next->vm_start == address + PAGE_SIZE)
2829 return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
2830
2831 expand_upwards(vma, address + PAGE_SIZE);
2832 }
2833 return 0;
2834}
2835
2836
2837
2838
2839
2840
2841static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2842 unsigned long address, pte_t *page_table, pmd_t *pmd,
2843 unsigned int flags)
2844{
2845 struct page *page;
2846 spinlock_t *ptl;
2847 pte_t entry;
2848
2849 pte_unmap(page_table);
2850
2851
2852 if (check_stack_guard_page(vma, address) < 0)
2853 return VM_FAULT_SIGBUS;
2854
2855
2856 if (!(flags & FAULT_FLAG_WRITE)) {
2857 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
2858 vma->vm_page_prot));
2859 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2860 if (!pte_none(*page_table))
2861 goto unlock;
2862 goto setpte;
2863 }
2864
2865
2866 if (unlikely(anon_vma_prepare(vma)))
2867 goto oom;
2868 page = alloc_zeroed_user_highpage_movable(vma, address);
2869 if (!page)
2870 goto oom;
2871 __SetPageUptodate(page);
2872
2873 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
2874 goto oom_free_page;
2875
2876 entry = mk_pte(page, vma->vm_page_prot);
2877 if (vma->vm_flags & VM_WRITE)
2878 entry = pte_mkwrite(pte_mkdirty(entry));
2879
2880 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2881 if (!pte_none(*page_table))
2882 goto release;
2883
2884 inc_mm_counter_fast(mm, MM_ANONPAGES);
2885 page_add_new_anon_rmap(page, vma, address);
2886setpte:
2887 set_pte_at(mm, address, page_table, entry);
2888
2889
2890 update_mmu_cache(vma, address, page_table);
2891unlock:
2892 pte_unmap_unlock(page_table, ptl);
2893 return 0;
2894release:
2895 mem_cgroup_uncharge_page(page);
2896 page_cache_release(page);
2897 goto unlock;
2898oom_free_page:
2899 page_cache_release(page);
2900oom:
2901 return VM_FAULT_OOM;
2902}
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2918 unsigned long address, pmd_t *pmd,
2919 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2920{
2921 pte_t *page_table;
2922 spinlock_t *ptl;
2923 struct page *page;
2924 pte_t entry;
2925 int anon = 0;
2926 int charged = 0;
2927 struct page *dirty_page = NULL;
2928 struct vm_fault vmf;
2929 int ret;
2930 int page_mkwrite = 0;
2931
2932 vmf.virtual_address = (void __user *)(address & PAGE_MASK);
2933 vmf.pgoff = pgoff;
2934 vmf.flags = flags;
2935 vmf.page = NULL;
2936
2937 ret = vma->vm_ops->fault(vma, &vmf);
2938 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
2939 VM_FAULT_RETRY)))
2940 return ret;
2941
2942 if (unlikely(PageHWPoison(vmf.page))) {
2943 if (ret & VM_FAULT_LOCKED)
2944 unlock_page(vmf.page);
2945 return VM_FAULT_HWPOISON;
2946 }
2947
2948
2949
2950
2951
2952 if (unlikely(!(ret & VM_FAULT_LOCKED)))
2953 lock_page(vmf.page);
2954 else
2955 VM_BUG_ON(!PageLocked(vmf.page));
2956
2957
2958
2959
2960 page = vmf.page;
2961 if (flags & FAULT_FLAG_WRITE) {
2962 if (!(vma->vm_flags & VM_SHARED)) {
2963 anon = 1;
2964 if (unlikely(anon_vma_prepare(vma))) {
2965 ret = VM_FAULT_OOM;
2966 goto out;
2967 }
2968 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
2969 vma, address);
2970 if (!page) {
2971 ret = VM_FAULT_OOM;
2972 goto out;
2973 }
2974 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
2975 ret = VM_FAULT_OOM;
2976 page_cache_release(page);
2977 goto out;
2978 }
2979 charged = 1;
2980
2981
2982
2983
2984 if (vma->vm_flags & VM_LOCKED)
2985 clear_page_mlock(vmf.page);
2986 copy_user_highpage(page, vmf.page, address, vma);
2987 __SetPageUptodate(page);
2988 } else {
2989
2990
2991
2992
2993
2994 if (vma->vm_ops->page_mkwrite) {
2995 int tmp;
2996
2997 unlock_page(page);
2998 vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
2999 tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
3000 if (unlikely(tmp &
3001 (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
3002 ret = tmp;
3003 goto unwritable_page;
3004 }
3005 if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
3006 lock_page(page);
3007 if (!page->mapping) {
3008 ret = 0;
3009 unlock_page(page);
3010 goto unwritable_page;
3011 }
3012 } else
3013 VM_BUG_ON(!PageLocked(page));
3014 page_mkwrite = 1;
3015 }
3016 }
3017
3018 }
3019
3020 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033 if (likely(pte_same(*page_table, orig_pte))) {
3034 flush_icache_page(vma, page);
3035 entry = mk_pte(page, vma->vm_page_prot);
3036 if (flags & FAULT_FLAG_WRITE)
3037 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
3038 if (anon) {
3039 inc_mm_counter_fast(mm, MM_ANONPAGES);
3040 page_add_new_anon_rmap(page, vma, address);
3041 } else {
3042 inc_mm_counter_fast(mm, MM_FILEPAGES);
3043 page_add_file_rmap(page);
3044 if (flags & FAULT_FLAG_WRITE) {
3045 dirty_page = page;
3046 get_page(dirty_page);
3047 }
3048 }
3049 set_pte_at(mm, address, page_table, entry);
3050
3051
3052 update_mmu_cache(vma, address, page_table);
3053 } else {
3054 if (charged)
3055 mem_cgroup_uncharge_page(page);
3056 if (anon)
3057 page_cache_release(page);
3058 else
3059 anon = 1;
3060 }
3061
3062 pte_unmap_unlock(page_table, ptl);
3063
3064out:
3065 if (dirty_page) {
3066 struct address_space *mapping = page->mapping;
3067
3068 if (set_page_dirty(dirty_page))
3069 page_mkwrite = 1;
3070 unlock_page(dirty_page);
3071 put_page(dirty_page);
3072 if (page_mkwrite && mapping) {
3073
3074
3075
3076
3077 balance_dirty_pages_ratelimited(mapping);
3078 }
3079
3080
3081 if (vma->vm_file)
3082 file_update_time(vma->vm_file);
3083 } else {
3084 unlock_page(vmf.page);
3085 if (anon)
3086 page_cache_release(vmf.page);
3087 }
3088
3089 return ret;
3090
3091unwritable_page:
3092 page_cache_release(page);
3093 return ret;
3094}
3095
3096static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3097 unsigned long address, pte_t *page_table, pmd_t *pmd,
3098 unsigned int flags, pte_t orig_pte)
3099{
3100 pgoff_t pgoff = (((address & PAGE_MASK)
3101 - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
3102
3103 pte_unmap(page_table);
3104 return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
3105}
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3117 unsigned long address, pte_t *page_table, pmd_t *pmd,
3118 unsigned int flags, pte_t orig_pte)
3119{
3120 pgoff_t pgoff;
3121
3122 flags |= FAULT_FLAG_NONLINEAR;
3123
3124 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
3125 return 0;
3126
3127 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
3128
3129
3130
3131 print_bad_pte(vma, address, orig_pte, NULL);
3132 return VM_FAULT_SIGBUS;
3133 }
3134
3135 pgoff = pte_to_pgoff(orig_pte);
3136 return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
3137}
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
/*
 * Dispatch a fault on a single pte.
 *
 * @mm:      address space the fault happened in
 * @vma:     vma covering @address
 * @address: faulting virtual address
 * @pte:     mapped pte for @address (not locked on entry)
 * @pmd:     pmd that @pte belongs to
 * @flags:   FAULT_FLAG_* bits describing the access
 *
 * Non-present ptes are handed to the specific handler (linear file fault,
 * anonymous page, nonlinear file fault or swap-in) and that handler's
 * return value is passed back.  Present ptes are updated in place under
 * the pte lock and 0 is returned, except for a write to a non-writable
 * pte, which is forwarded to do_wp_page().
 */
static inline int handle_pte_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, pmd_t *pmd, unsigned int flags)
{
	pte_t entry;
	spinlock_t *ptl;

	/* Snapshot taken without the pte lock; re-validated below if needed. */
	entry = *pte;
	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			/* Empty pte: use the vma's ->fault if it has one ... */
			if (vma->vm_ops) {
				if (likely(vma->vm_ops->fault))
					return do_linear_fault(mm, vma, address,
						pte, pmd, flags, entry);
			}
			/* ... otherwise treat it as an anonymous page. */
			return do_anonymous_page(mm, vma, address,
						 pte, pmd, flags);
		}
		/* Non-empty, non-present: either a file pte or a swap entry. */
		if (pte_file(entry))
			return do_nonlinear_fault(mm, vma, address,
					pte, pmd, flags, entry);
		return do_swap_page(mm, vma, address,
					pte, pmd, flags, entry);
	}

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	/* The pte changed since the unlocked snapshot: nothing to do here. */
	if (unlikely(!pte_same(*pte, entry)))
		goto unlock;
	if (flags & FAULT_FLAG_WRITE) {
		/*
		 * Write to a read-only pte.  The lock is passed along and is
		 * not dropped here; presumably do_wp_page() releases it -
		 * confirm against do_wp_page().
		 */
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address,
					pte, pmd, ptl, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
		update_mmu_cache(vma, address, pte);
	} else {
		/*
		 * ptep_set_access_flags() reported no change.  For a write
		 * fault, call the arch hook for spurious faults
		 * (presumably to drop a stale TLB entry - confirm).
		 */
		if (flags & FAULT_FLAG_WRITE)
			flush_tlb_fix_spurious_fault(vma, address);
	}
unlock:
	pte_unmap_unlock(pte, ptl);
	return 0;
}
3204
3205
3206
3207
3208int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3209 unsigned long address, unsigned int flags)
3210{
3211 pgd_t *pgd;
3212 pud_t *pud;
3213 pmd_t *pmd;
3214 pte_t *pte;
3215
3216 __set_current_state(TASK_RUNNING);
3217
3218 count_vm_event(PGFAULT);
3219
3220
3221 check_sync_rss_stat(current);
3222
3223 if (unlikely(is_vm_hugetlb_page(vma)))
3224 return hugetlb_fault(mm, vma, address, flags);
3225
3226 pgd = pgd_offset(mm, address);
3227 pud = pud_alloc(mm, pgd, address);
3228 if (!pud)
3229 return VM_FAULT_OOM;
3230 pmd = pmd_alloc(mm, pud, address);
3231 if (!pmd)
3232 return VM_FAULT_OOM;
3233 pte = pte_alloc_map(mm, pmd, address);
3234 if (!pte)
3235 return VM_FAULT_OOM;
3236
3237 return handle_pte_fault(mm, vma, address, pte, pmd, flags);
3238}
3239
3240#ifndef __PAGETABLE_PUD_FOLDED
3241
3242
3243
3244
3245int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
3246{
3247 pud_t *new = pud_alloc_one(mm, address);
3248 if (!new)
3249 return -ENOMEM;
3250
3251 smp_wmb();
3252
3253 spin_lock(&mm->page_table_lock);
3254 if (pgd_present(*pgd))
3255 pud_free(mm, new);
3256 else
3257 pgd_populate(mm, pgd, new);
3258 spin_unlock(&mm->page_table_lock);
3259 return 0;
3260}
3261#endif
3262
3263#ifndef __PAGETABLE_PMD_FOLDED
3264
3265
3266
3267
3268int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
3269{
3270 pmd_t *new = pmd_alloc_one(mm, address);
3271 if (!new)
3272 return -ENOMEM;
3273
3274 smp_wmb();
3275
3276 spin_lock(&mm->page_table_lock);
3277#ifndef __ARCH_HAS_4LEVEL_HACK
3278 if (pud_present(*pud))
3279 pmd_free(mm, new);
3280 else
3281 pud_populate(mm, pud, new);
3282#else
3283 if (pgd_present(*pud))
3284 pmd_free(mm, new);
3285 else
3286 pgd_populate(mm, pud, new);
3287#endif
3288 spin_unlock(&mm->page_table_lock);
3289 return 0;
3290}
3291#endif
3292
3293int make_pages_present(unsigned long addr, unsigned long end)
3294{
3295 int ret, len, write;
3296 struct vm_area_struct * vma;
3297
3298 vma = find_vma(current->mm, addr);
3299 if (!vma)
3300 return -ENOMEM;
3301 write = (vma->vm_flags & VM_WRITE) != 0;
3302 BUG_ON(addr >= end);
3303 BUG_ON(end > vma->vm_end);
3304 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
3305 ret = get_user_pages(current, current->mm, addr,
3306 len, write, 0, NULL, NULL);
3307 if (ret < 0)
3308 return ret;
3309 return ret == len ? 0 : -EFAULT;
3310}
3311
3312#if !defined(__HAVE_ARCH_GATE_AREA)
3313
3314#if defined(AT_SYSINFO_EHDR)
3315static struct vm_area_struct gate_vma;
3316
3317static int __init gate_vma_init(void)
3318{
3319 gate_vma.vm_mm = NULL;
3320 gate_vma.vm_start = FIXADDR_USER_START;
3321 gate_vma.vm_end = FIXADDR_USER_END;
3322 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
3323 gate_vma.vm_page_prot = __P101;
3324
3325
3326
3327
3328
3329
3330 gate_vma.vm_flags |= VM_ALWAYSDUMP;
3331 return 0;
3332}
3333__initcall(gate_vma_init);
3334#endif
3335
3336struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
3337{
3338#ifdef AT_SYSINFO_EHDR
3339 return &gate_vma;
3340#else
3341 return NULL;
3342#endif
3343}
3344
/*
 * Return 1 if @addr lies in [FIXADDR_USER_START, FIXADDR_USER_END),
 * 0 otherwise.  Always 0 when AT_SYSINFO_EHDR is not defined.
 */
int in_gate_area_no_task(unsigned long addr)
{
#ifdef AT_SYSINFO_EHDR
	if (addr < FIXADDR_USER_START || addr >= FIXADDR_USER_END)
		return 0;
	return 1;
#else
	return 0;
#endif
}
3353
3354#endif
3355
3356static int __follow_pte(struct mm_struct *mm, unsigned long address,
3357 pte_t **ptepp, spinlock_t **ptlp)
3358{
3359 pgd_t *pgd;
3360 pud_t *pud;
3361 pmd_t *pmd;
3362 pte_t *ptep;
3363
3364 pgd = pgd_offset(mm, address);
3365 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
3366 goto out;
3367
3368 pud = pud_offset(pgd, address);
3369 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
3370 goto out;
3371
3372 pmd = pmd_offset(pud, address);
3373 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
3374 goto out;
3375
3376
3377 if (pmd_huge(*pmd))
3378 goto out;
3379
3380 ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
3381 if (!ptep)
3382 goto out;
3383 if (!pte_present(*ptep))
3384 goto unlock;
3385 *ptepp = ptep;
3386 return 0;
3387unlock:
3388 pte_unmap_unlock(ptep, *ptlp);
3389out:
3390 return -EINVAL;
3391}
3392
/*
 * Thin wrapper around __follow_pte() that returns its result unchanged.
 *
 * The call is routed through __cond_lock() so that the "lock *ptlp is held
 * only when the result is 0" rule is expressed to lock-checking tools
 * (presumably sparse - confirm against the __cond_lock definition).
 */
static inline int follow_pte(struct mm_struct *mm, unsigned long address,
			     pte_t **ptepp, spinlock_t **ptlp)
{
	int res;

	/* The value of __cond_lock() itself is unused; res carries the result. */
	(void) __cond_lock(*ptlp,
			   !(res = __follow_pte(mm, address, ptepp, ptlp)));
	return res;
}
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414int follow_pfn(struct vm_area_struct *vma, unsigned long address,
3415 unsigned long *pfn)
3416{
3417 int ret = -EINVAL;
3418 spinlock_t *ptl;
3419 pte_t *ptep;
3420
3421 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
3422 return ret;
3423
3424 ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
3425 if (ret)
3426 return ret;
3427 *pfn = pte_pfn(*ptep);
3428 pte_unmap_unlock(ptep, ptl);
3429 return 0;
3430}
3431EXPORT_SYMBOL(follow_pfn);
3432
3433#ifdef CONFIG_HAVE_IOREMAP_PROT
3434int follow_phys(struct vm_area_struct *vma,
3435 unsigned long address, unsigned int flags,
3436 unsigned long *prot, resource_size_t *phys)
3437{
3438 int ret = -EINVAL;
3439 pte_t *ptep, pte;
3440 spinlock_t *ptl;
3441
3442 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
3443 goto out;
3444
3445 if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
3446 goto out;
3447 pte = *ptep;
3448
3449 if ((flags & FOLL_WRITE) && !pte_write(pte))
3450 goto unlock;
3451
3452 *prot = pgprot_val(pte_pgprot(pte));
3453 *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
3454
3455 ret = 0;
3456unlock:
3457 pte_unmap_unlock(ptep, ptl);
3458out:
3459 return ret;
3460}
3461
3462int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
3463 void *buf, int len, int write)
3464{
3465 resource_size_t phys_addr;
3466 unsigned long prot = 0;
3467 void __iomem *maddr;
3468 int offset = addr & (PAGE_SIZE-1);
3469
3470 if (follow_phys(vma, addr, write, &prot, &phys_addr))
3471 return -EINVAL;
3472
3473 maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
3474 if (write)
3475 memcpy_toio(maddr + offset, buf, len);
3476 else
3477 memcpy_fromio(buf, maddr + offset, len);
3478 iounmap(maddr);
3479
3480 return len;
3481}
3482#endif
3483
3484
3485
3486
3487
3488
3489int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
3490{
3491 struct mm_struct *mm;
3492 struct vm_area_struct *vma;
3493 void *old_buf = buf;
3494
3495 mm = get_task_mm(tsk);
3496 if (!mm)
3497 return 0;
3498
3499 down_read(&mm->mmap_sem);
3500
3501 while (len) {
3502 int bytes, ret, offset;
3503 void *maddr;
3504 struct page *page = NULL;
3505
3506 ret = get_user_pages(tsk, mm, addr, 1,
3507 write, 1, &page, &vma);
3508 if (ret <= 0) {
3509
3510
3511
3512
3513#ifdef CONFIG_HAVE_IOREMAP_PROT
3514 vma = find_vma(mm, addr);
3515 if (!vma)
3516 break;
3517 if (vma->vm_ops && vma->vm_ops->access)
3518 ret = vma->vm_ops->access(vma, addr, buf,
3519 len, write);
3520 if (ret <= 0)
3521#endif
3522 break;
3523 bytes = ret;
3524 } else {
3525 bytes = len;
3526 offset = addr & (PAGE_SIZE-1);
3527 if (bytes > PAGE_SIZE-offset)
3528 bytes = PAGE_SIZE-offset;
3529
3530 maddr = kmap(page);
3531 if (write) {
3532 copy_to_user_page(vma, page, addr,
3533 maddr + offset, buf, bytes);
3534 set_page_dirty_lock(page);
3535 } else {
3536 copy_from_user_page(vma, page, addr,
3537 buf, maddr + offset, bytes);
3538 }
3539 kunmap(page);
3540 page_cache_release(page);
3541 }
3542 len -= bytes;
3543 buf += bytes;
3544 addr += bytes;
3545 }
3546 up_read(&mm->mmap_sem);
3547 mmput(mm);
3548
3549 return buf - old_buf;
3550}
3551
3552
3553
3554
3555void print_vma_addr(char *prefix, unsigned long ip)
3556{
3557 struct mm_struct *mm = current->mm;
3558 struct vm_area_struct *vma;
3559
3560
3561
3562
3563
3564 if (preempt_count())
3565 return;
3566
3567 down_read(&mm->mmap_sem);
3568 vma = find_vma(mm, ip);
3569 if (vma && vma->vm_file) {
3570 struct file *f = vma->vm_file;
3571 char *buf = (char *)__get_free_page(GFP_KERNEL);
3572 if (buf) {
3573 char *p, *s;
3574
3575 p = d_path(&f->f_path, buf, PAGE_SIZE);
3576 if (IS_ERR(p))
3577 p = "?";
3578 s = strrchr(p, '/');
3579 if (s)
3580 p = s+1;
3581 printk("%s%s[%lx+%lx]", prefix, p,
3582 vma->vm_start,
3583 vma->vm_end - vma->vm_start);
3584 free_page((unsigned long)buf);
3585 }
3586 }
3587 up_read(¤t->mm->mmap_sem);
3588}
3589
3590#ifdef CONFIG_PROVE_LOCKING
3591void might_fault(void)
3592{
3593
3594
3595
3596
3597
3598
3599 if (segment_eq(get_fs(), KERNEL_DS))
3600 return;
3601
3602 might_sleep();
3603
3604
3605
3606
3607
3608 if (!in_atomic() && current->mm)
3609 might_lock_read(¤t->mm->mmap_sem);
3610}
3611EXPORT_SYMBOL(might_fault);
3612#endif
3613