1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41#include <linux/kernel_stat.h>
42#include <linux/mm.h>
43#include <linux/hugetlb.h>
44#include <linux/mman.h>
45#include <linux/swap.h>
46#include <linux/highmem.h>
47#include <linux/pagemap.h>
48#include <linux/rmap.h>
49#include <linux/module.h>
50#include <linux/delayacct.h>
51#include <linux/init.h>
52#include <linux/writeback.h>
53
54#include <asm/pgalloc.h>
55#include <asm/uaccess.h>
56#include <asm/tlb.h>
57#include <asm/tlbflush.h>
58#include <asm/pgtable.h>
59
60#include <linux/swapops.h>
61#include <linux/elf.h>
62
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
 * Only built when CONFIG_NEED_MULTIPLE_NODES is not set; both symbols are
 * exported for module use.
 * NOTE(review): presumably the single flat page array and its highest valid
 * index - nothing in this part of the file assigns them, confirm against
 * the architecture setup code.
 */
struct page *mem_map;
EXPORT_SYMBOL(mem_map);

unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
71
/*
 * Globals defined here and exported to modules.  None of them is assigned
 * anywhere in this part of the file.
 * NOTE(review): the names suggest the physical page count, the top of
 * directly mapped memory and an early vmalloc reservation - confirm with
 * the code that initialises them.
 */
unsigned long num_physpages;
EXPORT_SYMBOL(num_physpages);

void *high_memory;
EXPORT_SYMBOL(high_memory);

unsigned long vmalloc_earlyreserve;
EXPORT_SYMBOL(vmalloc_earlyreserve);
86
87int randomize_va_space __read_mostly = 1;
88
89static int __init disable_randmaps(char *s)
90{
91 randomize_va_space = 0;
92 return 1;
93}
94__setup("norandmaps", disable_randmaps);
95
96
97
98
99
100
101
102
103void pgd_clear_bad(pgd_t *pgd)
104{
105 pgd_ERROR(*pgd);
106 pgd_clear(pgd);
107}
108
109void pud_clear_bad(pud_t *pud)
110{
111 pud_ERROR(*pud);
112 pud_clear(pud);
113}
114
115void pmd_clear_bad(pmd_t *pmd)
116{
117 pmd_ERROR(*pmd);
118 pmd_clear(pmd);
119}
120
121
122
123
124
125static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
126{
127 struct page *page = pmd_page(*pmd);
128 pmd_clear(pmd);
129 pte_lock_deinit(page);
130 pte_free_tlb(tlb, page);
131 dec_zone_page_state(page, NR_PAGETABLE);
132 tlb->mm->nr_ptes--;
133}
134
135static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
136 unsigned long addr, unsigned long end,
137 unsigned long floor, unsigned long ceiling)
138{
139 pmd_t *pmd;
140 unsigned long next;
141 unsigned long start;
142
143 start = addr;
144 pmd = pmd_offset(pud, addr);
145 do {
146 next = pmd_addr_end(addr, end);
147 if (pmd_none_or_clear_bad(pmd))
148 continue;
149 free_pte_range(tlb, pmd);
150 } while (pmd++, addr = next, addr != end);
151
152 start &= PUD_MASK;
153 if (start < floor)
154 return;
155 if (ceiling) {
156 ceiling &= PUD_MASK;
157 if (!ceiling)
158 return;
159 }
160 if (end - 1 > ceiling - 1)
161 return;
162
163 pmd = pmd_offset(pud, start);
164 pud_clear(pud);
165 pmd_free_tlb(tlb, pmd);
166}
167
168static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
169 unsigned long addr, unsigned long end,
170 unsigned long floor, unsigned long ceiling)
171{
172 pud_t *pud;
173 unsigned long next;
174 unsigned long start;
175
176 start = addr;
177 pud = pud_offset(pgd, addr);
178 do {
179 next = pud_addr_end(addr, end);
180 if (pud_none_or_clear_bad(pud))
181 continue;
182 free_pmd_range(tlb, pud, addr, next, floor, ceiling);
183 } while (pud++, addr = next, addr != end);
184
185 start &= PGDIR_MASK;
186 if (start < floor)
187 return;
188 if (ceiling) {
189 ceiling &= PGDIR_MASK;
190 if (!ceiling)
191 return;
192 }
193 if (end - 1 > ceiling - 1)
194 return;
195
196 pud = pud_offset(pgd, start);
197 pgd_clear(pgd);
198 pud_free_tlb(tlb, pud);
199}
200
201
202
203
204
205
206void free_pgd_range(struct mmu_gather **tlb,
207 unsigned long addr, unsigned long end,
208 unsigned long floor, unsigned long ceiling)
209{
210 pgd_t *pgd;
211 unsigned long next;
212 unsigned long start;
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240 addr &= PMD_MASK;
241 if (addr < floor) {
242 addr += PMD_SIZE;
243 if (!addr)
244 return;
245 }
246 if (ceiling) {
247 ceiling &= PMD_MASK;
248 if (!ceiling)
249 return;
250 }
251 if (end - 1 > ceiling - 1)
252 end -= PMD_SIZE;
253 if (addr > end - 1)
254 return;
255
256 start = addr;
257 pgd = pgd_offset((*tlb)->mm, addr);
258 do {
259 next = pgd_addr_end(addr, end);
260 if (pgd_none_or_clear_bad(pgd))
261 continue;
262 free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
263 } while (pgd++, addr = next, addr != end);
264
265 if (!(*tlb)->fullmm)
266 flush_tlb_pgtables((*tlb)->mm, start, end);
267}
268
269void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
270 unsigned long floor, unsigned long ceiling)
271{
272 while (vma) {
273 struct vm_area_struct *next = vma->vm_next;
274 unsigned long addr = vma->vm_start;
275
276
277
278
279 anon_vma_unlink(vma);
280 unlink_file_vma(vma);
281
282 if (is_vm_hugetlb_page(vma)) {
283 hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
284 floor, next? next->vm_start: ceiling);
285 } else {
286
287
288
289 while (next && next->vm_start <= vma->vm_end + PMD_SIZE
290 && !is_vm_hugetlb_page(next)) {
291 vma = next;
292 next = vma->vm_next;
293 anon_vma_unlink(vma);
294 unlink_file_vma(vma);
295 }
296 free_pgd_range(tlb, addr, vma->vm_end,
297 floor, next? next->vm_start: ceiling);
298 }
299 vma = next;
300 }
301}
302
303int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
304{
305 struct page *new = pte_alloc_one(mm, address);
306 if (!new)
307 return -ENOMEM;
308
309 pte_lock_init(new);
310 spin_lock(&mm->page_table_lock);
311 if (pmd_present(*pmd)) {
312 pte_lock_deinit(new);
313 pte_free(new);
314 } else {
315 mm->nr_ptes++;
316 inc_zone_page_state(new, NR_PAGETABLE);
317 pmd_populate(mm, pmd, new);
318 }
319 spin_unlock(&mm->page_table_lock);
320 return 0;
321}
322
323int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
324{
325 pte_t *new = pte_alloc_one_kernel(&init_mm, address);
326 if (!new)
327 return -ENOMEM;
328
329 spin_lock(&init_mm.page_table_lock);
330 if (pmd_present(*pmd))
331 pte_free_kernel(new);
332 else
333 pmd_populate_kernel(&init_mm, pmd, new);
334 spin_unlock(&init_mm.page_table_lock);
335 return 0;
336}
337
/*
 * Apply the accumulated file and anonymous rss deltas to @mm, skipping a
 * counter update altogether when its delta is zero.
 *
 * NOTE(review): add_mm_counter() appears to take the counter's name as its
 * second argument and the amount as its third, which is why the parameter
 * names are repeated - confirm against its definition before renaming.
 */
static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
{
	if (file_rss != 0)
		add_mm_counter(mm, file_rss, file_rss);

	if (anon_rss != 0)
		add_mm_counter(mm, anon_rss, anon_rss);
}
345
346
347
348
349
350
351
352
353void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
354{
355 printk(KERN_ERR "Bad pte = %08llx, process = %s, "
356 "vm_flags = %lx, vaddr = %lx\n",
357 (long long)pte_val(pte),
358 (vma->vm_mm == current->mm ? current->comm : "???"),
359 vma->vm_flags, vaddr);
360 dump_stack();
361}
362
363static inline int is_cow_mapping(unsigned int flags)
364{
365 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
366}
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
389{
390 unsigned long pfn = pte_pfn(pte);
391
392 if (unlikely(vma->vm_flags & VM_PFNMAP)) {
393 unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
394 if (pfn == vma->vm_pgoff + off)
395 return NULL;
396 if (!is_cow_mapping(vma->vm_flags))
397 return NULL;
398 }
399
400
401
402
403
404
405
406 if (unlikely(!pfn_valid(pfn))) {
407 print_bad_pte(vma, pte, addr);
408 return NULL;
409 }
410
411
412
413
414
415
416
417
418 return pfn_to_page(pfn);
419}
420
421
422
423
424
425
426
427static inline void
428copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
429 pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
430 unsigned long addr, int *rss)
431{
432 unsigned long vm_flags = vma->vm_flags;
433 pte_t pte = *src_pte;
434 struct page *page;
435
436
437 if (unlikely(!pte_present(pte))) {
438 if (!pte_file(pte)) {
439 swp_entry_t entry = pte_to_swp_entry(pte);
440
441 swap_duplicate(entry);
442
443 if (unlikely(list_empty(&dst_mm->mmlist))) {
444 spin_lock(&mmlist_lock);
445 if (list_empty(&dst_mm->mmlist))
446 list_add(&dst_mm->mmlist,
447 &src_mm->mmlist);
448 spin_unlock(&mmlist_lock);
449 }
450 if (is_write_migration_entry(entry) &&
451 is_cow_mapping(vm_flags)) {
452
453
454
455
456 make_migration_entry_read(&entry);
457 pte = swp_entry_to_pte(entry);
458 set_pte_at(src_mm, addr, src_pte, pte);
459 }
460 }
461 goto out_set_pte;
462 }
463
464
465
466
467
468 if (is_cow_mapping(vm_flags)) {
469 ptep_set_wrprotect(src_mm, addr, src_pte);
470 pte = pte_wrprotect(pte);
471 }
472
473
474
475
476
477 if (vm_flags & VM_SHARED)
478 pte = pte_mkclean(pte);
479 pte = pte_mkold(pte);
480
481 page = vm_normal_page(vma, addr, pte);
482 if (page) {
483 get_page(page);
484 page_dup_rmap(page, vma, addr);
485 rss[!!PageAnon(page)]++;
486 }
487
488out_set_pte:
489 set_pte_at(dst_mm, addr, dst_pte, pte);
490}
491
492static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
493 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
494 unsigned long addr, unsigned long end)
495{
496 pte_t *src_pte, *dst_pte;
497 spinlock_t *src_ptl, *dst_ptl;
498 int progress = 0;
499 int rss[2];
500
501again:
502 rss[1] = rss[0] = 0;
503 dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
504 if (!dst_pte)
505 return -ENOMEM;
506 src_pte = pte_offset_map_nested(src_pmd, addr);
507 src_ptl = pte_lockptr(src_mm, src_pmd);
508 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
509 arch_enter_lazy_mmu_mode();
510
511 do {
512
513
514
515
516 if (progress >= 32) {
517 progress = 0;
518 if (need_resched() ||
519 need_lockbreak(src_ptl) ||
520 need_lockbreak(dst_ptl))
521 break;
522 }
523 if (pte_none(*src_pte)) {
524 progress++;
525 continue;
526 }
527 copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
528 progress += 8;
529 } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
530
531 arch_leave_lazy_mmu_mode();
532 spin_unlock(src_ptl);
533 pte_unmap_nested(src_pte - 1);
534 add_mm_rss(dst_mm, rss[0], rss[1]);
535 pte_unmap_unlock(dst_pte - 1, dst_ptl);
536 cond_resched();
537 if (addr != end)
538 goto again;
539 return 0;
540}
541
542static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
543 pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
544 unsigned long addr, unsigned long end)
545{
546 pmd_t *src_pmd, *dst_pmd;
547 unsigned long next;
548
549 dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
550 if (!dst_pmd)
551 return -ENOMEM;
552 src_pmd = pmd_offset(src_pud, addr);
553 do {
554 next = pmd_addr_end(addr, end);
555 if (pmd_none_or_clear_bad(src_pmd))
556 continue;
557 if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
558 vma, addr, next))
559 return -ENOMEM;
560 } while (dst_pmd++, src_pmd++, addr = next, addr != end);
561 return 0;
562}
563
564static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
565 pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
566 unsigned long addr, unsigned long end)
567{
568 pud_t *src_pud, *dst_pud;
569 unsigned long next;
570
571 dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
572 if (!dst_pud)
573 return -ENOMEM;
574 src_pud = pud_offset(src_pgd, addr);
575 do {
576 next = pud_addr_end(addr, end);
577 if (pud_none_or_clear_bad(src_pud))
578 continue;
579 if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
580 vma, addr, next))
581 return -ENOMEM;
582 } while (dst_pud++, src_pud++, addr = next, addr != end);
583 return 0;
584}
585
586int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
587 struct vm_area_struct *vma)
588{
589 pgd_t *src_pgd, *dst_pgd;
590 unsigned long next;
591 unsigned long addr = vma->vm_start;
592 unsigned long end = vma->vm_end;
593
594
595
596
597
598
599
600 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
601 if (!vma->anon_vma)
602 return 0;
603 }
604
605 if (is_vm_hugetlb_page(vma))
606 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
607
608 dst_pgd = pgd_offset(dst_mm, addr);
609 src_pgd = pgd_offset(src_mm, addr);
610 do {
611 next = pgd_addr_end(addr, end);
612 if (pgd_none_or_clear_bad(src_pgd))
613 continue;
614 if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
615 vma, addr, next))
616 return -ENOMEM;
617 } while (dst_pgd++, src_pgd++, addr = next, addr != end);
618 return 0;
619}
620
621static unsigned long zap_pte_range(struct mmu_gather *tlb,
622 struct vm_area_struct *vma, pmd_t *pmd,
623 unsigned long addr, unsigned long end,
624 long *zap_work, struct zap_details *details)
625{
626 struct mm_struct *mm = tlb->mm;
627 pte_t *pte;
628 spinlock_t *ptl;
629 int file_rss = 0;
630 int anon_rss = 0;
631
632 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
633 arch_enter_lazy_mmu_mode();
634 do {
635 pte_t ptent = *pte;
636 if (pte_none(ptent)) {
637 (*zap_work)--;
638 continue;
639 }
640
641 (*zap_work) -= PAGE_SIZE;
642
643 if (pte_present(ptent)) {
644 struct page *page;
645
646 page = vm_normal_page(vma, addr, ptent);
647 if (unlikely(details) && page) {
648
649
650
651
652
653 if (details->check_mapping &&
654 details->check_mapping != page->mapping)
655 continue;
656
657
658
659
660 if (details->nonlinear_vma &&
661 (page->index < details->first_index ||
662 page->index > details->last_index))
663 continue;
664 }
665 ptent = ptep_get_and_clear_full(mm, addr, pte,
666 tlb->fullmm);
667 tlb_remove_tlb_entry(tlb, pte, addr);
668 if (unlikely(!page))
669 continue;
670 if (unlikely(details) && details->nonlinear_vma
671 && linear_page_index(details->nonlinear_vma,
672 addr) != page->index)
673 set_pte_at(mm, addr, pte,
674 pgoff_to_pte(page->index));
675 if (PageAnon(page))
676 anon_rss--;
677 else {
678 if (pte_dirty(ptent))
679 set_page_dirty(page);
680 if (pte_young(ptent))
681 SetPageReferenced(page);
682 file_rss--;
683 }
684 page_remove_rmap(page, vma);
685 tlb_remove_page(tlb, page);
686 continue;
687 }
688
689
690
691
692 if (unlikely(details))
693 continue;
694 if (!pte_file(ptent))
695 free_swap_and_cache(pte_to_swp_entry(ptent));
696 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
697 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
698
699 add_mm_rss(mm, file_rss, anon_rss);
700 arch_leave_lazy_mmu_mode();
701 pte_unmap_unlock(pte - 1, ptl);
702
703 return addr;
704}
705
706static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
707 struct vm_area_struct *vma, pud_t *pud,
708 unsigned long addr, unsigned long end,
709 long *zap_work, struct zap_details *details)
710{
711 pmd_t *pmd;
712 unsigned long next;
713
714 pmd = pmd_offset(pud, addr);
715 do {
716 next = pmd_addr_end(addr, end);
717 if (pmd_none_or_clear_bad(pmd)) {
718 (*zap_work)--;
719 continue;
720 }
721 next = zap_pte_range(tlb, vma, pmd, addr, next,
722 zap_work, details);
723 } while (pmd++, addr = next, (addr != end && *zap_work > 0));
724
725 return addr;
726}
727
728static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
729 struct vm_area_struct *vma, pgd_t *pgd,
730 unsigned long addr, unsigned long end,
731 long *zap_work, struct zap_details *details)
732{
733 pud_t *pud;
734 unsigned long next;
735
736 pud = pud_offset(pgd, addr);
737 do {
738 next = pud_addr_end(addr, end);
739 if (pud_none_or_clear_bad(pud)) {
740 (*zap_work)--;
741 continue;
742 }
743 next = zap_pmd_range(tlb, vma, pud, addr, next,
744 zap_work, details);
745 } while (pud++, addr = next, (addr != end && *zap_work > 0));
746
747 return addr;
748}
749
750static unsigned long unmap_page_range(struct mmu_gather *tlb,
751 struct vm_area_struct *vma,
752 unsigned long addr, unsigned long end,
753 long *zap_work, struct zap_details *details)
754{
755 pgd_t *pgd;
756 unsigned long next;
757
758 if (details && !details->check_mapping && !details->nonlinear_vma)
759 details = NULL;
760
761 BUG_ON(addr >= end);
762 tlb_start_vma(tlb, vma);
763 pgd = pgd_offset(vma->vm_mm, addr);
764 do {
765 next = pgd_addr_end(addr, end);
766 if (pgd_none_or_clear_bad(pgd)) {
767 (*zap_work)--;
768 continue;
769 }
770 next = zap_pud_range(tlb, vma, pgd, addr, next,
771 zap_work, details);
772 } while (pgd++, addr = next, (addr != end && *zap_work > 0));
773 tlb_end_vma(tlb, vma);
774
775 return addr;
776}
777
/*
 * Work budget unmap_vmas() grants itself between TLB flush / reschedule
 * points: 8 pages' worth with CONFIG_PREEMPT, 1024 pages' worth without.
 */
#ifndef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#else
# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
#endif
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811unsigned long unmap_vmas(struct mmu_gather **tlbp,
812 struct vm_area_struct *vma, unsigned long start_addr,
813 unsigned long end_addr, unsigned long *nr_accounted,
814 struct zap_details *details)
815{
816 long zap_work = ZAP_BLOCK_SIZE;
817 unsigned long tlb_start = 0;
818 int tlb_start_valid = 0;
819 unsigned long start = start_addr;
820 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
821 int fullmm = (*tlbp)->fullmm;
822
823 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
824 unsigned long end;
825
826 start = max(vma->vm_start, start_addr);
827 if (start >= vma->vm_end)
828 continue;
829 end = min(vma->vm_end, end_addr);
830 if (end <= vma->vm_start)
831 continue;
832
833 if (vma->vm_flags & VM_ACCOUNT)
834 *nr_accounted += (end - start) >> PAGE_SHIFT;
835
836 while (start != end) {
837 if (!tlb_start_valid) {
838 tlb_start = start;
839 tlb_start_valid = 1;
840 }
841
842 if (unlikely(is_vm_hugetlb_page(vma))) {
843 unmap_hugepage_range(vma, start, end);
844 zap_work -= (end - start) /
845 (HPAGE_SIZE / PAGE_SIZE);
846 start = end;
847 } else
848 start = unmap_page_range(*tlbp, vma,
849 start, end, &zap_work, details);
850
851 if (zap_work > 0) {
852 BUG_ON(start != end);
853 break;
854 }
855
856 tlb_finish_mmu(*tlbp, tlb_start, start);
857
858 if (need_resched() ||
859 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
860 if (i_mmap_lock) {
861 *tlbp = NULL;
862 goto out;
863 }
864 cond_resched();
865 }
866
867 *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
868 tlb_start_valid = 0;
869 zap_work = ZAP_BLOCK_SIZE;
870 }
871 }
872out:
873 return start;
874}
875
876
877
878
879
880
881
882
883unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
884 unsigned long size, struct zap_details *details)
885{
886 struct mm_struct *mm = vma->vm_mm;
887 struct mmu_gather *tlb;
888 unsigned long end = address + size;
889 unsigned long nr_accounted = 0;
890
891 lru_add_drain();
892 tlb = tlb_gather_mmu(mm, 0);
893 update_hiwater_rss(mm);
894 end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
895 if (tlb)
896 tlb_finish_mmu(tlb, address, end);
897 return end;
898}
899
900
901
902
903struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
904 unsigned int flags)
905{
906 pgd_t *pgd;
907 pud_t *pud;
908 pmd_t *pmd;
909 pte_t *ptep, pte;
910 spinlock_t *ptl;
911 struct page *page;
912 struct mm_struct *mm = vma->vm_mm;
913
914 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
915 if (!IS_ERR(page)) {
916 BUG_ON(flags & FOLL_GET);
917 goto out;
918 }
919
920 page = NULL;
921 pgd = pgd_offset(mm, address);
922 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
923 goto no_page_table;
924
925 pud = pud_offset(pgd, address);
926 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
927 goto no_page_table;
928
929 pmd = pmd_offset(pud, address);
930 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
931 goto no_page_table;
932
933 if (pmd_huge(*pmd)) {
934 BUG_ON(flags & FOLL_GET);
935 page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
936 goto out;
937 }
938
939 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
940 if (!ptep)
941 goto out;
942
943 pte = *ptep;
944 if (!pte_present(pte))
945 goto unlock;
946 if ((flags & FOLL_WRITE) && !pte_write(pte))
947 goto unlock;
948 page = vm_normal_page(vma, address, pte);
949 if (unlikely(!page))
950 goto unlock;
951
952 if (flags & FOLL_GET)
953 get_page(page);
954 if (flags & FOLL_TOUCH) {
955 if ((flags & FOLL_WRITE) &&
956 !pte_dirty(pte) && !PageDirty(page))
957 set_page_dirty(page);
958 mark_page_accessed(page);
959 }
960unlock:
961 pte_unmap_unlock(ptep, ptl);
962out:
963 return page;
964
965no_page_table:
966
967
968
969
970 if (flags & FOLL_ANON) {
971 page = ZERO_PAGE(address);
972 if (flags & FOLL_GET)
973 get_page(page);
974 BUG_ON(flags & FOLL_WRITE);
975 }
976 return page;
977}
978
979int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
980 unsigned long start, int len, int write, int force,
981 struct page **pages, struct vm_area_struct **vmas)
982{
983 int i;
984 unsigned int vm_flags;
985
986
987
988
989
990 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
991 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
992 i = 0;
993
994 do {
995 struct vm_area_struct *vma;
996 unsigned int foll_flags;
997
998 vma = find_extend_vma(mm, start);
999 if (!vma && in_gate_area(tsk, start)) {
1000 unsigned long pg = start & PAGE_MASK;
1001 struct vm_area_struct *gate_vma = get_gate_vma(tsk);
1002 pgd_t *pgd;
1003 pud_t *pud;
1004 pmd_t *pmd;
1005 pte_t *pte;
1006 if (write)
1007 return i ? : -EFAULT;
1008 if (pg > TASK_SIZE)
1009 pgd = pgd_offset_k(pg);
1010 else
1011 pgd = pgd_offset_gate(mm, pg);
1012 BUG_ON(pgd_none(*pgd));
1013 pud = pud_offset(pgd, pg);
1014 BUG_ON(pud_none(*pud));
1015 pmd = pmd_offset(pud, pg);
1016 if (pmd_none(*pmd))
1017 return i ? : -EFAULT;
1018 pte = pte_offset_map(pmd, pg);
1019 if (pte_none(*pte)) {
1020 pte_unmap(pte);
1021 return i ? : -EFAULT;
1022 }
1023 if (pages) {
1024 struct page *page = vm_normal_page(gate_vma, start, *pte);
1025 pages[i] = page;
1026 if (page)
1027 get_page(page);
1028 }
1029 pte_unmap(pte);
1030 if (vmas)
1031 vmas[i] = gate_vma;
1032 i++;
1033 start += PAGE_SIZE;
1034 len--;
1035 continue;
1036 }
1037
1038 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
1039 || !(vm_flags & vma->vm_flags))
1040 return i ? : -EFAULT;
1041
1042 if (is_vm_hugetlb_page(vma)) {
1043 i = follow_hugetlb_page(mm, vma, pages, vmas,
1044 &start, &len, i);
1045 continue;
1046 }
1047
1048 foll_flags = FOLL_TOUCH;
1049 if (pages)
1050 foll_flags |= FOLL_GET;
1051 if (!write && !(vma->vm_flags & VM_LOCKED) &&
1052 (!vma->vm_ops || !vma->vm_ops->nopage))
1053 foll_flags |= FOLL_ANON;
1054
1055 do {
1056 struct page *page;
1057
1058 if (write)
1059 foll_flags |= FOLL_WRITE;
1060
1061 cond_resched();
1062 while (!(page = follow_page(vma, start, foll_flags))) {
1063 int ret;
1064 ret = __handle_mm_fault(mm, vma, start,
1065 foll_flags & FOLL_WRITE);
1066
1067
1068
1069
1070
1071
1072 if (ret & VM_FAULT_WRITE)
1073 foll_flags &= ~FOLL_WRITE;
1074
1075 switch (ret & ~VM_FAULT_WRITE) {
1076 case VM_FAULT_MINOR:
1077 tsk->min_flt++;
1078 break;
1079 case VM_FAULT_MAJOR:
1080 tsk->maj_flt++;
1081 break;
1082 case VM_FAULT_SIGBUS:
1083 return i ? i : -EFAULT;
1084 case VM_FAULT_OOM:
1085 return i ? i : -ENOMEM;
1086 default:
1087 BUG();
1088 }
1089 cond_resched();
1090 }
1091 if (pages) {
1092 pages[i] = page;
1093
1094 flush_anon_page(vma, page, start);
1095 flush_dcache_page(page);
1096 }
1097 if (vmas)
1098 vmas[i] = vma;
1099 i++;
1100 start += PAGE_SIZE;
1101 len--;
1102 } while (len && start < vma->vm_end);
1103 } while (len);
1104 return i;
1105}
1106EXPORT_SYMBOL(get_user_pages);
1107
1108static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1109 unsigned long addr, unsigned long end, pgprot_t prot)
1110{
1111 pte_t *pte;
1112 spinlock_t *ptl;
1113 int err = 0;
1114
1115 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1116 if (!pte)
1117 return -EAGAIN;
1118 arch_enter_lazy_mmu_mode();
1119 do {
1120 struct page *page = ZERO_PAGE(addr);
1121 pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
1122
1123 if (unlikely(!pte_none(*pte))) {
1124 err = -EEXIST;
1125 pte++;
1126 break;
1127 }
1128 page_cache_get(page);
1129 page_add_file_rmap(page);
1130 inc_mm_counter(mm, file_rss);
1131 set_pte_at(mm, addr, pte, zero_pte);
1132 } while (pte++, addr += PAGE_SIZE, addr != end);
1133 arch_leave_lazy_mmu_mode();
1134 pte_unmap_unlock(pte - 1, ptl);
1135 return err;
1136}
1137
1138static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
1139 unsigned long addr, unsigned long end, pgprot_t prot)
1140{
1141 pmd_t *pmd;
1142 unsigned long next;
1143 int err;
1144
1145 pmd = pmd_alloc(mm, pud, addr);
1146 if (!pmd)
1147 return -EAGAIN;
1148 do {
1149 next = pmd_addr_end(addr, end);
1150 err = zeromap_pte_range(mm, pmd, addr, next, prot);
1151 if (err)
1152 break;
1153 } while (pmd++, addr = next, addr != end);
1154 return err;
1155}
1156
1157static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1158 unsigned long addr, unsigned long end, pgprot_t prot)
1159{
1160 pud_t *pud;
1161 unsigned long next;
1162 int err;
1163
1164 pud = pud_alloc(mm, pgd, addr);
1165 if (!pud)
1166 return -EAGAIN;
1167 do {
1168 next = pud_addr_end(addr, end);
1169 err = zeromap_pmd_range(mm, pud, addr, next, prot);
1170 if (err)
1171 break;
1172 } while (pud++, addr = next, addr != end);
1173 return err;
1174}
1175
1176int zeromap_page_range(struct vm_area_struct *vma,
1177 unsigned long addr, unsigned long size, pgprot_t prot)
1178{
1179 pgd_t *pgd;
1180 unsigned long next;
1181 unsigned long end = addr + size;
1182 struct mm_struct *mm = vma->vm_mm;
1183 int err;
1184
1185 BUG_ON(addr >= end);
1186 pgd = pgd_offset(mm, addr);
1187 flush_cache_range(vma, addr, end);
1188 do {
1189 next = pgd_addr_end(addr, end);
1190 err = zeromap_pud_range(mm, pgd, addr, next, prot);
1191 if (err)
1192 break;
1193 } while (pgd++, addr = next, addr != end);
1194 return err;
1195}
1196
1197pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
1198{
1199 pgd_t * pgd = pgd_offset(mm, addr);
1200 pud_t * pud = pud_alloc(mm, pgd, addr);
1201 if (pud) {
1202 pmd_t * pmd = pmd_alloc(mm, pud, addr);
1203 if (pmd)
1204 return pte_alloc_map_lock(mm, pmd, addr, ptl);
1205 }
1206 return NULL;
1207}
1208
1209
1210
1211
1212
1213
1214
1215
1216static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
1217{
1218 int retval;
1219 pte_t *pte;
1220 spinlock_t *ptl;
1221
1222 retval = -EINVAL;
1223 if (PageAnon(page))
1224 goto out;
1225 retval = -ENOMEM;
1226 flush_dcache_page(page);
1227 pte = get_locked_pte(mm, addr, &ptl);
1228 if (!pte)
1229 goto out;
1230 retval = -EBUSY;
1231 if (!pte_none(*pte))
1232 goto out_unlock;
1233
1234
1235 get_page(page);
1236 inc_mm_counter(mm, file_rss);
1237 page_add_file_rmap(page);
1238 set_pte_at(mm, addr, pte, mk_pte(page, prot));
1239
1240 retval = 0;
1241out_unlock:
1242 pte_unmap_unlock(pte, ptl);
1243out:
1244 return retval;
1245}
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
1270{
1271 if (addr < vma->vm_start || addr >= vma->vm_end)
1272 return -EFAULT;
1273 if (!page_count(page))
1274 return -EINVAL;
1275 vma->vm_flags |= VM_INSERTPAGE;
1276 return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
1277}
1278EXPORT_SYMBOL(vm_insert_page);
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1293 unsigned long pfn)
1294{
1295 struct mm_struct *mm = vma->vm_mm;
1296 int retval;
1297 pte_t *pte, entry;
1298 spinlock_t *ptl;
1299
1300 BUG_ON(!(vma->vm_flags & VM_PFNMAP));
1301 BUG_ON(is_cow_mapping(vma->vm_flags));
1302
1303 retval = -ENOMEM;
1304 pte = get_locked_pte(mm, addr, &ptl);
1305 if (!pte)
1306 goto out;
1307 retval = -EBUSY;
1308 if (!pte_none(*pte))
1309 goto out_unlock;
1310
1311
1312 entry = pfn_pte(pfn, vma->vm_page_prot);
1313 set_pte_at(mm, addr, pte, entry);
1314 update_mmu_cache(vma, addr, entry);
1315
1316 retval = 0;
1317out_unlock:
1318 pte_unmap_unlock(pte, ptl);
1319
1320out:
1321 return retval;
1322}
1323EXPORT_SYMBOL(vm_insert_pfn);
1324
1325
1326
1327
1328
1329
1330static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1331 unsigned long addr, unsigned long end,
1332 unsigned long pfn, pgprot_t prot)
1333{
1334 pte_t *pte;
1335 spinlock_t *ptl;
1336
1337 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1338 if (!pte)
1339 return -ENOMEM;
1340 arch_enter_lazy_mmu_mode();
1341 do {
1342 BUG_ON(!pte_none(*pte));
1343 set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
1344 pfn++;
1345 } while (pte++, addr += PAGE_SIZE, addr != end);
1346 arch_leave_lazy_mmu_mode();
1347 pte_unmap_unlock(pte - 1, ptl);
1348 return 0;
1349}
1350
1351static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
1352 unsigned long addr, unsigned long end,
1353 unsigned long pfn, pgprot_t prot)
1354{
1355 pmd_t *pmd;
1356 unsigned long next;
1357
1358 pfn -= addr >> PAGE_SHIFT;
1359 pmd = pmd_alloc(mm, pud, addr);
1360 if (!pmd)
1361 return -ENOMEM;
1362 do {
1363 next = pmd_addr_end(addr, end);
1364 if (remap_pte_range(mm, pmd, addr, next,
1365 pfn + (addr >> PAGE_SHIFT), prot))
1366 return -ENOMEM;
1367 } while (pmd++, addr = next, addr != end);
1368 return 0;
1369}
1370
1371static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1372 unsigned long addr, unsigned long end,
1373 unsigned long pfn, pgprot_t prot)
1374{
1375 pud_t *pud;
1376 unsigned long next;
1377
1378 pfn -= addr >> PAGE_SHIFT;
1379 pud = pud_alloc(mm, pgd, addr);
1380 if (!pud)
1381 return -ENOMEM;
1382 do {
1383 next = pud_addr_end(addr, end);
1384 if (remap_pmd_range(mm, pud, addr, next,
1385 pfn + (addr >> PAGE_SHIFT), prot))
1386 return -ENOMEM;
1387 } while (pud++, addr = next, addr != end);
1388 return 0;
1389}
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1402 unsigned long pfn, unsigned long size, pgprot_t prot)
1403{
1404 pgd_t *pgd;
1405 unsigned long next;
1406 unsigned long end = addr + PAGE_ALIGN(size);
1407 struct mm_struct *mm = vma->vm_mm;
1408 int err;
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (is_cow_mapping(vma->vm_flags)) {
1429 if (addr != vma->vm_start || end != vma->vm_end)
1430 return -EINVAL;
1431 vma->vm_pgoff = pfn;
1432 }
1433
1434 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
1435
1436 BUG_ON(addr >= end);
1437 pfn -= addr >> PAGE_SHIFT;
1438 pgd = pgd_offset(mm, addr);
1439 flush_cache_range(vma, addr, end);
1440 do {
1441 next = pgd_addr_end(addr, end);
1442 err = remap_pud_range(mm, pgd, addr, next,
1443 pfn + (addr >> PAGE_SHIFT), prot);
1444 if (err)
1445 break;
1446 } while (pgd++, addr = next, addr != end);
1447 return err;
1448}
1449EXPORT_SYMBOL(remap_pfn_range);
1450
1451static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1452 unsigned long addr, unsigned long end,
1453 pte_fn_t fn, void *data)
1454{
1455 pte_t *pte;
1456 int err;
1457 struct page *pmd_page;
1458 spinlock_t *uninitialized_var(ptl);
1459
1460 pte = (mm == &init_mm) ?
1461 pte_alloc_kernel(pmd, addr) :
1462 pte_alloc_map_lock(mm, pmd, addr, &ptl);
1463 if (!pte)
1464 return -ENOMEM;
1465
1466 BUG_ON(pmd_huge(*pmd));
1467
1468 pmd_page = pmd_page(*pmd);
1469
1470 do {
1471 err = fn(pte, pmd_page, addr, data);
1472 if (err)
1473 break;
1474 } while (pte++, addr += PAGE_SIZE, addr != end);
1475
1476 if (mm != &init_mm)
1477 pte_unmap_unlock(pte-1, ptl);
1478 return err;
1479}
1480
1481static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
1482 unsigned long addr, unsigned long end,
1483 pte_fn_t fn, void *data)
1484{
1485 pmd_t *pmd;
1486 unsigned long next;
1487 int err;
1488
1489 pmd = pmd_alloc(mm, pud, addr);
1490 if (!pmd)
1491 return -ENOMEM;
1492 do {
1493 next = pmd_addr_end(addr, end);
1494 err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
1495 if (err)
1496 break;
1497 } while (pmd++, addr = next, addr != end);
1498 return err;
1499}
1500
1501static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
1502 unsigned long addr, unsigned long end,
1503 pte_fn_t fn, void *data)
1504{
1505 pud_t *pud;
1506 unsigned long next;
1507 int err;
1508
1509 pud = pud_alloc(mm, pgd, addr);
1510 if (!pud)
1511 return -ENOMEM;
1512 do {
1513 next = pud_addr_end(addr, end);
1514 err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
1515 if (err)
1516 break;
1517 } while (pud++, addr = next, addr != end);
1518 return err;
1519}
1520
1521
1522
1523
1524
1525int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1526 unsigned long size, pte_fn_t fn, void *data)
1527{
1528 pgd_t *pgd;
1529 unsigned long next;
1530 unsigned long end = addr + size;
1531 int err;
1532
1533 BUG_ON(addr >= end);
1534 pgd = pgd_offset(mm, addr);
1535 do {
1536 next = pgd_addr_end(addr, end);
1537 err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
1538 if (err)
1539 break;
1540 } while (pgd++, addr = next, addr != end);
1541 return err;
1542}
1543EXPORT_SYMBOL_GPL(apply_to_page_range);
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
1555 pte_t *page_table, pte_t orig_pte)
1556{
1557 int same = 1;
1558#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
1559 if (sizeof(pte_t) > sizeof(unsigned long)) {
1560 spinlock_t *ptl = pte_lockptr(mm, pmd);
1561 spin_lock(ptl);
1562 same = pte_same(*page_table, orig_pte);
1563 spin_unlock(ptl);
1564 }
1565#endif
1566 pte_unmap(page_table);
1567 return same;
1568}
1569
1570
1571
1572
1573
1574
1575
1576static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
1577{
1578 if (likely(vma->vm_flags & VM_WRITE))
1579 pte = pte_mkwrite(pte);
1580 return pte;
1581}
1582
1583static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
1584{
1585
1586
1587
1588
1589
1590
1591 if (unlikely(!src)) {
1592 void *kaddr = kmap_atomic(dst, KM_USER0);
1593 void __user *uaddr = (void __user *)(va & PAGE_MASK);
1594
1595
1596
1597
1598
1599
1600
1601 if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
1602 memset(kaddr, 0, PAGE_SIZE);
1603 kunmap_atomic(kaddr, KM_USER0);
1604 flush_dcache_page(dst);
1605 return;
1606
1607 }
1608 copy_user_highpage(dst, src, va, vma);
1609}
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
/*
 * Handle a write fault on a present pte that does not permit writing.
 *
 * Entered with the pte mapped at @page_table and @ptl held; every return
 * path below has unmapped and unlocked it.  The page is either made
 * writable in place ("reuse") or replaced by a freshly allocated copy.
 *
 * Returns VM_FAULT_MINOR, with VM_FAULT_WRITE or-ed in when the page was
 * reused or a copy was installed; VM_FAULT_OOM when anon_vma_prepare() or a
 * page allocation fails; VM_FAULT_SIGBUS when ->page_mkwrite() returns a
 * negative value.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		spinlock_t *ptl, pte_t orig_pte)
{
	struct page *old_page, *new_page;
	pte_t entry;
	int reuse = 0, ret = VM_FAULT_MINOR;
	struct page *dirty_page = NULL;

	/* No struct page behind this pte: go straight to the copy path. */
	old_page = vm_normal_page(vma, address, orig_pte);
	if (!old_page)
		goto gotten;

	/*
	 * Anonymous page: reuse it in place if can_share_swap_page() allows.
	 * The page lock is only tried, never waited for; when it is already
	 * held, reuse stays 0 and the page is copied instead.
	 */
	if (PageAnon(old_page)) {
		if (!TestSetPageLocked(old_page)) {
			reuse = can_share_swap_page(old_page);
			unlock_page(old_page);
		}
	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
		/*
		 * Non-anonymous page in a writable shared mapping: it is
		 * always reused, after ->page_mkwrite() (if the vma provides
		 * one) has had the chance to refuse.
		 */
		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
			/*
			 * The callback runs without the pte lock, so take a
			 * reference on the page and drop the lock around it.
			 */
			page_cache_get(old_page);
			pte_unmap_unlock(page_table, ptl);

			if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
				goto unwritable_page;

			/*
			 * Retake the lock, drop the temporary reference and
			 * check that the pte is still the one we faulted on;
			 * if it changed meanwhile, return without modifying it.
			 */
			page_table = pte_offset_map_lock(mm, pmd, address,
							 &ptl);
			page_cache_release(old_page);
			if (!pte_same(*page_table, orig_pte))
				goto unlock;
		}
		/* Held until set_page_dirty_balance() at "unlock". */
		dirty_page = old_page;
		get_page(dirty_page);
		reuse = 1;
	}

	if (reuse) {
		/* Mark the existing pte young, dirty and (if allowed) writable. */
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = pte_mkyoung(orig_pte);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		if (ptep_set_access_flags(vma, address, page_table, entry,1)) {
			update_mmu_cache(vma, address, entry);
			lazy_mmu_prot_update(entry);
		}
		ret |= VM_FAULT_WRITE;
		goto unlock;
	}

	/*
	 * Copy path.  Pin the old page so it stays around while the lock is
	 * dropped for allocation and copying.
	 */
	page_cache_get(old_page);
gotten:
	pte_unmap_unlock(page_table, ptl);

	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	if (old_page == ZERO_PAGE(address)) {
		/* Copying the zero page is just allocating a zeroed page. */
		new_page = alloc_zeroed_user_highpage(vma, address);
		if (!new_page)
			goto oom;
	} else {
		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
		if (!new_page)
			goto oom;
		/* old_page may be NULL here; cow_user_page() handles that. */
		cow_user_page(new_page, old_page, address, vma);
	}

	/*
	 * Retake the lock and install the copy only if the pte is still the
	 * one we faulted on.
	 */
	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (likely(pte_same(*page_table, orig_pte))) {
		if (old_page) {
			page_remove_rmap(old_page, vma);
			/* A file-backed mapping becomes an anonymous one. */
			if (!PageAnon(old_page)) {
				dec_mm_counter(mm, file_rss);
				inc_mm_counter(mm, anon_rss);
			}
		} else
			inc_mm_counter(mm, anon_rss);
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = mk_pte(new_page, vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		lazy_mmu_prot_update(entry);
		/*
		 * The old pte is cleared and flushed first and the new one
		 * written afterwards, rather than overwriting in one step.
		 * NOTE(review): presumably so no CPU can keep using a stale
		 * translation of the old page - confirm against arch docs.
		 */
		ptep_clear_flush(vma, address, page_table);
		set_pte_at(mm, address, page_table, entry);
		update_mmu_cache(vma, address, entry);
		lru_cache_add_active(new_page);
		page_add_new_anon_rmap(new_page, vma, address);

		/*
		 * new_page now belongs to the mapping.  Point new_page at
		 * old_page so the release below drops the reference the old
		 * mapping held (a no-op when old_page is NULL).
		 */
		new_page = old_page;
		ret |= VM_FAULT_WRITE;
	}
	/* Drops either the unused copy or the old mapping's reference ... */
	if (new_page)
		page_cache_release(new_page);
	/* ... and this drops the pin taken before "gotten". */
	if (old_page)
		page_cache_release(old_page);
unlock:
	pte_unmap_unlock(page_table, ptl);
	if (dirty_page) {
		/* Done after the pte lock has been released. */
		set_page_dirty_balance(dirty_page);
		put_page(dirty_page);
	}
	return ret;
oom:
	/* Reached with the pte lock already dropped. */
	if (old_page)
		page_cache_release(old_page);
	return VM_FAULT_OOM;

unwritable_page:
	/* ->page_mkwrite() refused; drop the pin taken around the call. */
	page_cache_release(old_page);
	return VM_FAULT_SIGBUS;
}
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
/*
 * True when @truncate_count has no bits set below PAGE_SHIFT, i.e. the
 * value is page aligned.  unmap_mapping_range_vma() treats such a value in
 * vma->vm_truncate_count as an address to resume zapping from, as opposed
 * to a copy of the mapping's truncate_count.
 */
#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
1809
1810static void reset_vma_truncate_counts(struct address_space *mapping)
1811{
1812 struct vm_area_struct *vma;
1813 struct prio_tree_iter iter;
1814
1815 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
1816 vma->vm_truncate_count = 0;
1817 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1818 vma->vm_truncate_count = 0;
1819}
1820
/*
 * Zap [start_addr, end_addr) of @vma on behalf of unmap_mapping_range().
 *
 * vma->vm_truncate_count records progress: it holds either a page-aligned
 * address to resume from (see is_restart_addr()) or, once the vma has been
 * fully handled, details->truncate_count.
 *
 * Returns 0 when the vma was finished without releasing
 * details->i_mmap_lock.  Returns -EINTR after the lock was dropped and
 * retaken (a reschedule or lock break was wanted), whether or not the vma
 * was finished; the caller restarts its scan in that case.
 */
static int unmap_mapping_range_vma(struct vm_area_struct *vma,
		unsigned long start_addr, unsigned long end_addr,
		struct zap_details *details)
{
	unsigned long restart_addr;
	int need_break;

again:
	/* Skip the part already zapped by an earlier, interrupted pass. */
	restart_addr = vma->vm_truncate_count;
	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
		start_addr = restart_addr;
		if (start_addr >= end_addr) {
			/* Earlier passes already covered the whole range. */
			vma->vm_truncate_count = details->truncate_count;
			return 0;
		}
	}

	/*
	 * The return value of zap_page_range() is used as the address at
	 * which zapping has to continue; at or beyond end_addr means done.
	 */
	restart_addr = zap_page_range(vma, start_addr,
					end_addr - start_addr, details);
	need_break = need_resched() ||
			need_lockbreak(details->i_mmap_lock);

	if (restart_addr >= end_addr) {
		/* Whole range zapped: mark the vma as done for this round. */
		vma->vm_truncate_count = details->truncate_count;
		if (!need_break)
			return 0;
	} else {
		/* Stopped early: remember where to pick up again. */
		vma->vm_truncate_count = restart_addr;
		if (!need_break)
			goto again;
	}

	/* Let others run / take the lock, then tell the caller to rescan. */
	spin_unlock(details->i_mmap_lock);
	cond_resched();
	spin_lock(details->i_mmap_lock);
	return -EINTR;
}
1861
1862static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
1863 struct zap_details *details)
1864{
1865 struct vm_area_struct *vma;
1866 struct prio_tree_iter iter;
1867 pgoff_t vba, vea, zba, zea;
1868
1869restart:
1870 vma_prio_tree_foreach(vma, &iter, root,
1871 details->first_index, details->last_index) {
1872
1873 if (vma->vm_truncate_count == details->truncate_count)
1874 continue;
1875
1876 vba = vma->vm_pgoff;
1877 vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
1878
1879 zba = details->first_index;
1880 if (zba < vba)
1881 zba = vba;
1882 zea = details->last_index;
1883 if (zea > vea)
1884 zea = vea;
1885
1886 if (unmap_mapping_range_vma(vma,
1887 ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
1888 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
1889 details) < 0)
1890 goto restart;
1891 }
1892}
1893
1894static inline void unmap_mapping_range_list(struct list_head *head,
1895 struct zap_details *details)
1896{
1897 struct vm_area_struct *vma;
1898
1899
1900
1901
1902
1903
1904
1905restart:
1906 list_for_each_entry(vma, head, shared.vm_set.list) {
1907
1908 if (vma->vm_truncate_count == details->truncate_count)
1909 continue;
1910 details->nonlinear_vma = vma;
1911 if (unmap_mapping_range_vma(vma, vma->vm_start,
1912 vma->vm_end, details) < 0)
1913 goto restart;
1914 }
1915}
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
/*
 * Unmap, from every vma attached to @mapping, the pages that fall inside
 * the file range starting at byte @holebegin and @holelen bytes long.
 *
 * @holebegin: start of the hole in bytes; converted to a page index by
 *             shifting, i.e. rounded down.
 * @holelen:   length in bytes, rounded up to whole pages.  A length of 0
 *             makes the range extend to the largest page index (see the
 *             last_index fix-up below).
 * @even_cows: when non-zero, details.check_mapping is NULL; otherwise it is
 *             set to @mapping.  NOTE(review): the name suggests this selects
 *             whether private COWed pages are zapped too - the code acting
 *             on check_mapping is not visible here, confirm there.
 */
void unmap_mapping_range(struct address_space *mapping,
		loff_t const holebegin, loff_t const holelen, int even_cows)
{
	struct zap_details details;
	pgoff_t hba = holebegin >> PAGE_SHIFT;
	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/*
	 * When loff_t is wider than pgoff_t the end of the hole may not be
	 * representable as a page index; in that case choose hlen so that
	 * last_index below comes out as ULONG_MAX.
	 */
	if (sizeof(holelen) > sizeof(hlen)) {
		long long holeend =
			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (holeend & ~(long long)ULONG_MAX)
			hlen = ULONG_MAX - hba + 1;
	}

	details.check_mapping = even_cows? NULL: mapping;
	details.nonlinear_vma = NULL;
	details.first_index = hba;
	details.last_index = hba + hlen - 1;
	/* hlen == 0 (or wrap-around) yields last < first: unmap to the end. */
	if (details.last_index < details.first_index)
		details.last_index = ULONG_MAX;
	details.i_mmap_lock = &mapping->i_mmap_lock;

	spin_lock(&mapping->i_mmap_lock);

	/*
	 * Barriers around the truncate_count update.  do_no_page() samples
	 * truncate_count with a matching smp_rmb() to detect an unmap that
	 * raced with its fault.
	 */
	smp_wmb();

	mapping->truncate_count++;

	smp_mb();
	/*
	 * A page-aligned truncate_count would be indistinguishable from a
	 * restart address stored in vma->vm_truncate_count, so skip such
	 * values; on wrap-around to 0 reset all per-vma counts first.
	 */
	if (unlikely(is_restart_addr(mapping->truncate_count))) {
		if (mapping->truncate_count == 0)
			reset_vma_truncate_counts(mapping);
		mapping->truncate_count++;
	}
	details.truncate_count = mapping->truncate_count;

	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
		unmap_mapping_range_tree(&mapping->i_mmap, &details);
	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
	spin_unlock(&mapping->i_mmap_lock);
}
1980EXPORT_SYMBOL(unmap_mapping_range);
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991int vmtruncate(struct inode * inode, loff_t offset)
1992{
1993 struct address_space *mapping = inode->i_mapping;
1994 unsigned long limit;
1995
1996 if (inode->i_size < offset)
1997 goto do_expand;
1998
1999
2000
2001
2002 if (IS_SWAPFILE(inode))
2003 goto out_busy;
2004 i_size_write(inode, offset);
2005 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2006 truncate_inode_pages(mapping, offset);
2007 goto out_truncate;
2008
2009do_expand:
2010 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2011 if (limit != RLIM_INFINITY && offset > limit)
2012 goto out_sig;
2013 if (offset > inode->i_sb->s_maxbytes)
2014 goto out_big;
2015 i_size_write(inode, offset);
2016
2017out_truncate:
2018 if (inode->i_op && inode->i_op->truncate)
2019 inode->i_op->truncate(inode);
2020 return 0;
2021out_sig:
2022 send_sig(SIGXFSZ, current, 0);
2023out_big:
2024 return -EFBIG;
2025out_busy:
2026 return -ETXTBSY;
2027}
2028EXPORT_SYMBOL(vmtruncate);
2029
2030int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2031{
2032 struct address_space *mapping = inode->i_mapping;
2033
2034
2035
2036
2037
2038
2039 if (!inode->i_op || !inode->i_op->truncate_range)
2040 return -ENOSYS;
2041
2042 mutex_lock(&inode->i_mutex);
2043 down_write(&inode->i_alloc_sem);
2044 unmap_mapping_range(mapping, offset, (end - offset), 1);
2045 truncate_inode_pages_range(mapping, offset, end);
2046 inode->i_op->truncate_range(inode, offset, end);
2047 up_write(&inode->i_alloc_sem);
2048 mutex_unlock(&inode->i_mutex);
2049
2050 return 0;
2051}
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
2070{
2071#ifdef CONFIG_NUMA
2072 struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
2073#endif
2074 int i, num;
2075 struct page *new_page;
2076 unsigned long offset;
2077
2078
2079
2080
2081 num = valid_swaphandles(entry, &offset);
2082 for (i = 0; i < num; offset++, i++) {
2083
2084 new_page = read_swap_cache_async(swp_entry(swp_type(entry),
2085 offset), vma, addr);
2086 if (!new_page)
2087 break;
2088 page_cache_release(new_page);
2089#ifdef CONFIG_NUMA
2090
2091
2092
2093 addr += PAGE_SIZE;
2094 if (addr == 0)
2095 vma = NULL;
2096 if (vma) {
2097 if (addr >= vma->vm_end) {
2098 vma = next_vma;
2099 next_vma = vma ? vma->vm_next : NULL;
2100 }
2101 if (vma && addr < vma->vm_start)
2102 vma = NULL;
2103 } else {
2104 if (next_vma && addr >= next_vma->vm_start) {
2105 vma = next_vma;
2106 next_vma = vma->vm_next;
2107 }
2108 }
2109#endif
2110 }
2111 lru_add_drain();
2112}
2113
2114
2115
2116
2117
2118
/*
 * Handle a fault on a non-present pte holding a swap entry (or a migration
 * entry).
 *
 * Entered with the pte mapped at @page_table but without the pte lock;
 * the mapping is dropped by pte_unmap_same() right away and the pte is
 * re-mapped and locked later.  All return paths leave it unmapped and
 * unlocked.
 *
 * Returns VM_FAULT_MINOR when the page was found in the swap cache (or
 * nothing had to be done), VM_FAULT_MAJOR when it had to be read in,
 * VM_FAULT_OOM when no page could be obtained or the follow-up
 * do_wp_page() ran out of memory, and VM_FAULT_SIGBUS when the page is not
 * up to date after being locked.
 */
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access, pte_t orig_pte)
{
	spinlock_t *ptl;
	struct page *page;
	swp_entry_t entry;
	pte_t pte;
	int ret = VM_FAULT_MINOR;

	/* The pte changed under us: nothing to do. */
	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
		goto out;

	entry = pte_to_swp_entry(orig_pte);
	if (is_migration_entry(entry)) {
		/* Not a real swap entry: wait for the migration and return. */
		migration_entry_wait(mm, pmd, address);
		goto out;
	}
	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
	page = lookup_swap_cache(entry);
	if (!page) {
		/* Not in the swap cache: read it (and its neighbours) in. */
		grab_swap_token();
 		swapin_readahead(entry, address, vma);
 		page = read_swap_cache_async(entry, vma, address);
		if (!page) {
			/*
			 * No page was returned.  Report OOM only if the pte
			 * still holds the same entry; if it changed in the
			 * meantime, ret stays VM_FAULT_MINOR.
			 */
			page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
			if (likely(pte_same(*page_table, orig_pte)))
				ret = VM_FAULT_OOM;
			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
			goto unlock;
		}

		/* The page had to be read in: count it as a major fault. */
		ret = VM_FAULT_MAJOR;
		count_vm_event(PGMAJFAULT);
	}

	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
	mark_page_accessed(page);
	lock_page(page);

	/*
	 * lock_page() is taken before the pte lock.  Once both are held,
	 * re-check that the pte still holds the entry we started with.
	 */
	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (unlikely(!pte_same(*page_table, orig_pte)))
		goto out_nomap;

	if (unlikely(!PageUptodate(page))) {
		ret = VM_FAULT_SIGBUS;
		goto out_nomap;
	}

	/* The page is usable: account it and build the new pte. */

	inc_mm_counter(mm, anon_rss);
	pte = mk_pte(page, vma->vm_page_prot);
	if (write_access && can_share_swap_page(page)) {
		/* Can be mapped writable at once; no do_wp_page() needed. */
		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
		write_access = 0;
	}

	flush_icache_page(vma, page);
	set_pte_at(mm, address, page_table, pte);
	page_add_anon_rmap(page, vma, address);

	/* This pte no longer references the swap slot. */
	swap_free(entry);
	if (vm_swap_full())
		remove_exclusive_swap_page(page);
	unlock_page(page);

	if (write_access) {
		/*
		 * Still a write fault on what is now a read-only pte.
		 * do_wp_page() is entered with the pte lock held and
		 * releases it itself, hence "goto out" rather than
		 * "unlock".  Only an OOM result is propagated.
		 */
		if (do_wp_page(mm, vma, address,
				page_table, pmd, ptl, pte) == VM_FAULT_OOM)
			ret = VM_FAULT_OOM;
		goto out;
	}

	/* Read fault (or already made writable above). */
	update_mmu_cache(vma, address, pte);
	lazy_mmu_prot_update(pte);
unlock:
	pte_unmap_unlock(page_table, ptl);
out:
	return ret;
out_nomap:
	/* Page was not mapped: undo the lock and reference on it. */
	pte_unmap_unlock(page_table, ptl);
	unlock_page(page);
	page_cache_release(page);
	return ret;
}
2214
2215
2216
2217
2218
2219
/*
 * Handle a fault on an empty pte in a vma that provides neither ->nopage
 * nor ->nopfn (see handle_pte_fault()).
 *
 * Entered with the pte mapped at @page_table but without the pte lock.
 * A write fault installs a newly allocated zeroed page; a read fault maps
 * ZERO_PAGE(address).  If the pte turns out to be populated once the lock
 * is held, the page reference is dropped and nothing is installed.
 *
 * Returns VM_FAULT_MINOR, or VM_FAULT_OOM when anon_vma_prepare() or the
 * page allocation fails.
 */
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access)
{
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	if (write_access) {
		/* Unmap the pte before the allocations below. */
		pte_unmap(page_table);

		if (unlikely(anon_vma_prepare(vma)))
			goto oom;
		page = alloc_zeroed_user_highpage(vma, address);
		if (!page)
			goto oom;

		entry = mk_pte(page, vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);

		/* Map and lock again; someone may have filled the pte. */
		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
		if (!pte_none(*page_table))
			goto release;
		inc_mm_counter(mm, anon_rss);
		lru_cache_add_active(page);
		page_add_new_anon_rmap(page, vma, address);
	} else {
		/* Read fault: map the shared zero page, without write access. */
		page = ZERO_PAGE(address);
		page_cache_get(page);
		entry = mk_pte(page, vma->vm_page_prot);

		/* page_table is still mapped from entry; only lock it. */
		ptl = pte_lockptr(mm, pmd);
		spin_lock(ptl);
		if (!pte_none(*page_table))
			goto release;
		/* The zero page is accounted as file rss with a file rmap. */
		inc_mm_counter(mm, file_rss);
		page_add_file_rmap(page);
	}

	set_pte_at(mm, address, page_table, entry);

	/* The pte went from empty to present. */
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
unlock:
	pte_unmap_unlock(page_table, ptl);
	return VM_FAULT_MINOR;
release:
	/* Lost the race: drop our page reference and leave the pte alone. */
	page_cache_release(page);
	goto unlock;
oom:
	return VM_FAULT_OOM;
}
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
/*
 * Handle a fault on an empty pte in a vma that provides ->nopage().
 *
 * Entered with the pte mapped at @page_table but without the pte lock; the
 * mapping is dropped immediately and re-established (locked) after the page
 * has been obtained.  Must not be used on VM_PFNMAP vmas.
 *
 * Returns the value ->nopage() stored through its third argument (initially
 * VM_FAULT_MINOR) on success, VM_FAULT_SIGBUS / VM_FAULT_OOM /
 * VM_FAULT_MINOR for the NOPAGE_SIGBUS / NOPAGE_OOM / NOPAGE_REFAULT
 * results, VM_FAULT_OOM when the private copy cannot be allocated, and
 * VM_FAULT_SIGBUS when ->page_mkwrite() returns a negative value.
 */
static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access)
{
	spinlock_t *ptl;
	struct page *new_page;
	struct address_space *mapping = NULL;
	pte_t entry;
	unsigned int sequence = 0;
	int ret = VM_FAULT_MINOR;
	int anon = 0;
	struct page *dirty_page = NULL;

	pte_unmap(page_table);
	BUG_ON(vma->vm_flags & VM_PFNMAP);

	/*
	 * For file mappings, sample truncate_count now so that an
	 * unmap_mapping_range() racing with ->nopage() can be detected below.
	 */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		sequence = mapping->truncate_count;
		smp_rmb(); /* serializes i_size against truncate_count */
	}
retry:
	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
	/*
	 * ->nopage() returns either a page (a reference to which is dropped
	 * on every failure path below) or one of the NOPAGE_* codes.
	 */

	/* Translate the special return codes. */
	if (unlikely(new_page == NOPAGE_SIGBUS))
		return VM_FAULT_SIGBUS;
	else if (unlikely(new_page == NOPAGE_OOM))
		return VM_FAULT_OOM;
	else if (unlikely(new_page == NOPAGE_REFAULT))
		return VM_FAULT_MINOR;

	/*
	 * Write fault: a private mapping gets its own copy of the page right
	 * away; a shared mapping asks ->page_mkwrite() for permission.
	 */
	if (write_access) {
		if (!(vma->vm_flags & VM_SHARED)) {
			struct page *page;

			if (unlikely(anon_vma_prepare(vma)))
				goto oom;
			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
			if (!page)
				goto oom;
			copy_user_highpage(page, new_page, address, vma);
			page_cache_release(new_page);
			new_page = page;
			anon = 1;

		} else {
			/* Shared mapping: the callback may veto the write. */
			if (vma->vm_ops->page_mkwrite &&
			    vma->vm_ops->page_mkwrite(vma, new_page) < 0
			    ) {
				page_cache_release(new_page);
				return VM_FAULT_SIGBUS;
			}
		}
	}

	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	/*
	 * If truncate_count moved since it was sampled, an unmap ran while we
	 * were fetching the page: drop everything and fetch it again.
	 */
	if (mapping && unlikely(sequence != mapping->truncate_count)) {
		pte_unmap_unlock(page_table, ptl);
		page_cache_release(new_page);
		cond_resched();
		sequence = mapping->truncate_count;
		smp_rmb();
		goto retry;
	}

	/*
	 * Install the page only if the pte is still empty; otherwise the
	 * fault was already handled while we did not hold the lock.
	 */
	if (pte_none(*page_table)) {
		flush_icache_page(vma, new_page);
		entry = mk_pte(new_page, vma->vm_page_prot);
		if (write_access)
			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		set_pte_at(mm, address, page_table, entry);
		if (anon) {
			inc_mm_counter(mm, anon_rss);
			lru_cache_add_active(new_page);
			page_add_new_anon_rmap(new_page, vma, address);
		} else {
			inc_mm_counter(mm, file_rss);
			page_add_file_rmap(new_page);
			if (write_access) {
				/* Held for set_page_dirty_balance() below. */
				dirty_page = new_page;
				get_page(dirty_page);
			}
		}
	} else {
		/* Pte already populated: give our page back. */
		page_cache_release(new_page);
		goto unlock;
	}

	/* The pte went from empty to present. */
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
unlock:
	pte_unmap_unlock(page_table, ptl);
	if (dirty_page) {
		/* Done after the pte lock has been released. */
		set_page_dirty_balance(dirty_page);
		put_page(dirty_page);
	}
	return ret;
oom:
	page_cache_release(new_page);
	return VM_FAULT_OOM;
}
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2440 unsigned long address, pte_t *page_table, pmd_t *pmd,
2441 int write_access)
2442{
2443 spinlock_t *ptl;
2444 pte_t entry;
2445 unsigned long pfn;
2446 int ret = VM_FAULT_MINOR;
2447
2448 pte_unmap(page_table);
2449 BUG_ON(!(vma->vm_flags & VM_PFNMAP));
2450 BUG_ON(is_cow_mapping(vma->vm_flags));
2451
2452 pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
2453 if (unlikely(pfn == NOPFN_OOM))
2454 return VM_FAULT_OOM;
2455 else if (unlikely(pfn == NOPFN_SIGBUS))
2456 return VM_FAULT_SIGBUS;
2457 else if (unlikely(pfn == NOPFN_REFAULT))
2458 return VM_FAULT_MINOR;
2459
2460 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2461
2462
2463 if (pte_none(*page_table)) {
2464 entry = pfn_pte(pfn, vma->vm_page_prot);
2465 if (write_access)
2466 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2467 set_pte_at(mm, address, page_table, entry);
2468 }
2469 pte_unmap_unlock(page_table, ptl);
2470 return ret;
2471}
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
2483 unsigned long address, pte_t *page_table, pmd_t *pmd,
2484 int write_access, pte_t orig_pte)
2485{
2486 pgoff_t pgoff;
2487 int err;
2488
2489 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2490 return VM_FAULT_MINOR;
2491
2492 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
2493
2494
2495
2496 print_bad_pte(vma, orig_pte, address);
2497 return VM_FAULT_OOM;
2498 }
2499
2500
2501 pgoff = pte_to_pgoff(orig_pte);
2502 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
2503 vma->vm_page_prot, pgoff, 0);
2504 if (err == -ENOMEM)
2505 return VM_FAULT_OOM;
2506 if (err)
2507 return VM_FAULT_SIGBUS;
2508 return VM_FAULT_MAJOR;
2509}
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
/*
 * Dispatch a fault once the pte for @address has been located.
 *
 * Entered with the pte mapped at @pte but without the pte lock.  The pte is
 * read once without the lock; a non-present pte is passed to the matching
 * do_*() handler, each of which re-validates it as needed.  A present pte
 * is re-checked under the lock before anything is changed.
 *
 * Returns the chosen handler's result, or VM_FAULT_MINOR when only the
 * accessed/dirty bits had to be updated or the pte changed under us.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, pmd_t *pmd, int write_access)
{
	pte_t entry;
	spinlock_t *ptl;

	entry = *pte;
	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			/* Empty pte: ->nopage, then ->nopfn, else anonymous. */
			if (vma->vm_ops) {
				if (vma->vm_ops->nopage)
					return do_no_page(mm, vma, address,
							  pte, pmd,
							  write_access);
				if (unlikely(vma->vm_ops->nopfn))
					return do_no_pfn(mm, vma, address, pte,
							 pmd, write_access);
			}
			return do_anonymous_page(mm, vma, address,
						 pte, pmd, write_access);
		}
		/* Non-empty, non-present: a file offset or a swap entry. */
		if (pte_file(entry))
			return do_file_page(mm, vma, address,
					pte, pmd, write_access, entry);
		return do_swap_page(mm, vma, address,
					pte, pmd, write_access, entry);
	}

	/* Present pte: take the lock and make sure it did not change. */
	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (unlikely(!pte_same(*pte, entry)))
		goto unlock;
	if (write_access) {
		/* do_wp_page() is entered with ptl held and releases it. */
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address,
					pte, pmd, ptl, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	if (ptep_set_access_flags(vma, address, pte, entry, write_access)) {
		update_mmu_cache(vma, address, entry);
		lazy_mmu_prot_update(entry);
	} else {
		/*
		 * ptep_set_access_flags() reported nothing changed.  On a
		 * write fault the TLB entry for this address is flushed
		 * anyway.  NOTE(review): presumably to discard a stale TLB
		 * entry that caused a spurious protection fault - confirm.
		 */
		if (write_access)
			flush_tlb_page(vma, address);
	}
unlock:
	pte_unmap_unlock(pte, ptl);
	return VM_FAULT_MINOR;
}
2581
2582
2583
2584
2585int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2586 unsigned long address, int write_access)
2587{
2588 pgd_t *pgd;
2589 pud_t *pud;
2590 pmd_t *pmd;
2591 pte_t *pte;
2592
2593 __set_current_state(TASK_RUNNING);
2594
2595 count_vm_event(PGFAULT);
2596
2597 if (unlikely(is_vm_hugetlb_page(vma)))
2598 return hugetlb_fault(mm, vma, address, write_access);
2599
2600 pgd = pgd_offset(mm, address);
2601 pud = pud_alloc(mm, pgd, address);
2602 if (!pud)
2603 return VM_FAULT_OOM;
2604 pmd = pmd_alloc(mm, pud, address);
2605 if (!pmd)
2606 return VM_FAULT_OOM;
2607 pte = pte_alloc_map(mm, pmd, address);
2608 if (!pte)
2609 return VM_FAULT_OOM;
2610
2611 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
2612}
2613
2614EXPORT_SYMBOL_GPL(__handle_mm_fault);
2615
2616#ifndef __PAGETABLE_PUD_FOLDED
2617
2618
2619
2620
2621int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
2622{
2623 pud_t *new = pud_alloc_one(mm, address);
2624 if (!new)
2625 return -ENOMEM;
2626
2627 spin_lock(&mm->page_table_lock);
2628 if (pgd_present(*pgd))
2629 pud_free(new);
2630 else
2631 pgd_populate(mm, pgd, new);
2632 spin_unlock(&mm->page_table_lock);
2633 return 0;
2634}
2635#endif
2636
2637#ifndef __PAGETABLE_PMD_FOLDED
2638
2639
2640
2641
2642int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
2643{
2644 pmd_t *new = pmd_alloc_one(mm, address);
2645 if (!new)
2646 return -ENOMEM;
2647
2648 spin_lock(&mm->page_table_lock);
2649#ifndef __ARCH_HAS_4LEVEL_HACK
2650 if (pud_present(*pud))
2651 pmd_free(new);
2652 else
2653 pud_populate(mm, pud, new);
2654#else
2655 if (pgd_present(*pud))
2656 pmd_free(new);
2657 else
2658 pgd_populate(mm, pud, new);
2659#endif
2660 spin_unlock(&mm->page_table_lock);
2661 return 0;
2662}
2663#endif
2664
2665int make_pages_present(unsigned long addr, unsigned long end)
2666{
2667 int ret, len, write;
2668 struct vm_area_struct * vma;
2669
2670 vma = find_vma(current->mm, addr);
2671 if (!vma)
2672 return -1;
2673 write = (vma->vm_flags & VM_WRITE) != 0;
2674 BUG_ON(addr >= end);
2675 BUG_ON(end > vma->vm_end);
2676 len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
2677 ret = get_user_pages(current, current->mm, addr,
2678 len, write, 0, NULL, NULL);
2679 if (ret < 0)
2680 return ret;
2681 return ret == len ? 0 : -1;
2682}
2683
2684
2685
2686
2687struct page * vmalloc_to_page(void * vmalloc_addr)
2688{
2689 unsigned long addr = (unsigned long) vmalloc_addr;
2690 struct page *page = NULL;
2691 pgd_t *pgd = pgd_offset_k(addr);
2692 pud_t *pud;
2693 pmd_t *pmd;
2694 pte_t *ptep, pte;
2695
2696 if (!pgd_none(*pgd)) {
2697 pud = pud_offset(pgd, addr);
2698 if (!pud_none(*pud)) {
2699 pmd = pmd_offset(pud, addr);
2700 if (!pmd_none(*pmd)) {
2701 ptep = pte_offset_map(pmd, addr);
2702 pte = *ptep;
2703 if (pte_present(pte))
2704 page = pte_page(pte);
2705 pte_unmap(ptep);
2706 }
2707 }
2708 }
2709 return page;
2710}
2711
2712EXPORT_SYMBOL(vmalloc_to_page);
2713
2714
2715
2716
/*
 * Return the page frame number of the page that vmalloc_to_page() finds
 * for @vmalloc_addr.
 */
unsigned long vmalloc_to_pfn(void * vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}
2721
2722EXPORT_SYMBOL(vmalloc_to_pfn);
2723
2724#if !defined(__HAVE_ARCH_GATE_AREA)
2725
2726#if defined(AT_SYSINFO_EHDR)
2727static struct vm_area_struct gate_vma;
2728
2729static int __init gate_vma_init(void)
2730{
2731 gate_vma.vm_mm = NULL;
2732 gate_vma.vm_start = FIXADDR_USER_START;
2733 gate_vma.vm_end = FIXADDR_USER_END;
2734 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
2735 gate_vma.vm_page_prot = __P101;
2736
2737
2738
2739
2740
2741
2742 gate_vma.vm_flags |= VM_ALWAYSDUMP;
2743 return 0;
2744}
2745__initcall(gate_vma_init);
2746#endif
2747
2748struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
2749{
2750#ifdef AT_SYSINFO_EHDR
2751 return &gate_vma;
2752#else
2753 return NULL;
2754#endif
2755}
2756
/*
 * Return 1 if @addr lies in [FIXADDR_USER_START, FIXADDR_USER_END) and
 * AT_SYSINFO_EHDR is defined; return 0 otherwise.
 */
int in_gate_area_no_task(unsigned long addr)
{
#ifdef AT_SYSINFO_EHDR
	return (addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END);
#else
	return 0;
#endif
}
2765
2766#endif
2767
2768
2769
2770
2771
2772
2773int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
2774{
2775 struct mm_struct *mm;
2776 struct vm_area_struct *vma;
2777 struct page *page;
2778 void *old_buf = buf;
2779
2780 mm = get_task_mm(tsk);
2781 if (!mm)
2782 return 0;
2783
2784 down_read(&mm->mmap_sem);
2785
2786 while (len) {
2787 int bytes, ret, offset;
2788 void *maddr;
2789
2790 ret = get_user_pages(tsk, mm, addr, 1,
2791 write, 1, &page, &vma);
2792 if (ret <= 0)
2793 break;
2794
2795 bytes = len;
2796 offset = addr & (PAGE_SIZE-1);
2797 if (bytes > PAGE_SIZE-offset)
2798 bytes = PAGE_SIZE-offset;
2799
2800 maddr = kmap(page);
2801 if (write) {
2802 copy_to_user_page(vma, page, addr,
2803 maddr + offset, buf, bytes);
2804 set_page_dirty_lock(page);
2805 } else {
2806 copy_from_user_page(vma, page, addr,
2807 buf, maddr + offset, bytes);
2808 }
2809 kunmap(page);
2810 page_cache_release(page);
2811 len -= bytes;
2812 buf += bytes;
2813 addr += bytes;
2814 }
2815 up_read(&mm->mmap_sem);
2816 mmput(mm);
2817
2818 return buf - old_buf;
2819}
2820