1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41#include <linux/kernel_stat.h>
42#include <linux/mm.h>
43#include <linux/hugetlb.h>
44#include <linux/mman.h>
45#include <linux/swap.h>
46#include <linux/highmem.h>
47#include <linux/pagemap.h>
48#include <linux/rmap.h>
49#include <linux/acct.h>
50#include <linux/module.h>
51#include <linux/init.h>
52
53#include <asm/pgalloc.h>
54#include <asm/uaccess.h>
55#include <asm/tlb.h>
56#include <asm/tlbflush.h>
57#include <asm/pgtable.h>
58
59#include <linux/swapops.h>
60#include <linux/elf.h>
61
#ifndef CONFIG_DISCONTIGMEM
/*
 * Globals that are only defined when CONFIG_DISCONTIGMEM is not set.
 * Each one is exported directly after its definition.
 * NOTE(review): presumably mem_map is the flat page array and max_mapnr
 * its highest index -- confirm against the arch setup code.
 */
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);

struct page *mem_map;
EXPORT_SYMBOL(mem_map);
#endif
70
/*
 * Exported memory-layout globals.  Nothing in this part of the file writes
 * them, so their exact meaning is set by code elsewhere.
 * NOTE(review): high_memory presumably marks the top of directly mapped
 * memory -- confirm with the arch init code before relying on it.
 */
unsigned long num_physpages;
EXPORT_SYMBOL(num_physpages);

void *high_memory;
EXPORT_SYMBOL(high_memory);

unsigned long vmalloc_earlyreserve;
EXPORT_SYMBOL(vmalloc_earlyreserve);
85
86
87
88
89
90static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
91{
92 struct page *page;
93
94 if (pmd_none(*pmd))
95 return;
96 if (unlikely(pmd_bad(*pmd))) {
97 pmd_ERROR(*pmd);
98 pmd_clear(pmd);
99 return;
100 }
101 if (!((start | end) & ~PMD_MASK)) {
102
103 page = pmd_page(*pmd);
104 pmd_clear(pmd);
105 dec_page_state(nr_page_table_pages);
106 tlb->mm->nr_ptes--;
107 pte_free_tlb(tlb, page);
108 }
109}
110
111static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
112{
113 unsigned long addr = start, next;
114 pmd_t *pmd, *__pmd;
115
116 if (pud_none(*pud))
117 return;
118 if (unlikely(pud_bad(*pud))) {
119 pud_ERROR(*pud);
120 pud_clear(pud);
121 return;
122 }
123
124 pmd = __pmd = pmd_offset(pud, start);
125 do {
126 next = (addr + PMD_SIZE) & PMD_MASK;
127 if (next > end || next <= addr)
128 next = end;
129
130 clear_pmd_range(tlb, pmd, addr, next);
131 pmd++;
132 addr = next;
133 } while (addr && (addr < end));
134
135 if (!((start | end) & ~PUD_MASK)) {
136
137 pud_clear(pud);
138 pmd_free_tlb(tlb, __pmd);
139 }
140}
141
142
143static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
144{
145 unsigned long addr = start, next;
146 pud_t *pud, *__pud;
147
148 if (pgd_none(*pgd))
149 return;
150 if (unlikely(pgd_bad(*pgd))) {
151 pgd_ERROR(*pgd);
152 pgd_clear(pgd);
153 return;
154 }
155
156 pud = __pud = pud_offset(pgd, start);
157 do {
158 next = (addr + PUD_SIZE) & PUD_MASK;
159 if (next > end || next <= addr)
160 next = end;
161
162 clear_pud_range(tlb, pud, addr, next);
163 pud++;
164 addr = next;
165 } while (addr && (addr < end));
166
167 if (!((start | end) & ~PGDIR_MASK)) {
168
169 pgd_clear(pgd);
170 pud_free_tlb(tlb, __pud);
171 }
172}
173
174
175
176
177
178
179void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
180{
181 unsigned long addr = start, next;
182 pgd_t * pgd = pgd_offset(tlb->mm, start);
183 unsigned long i;
184
185 for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
186 next = (addr + PGDIR_SIZE) & PGDIR_MASK;
187 if (next > end || next <= addr)
188 next = end;
189
190 clear_pgd_range(tlb, pgd, addr, next);
191 pgd++;
192 addr = next;
193 }
194}
195
196pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
197{
198 if (!pmd_present(*pmd)) {
199 struct page *new;
200
201 spin_unlock(&mm->page_table_lock);
202 new = pte_alloc_one(mm, address);
203 spin_lock(&mm->page_table_lock);
204 if (!new)
205 return NULL;
206
207
208
209
210 if (pmd_present(*pmd)) {
211 pte_free(new);
212 goto out;
213 }
214 mm->nr_ptes++;
215 inc_page_state(nr_page_table_pages);
216 pmd_populate(mm, pmd, new);
217 }
218out:
219 return pte_offset_map(pmd, address);
220}
221
222pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
223{
224 if (!pmd_present(*pmd)) {
225 pte_t *new;
226
227 spin_unlock(&mm->page_table_lock);
228 new = pte_alloc_one_kernel(mm, address);
229 spin_lock(&mm->page_table_lock);
230 if (!new)
231 return NULL;
232
233
234
235
236
237 if (pmd_present(*pmd)) {
238 pte_free_kernel(new);
239 goto out;
240 }
241 pmd_populate_kernel(mm, pmd, new);
242 }
243out:
244 return pte_offset_kernel(pmd, address);
245}
246
247
248
249
250
251
252
253
254
255
256static inline void
257copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
258{
259 if (pte_file(pte))
260 return;
261 swap_duplicate(pte_to_swp_entry(pte));
262 if (list_empty(&dst_mm->mmlist)) {
263 spin_lock(&mmlist_lock);
264 list_add(&dst_mm->mmlist, &src_mm->mmlist);
265 spin_unlock(&mmlist_lock);
266 }
267}
268
269static inline void
270copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
271 pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
272 unsigned long addr)
273{
274 pte_t pte = *src_pte;
275 struct page *page;
276 unsigned long pfn;
277
278
279 if (!pte_present(pte)) {
280 copy_swap_pte(dst_mm, src_mm, pte);
281 set_pte(dst_pte, pte);
282 return;
283 }
284 pfn = pte_pfn(pte);
285
286
287
288
289
290 page = NULL;
291 if (pfn_valid(pfn))
292 page = pfn_to_page(pfn);
293
294 if (!page || PageReserved(page)) {
295 set_pte(dst_pte, pte);
296 return;
297 }
298
299
300
301
302
303 if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
304 ptep_set_wrprotect(src_pte);
305 pte = *src_pte;
306 }
307
308
309
310
311
312 if (vm_flags & VM_SHARED)
313 pte = pte_mkclean(pte);
314 pte = pte_mkold(pte);
315 get_page(page);
316 dst_mm->rss++;
317 if (PageAnon(page))
318 dst_mm->anon_rss++;
319 set_pte(dst_pte, pte);
320 page_dup_rmap(page);
321}
322
323static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
324 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
325 unsigned long addr, unsigned long end)
326{
327 pte_t *src_pte, *dst_pte;
328 pte_t *s, *d;
329 unsigned long vm_flags = vma->vm_flags;
330
331 d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
332 if (!dst_pte)
333 return -ENOMEM;
334
335 spin_lock(&src_mm->page_table_lock);
336 s = src_pte = pte_offset_map_nested(src_pmd, addr);
337 for (; addr < end; addr += PAGE_SIZE, s++, d++) {
338 if (pte_none(*s))
339 continue;
340 copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
341 }
342 pte_unmap_nested(src_pte);
343 pte_unmap(dst_pte);
344 spin_unlock(&src_mm->page_table_lock);
345 cond_resched_lock(&dst_mm->page_table_lock);
346 return 0;
347}
348
349static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
350 pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
351 unsigned long addr, unsigned long end)
352{
353 pmd_t *src_pmd, *dst_pmd;
354 int err = 0;
355 unsigned long next;
356
357 src_pmd = pmd_offset(src_pud, addr);
358 dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
359 if (!dst_pmd)
360 return -ENOMEM;
361
362 for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
363 next = (addr + PMD_SIZE) & PMD_MASK;
364 if (next > end || next <= addr)
365 next = end;
366 if (pmd_none(*src_pmd))
367 continue;
368 if (pmd_bad(*src_pmd)) {
369 pmd_ERROR(*src_pmd);
370 pmd_clear(src_pmd);
371 continue;
372 }
373 err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
374 vma, addr, next);
375 if (err)
376 break;
377 }
378 return err;
379}
380
381static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
382 pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
383 unsigned long addr, unsigned long end)
384{
385 pud_t *src_pud, *dst_pud;
386 int err = 0;
387 unsigned long next;
388
389 src_pud = pud_offset(src_pgd, addr);
390 dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
391 if (!dst_pud)
392 return -ENOMEM;
393
394 for (; addr < end; addr = next, src_pud++, dst_pud++) {
395 next = (addr + PUD_SIZE) & PUD_MASK;
396 if (next > end || next <= addr)
397 next = end;
398 if (pud_none(*src_pud))
399 continue;
400 if (pud_bad(*src_pud)) {
401 pud_ERROR(*src_pud);
402 pud_clear(src_pud);
403 continue;
404 }
405 err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
406 vma, addr, next);
407 if (err)
408 break;
409 }
410 return err;
411}
412
413int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
414 struct vm_area_struct *vma)
415{
416 pgd_t *src_pgd, *dst_pgd;
417 unsigned long addr, start, end, next;
418 int err = 0;
419
420 if (is_vm_hugetlb_page(vma))
421 return copy_hugetlb_page_range(dst, src, vma);
422
423 start = vma->vm_start;
424 src_pgd = pgd_offset(src, start);
425 dst_pgd = pgd_offset(dst, start);
426
427 end = vma->vm_end;
428 addr = start;
429 while (addr && (addr < end-1)) {
430 next = (addr + PGDIR_SIZE) & PGDIR_MASK;
431 if (next > end || next <= addr)
432 next = end;
433 if (pgd_none(*src_pgd))
434 goto next_pgd;
435 if (pgd_bad(*src_pgd)) {
436 pgd_ERROR(*src_pgd);
437 pgd_clear(src_pgd);
438 goto next_pgd;
439 }
440 err = copy_pud_range(dst, src, dst_pgd, src_pgd,
441 vma, addr, next);
442 if (err)
443 break;
444
445next_pgd:
446 src_pgd++;
447 dst_pgd++;
448 addr = next;
449 }
450
451 return err;
452}
453
454static void zap_pte_range(struct mmu_gather *tlb,
455 pmd_t *pmd, unsigned long address,
456 unsigned long size, struct zap_details *details)
457{
458 unsigned long offset;
459 pte_t *ptep;
460
461 if (pmd_none(*pmd))
462 return;
463 if (unlikely(pmd_bad(*pmd))) {
464 pmd_ERROR(*pmd);
465 pmd_clear(pmd);
466 return;
467 }
468 ptep = pte_offset_map(pmd, address);
469 offset = address & ~PMD_MASK;
470 if (offset + size > PMD_SIZE)
471 size = PMD_SIZE - offset;
472 size &= PAGE_MASK;
473 if (details && !details->check_mapping && !details->nonlinear_vma)
474 details = NULL;
475 for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
476 pte_t pte = *ptep;
477 if (pte_none(pte))
478 continue;
479 if (pte_present(pte)) {
480 struct page *page = NULL;
481 unsigned long pfn = pte_pfn(pte);
482 if (pfn_valid(pfn)) {
483 page = pfn_to_page(pfn);
484 if (PageReserved(page))
485 page = NULL;
486 }
487 if (unlikely(details) && page) {
488
489
490
491
492
493 if (details->check_mapping &&
494 details->check_mapping != page->mapping)
495 continue;
496
497
498
499
500 if (details->nonlinear_vma &&
501 (page->index < details->first_index ||
502 page->index > details->last_index))
503 continue;
504 }
505 pte = ptep_get_and_clear(ptep);
506 tlb_remove_tlb_entry(tlb, ptep, address+offset);
507 if (unlikely(!page))
508 continue;
509 if (unlikely(details) && details->nonlinear_vma
510 && linear_page_index(details->nonlinear_vma,
511 address+offset) != page->index)
512 set_pte(ptep, pgoff_to_pte(page->index));
513 if (pte_dirty(pte))
514 set_page_dirty(page);
515 if (PageAnon(page))
516 tlb->mm->anon_rss--;
517 else if (pte_young(pte))
518 mark_page_accessed(page);
519 tlb->freed++;
520 page_remove_rmap(page);
521 tlb_remove_page(tlb, page);
522 continue;
523 }
524
525
526
527
528 if (unlikely(details))
529 continue;
530 if (!pte_file(pte))
531 free_swap_and_cache(pte_to_swp_entry(pte));
532 pte_clear(ptep);
533 }
534 pte_unmap(ptep-1);
535}
536
537static void zap_pmd_range(struct mmu_gather *tlb,
538 pud_t *pud, unsigned long address,
539 unsigned long size, struct zap_details *details)
540{
541 pmd_t * pmd;
542 unsigned long end;
543
544 if (pud_none(*pud))
545 return;
546 if (unlikely(pud_bad(*pud))) {
547 pud_ERROR(*pud);
548 pud_clear(pud);
549 return;
550 }
551 pmd = pmd_offset(pud, address);
552 end = address + size;
553 if (end > ((address + PUD_SIZE) & PUD_MASK))
554 end = ((address + PUD_SIZE) & PUD_MASK);
555 do {
556 zap_pte_range(tlb, pmd, address, end - address, details);
557 address = (address + PMD_SIZE) & PMD_MASK;
558 pmd++;
559 } while (address && (address < end));
560}
561
562static void zap_pud_range(struct mmu_gather *tlb,
563 pgd_t * pgd, unsigned long address,
564 unsigned long end, struct zap_details *details)
565{
566 pud_t * pud;
567
568 if (pgd_none(*pgd))
569 return;
570 if (unlikely(pgd_bad(*pgd))) {
571 pgd_ERROR(*pgd);
572 pgd_clear(pgd);
573 return;
574 }
575 pud = pud_offset(pgd, address);
576 do {
577 zap_pmd_range(tlb, pud, address, end - address, details);
578 address = (address + PUD_SIZE) & PUD_MASK;
579 pud++;
580 } while (address && (address < end));
581}
582
583static void unmap_page_range(struct mmu_gather *tlb,
584 struct vm_area_struct *vma, unsigned long address,
585 unsigned long end, struct zap_details *details)
586{
587 unsigned long next;
588 pgd_t *pgd;
589 int i;
590
591 BUG_ON(address >= end);
592 pgd = pgd_offset(vma->vm_mm, address);
593 tlb_start_vma(tlb, vma);
594 for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
595 next = (address + PGDIR_SIZE) & PGDIR_MASK;
596 if (next <= address || next > end)
597 next = end;
598 zap_pud_range(tlb, pgd, address, next, details);
599 address = next;
600 pgd++;
601 }
602 tlb_end_vma(tlb, vma);
603}
604
/*
 * Number of bytes unmap_vmas() zaps before it finishes the current mmu
 * gather and checks whether it should reschedule.  The block is much
 * smaller when CONFIG_PREEMPT is set.
 */
#ifndef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#else
# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
#endif
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
640 struct vm_area_struct *vma, unsigned long start_addr,
641 unsigned long end_addr, unsigned long *nr_accounted,
642 struct zap_details *details)
643{
644 unsigned long zap_bytes = ZAP_BLOCK_SIZE;
645 unsigned long tlb_start = 0;
646 int tlb_start_valid = 0;
647 int ret = 0;
648 spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
649 int fullmm = tlb_is_full_mm(*tlbp);
650
651 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
652 unsigned long start;
653 unsigned long end;
654
655 start = max(vma->vm_start, start_addr);
656 if (start >= vma->vm_end)
657 continue;
658 end = min(vma->vm_end, end_addr);
659 if (end <= vma->vm_start)
660 continue;
661
662 if (vma->vm_flags & VM_ACCOUNT)
663 *nr_accounted += (end - start) >> PAGE_SHIFT;
664
665 ret++;
666 while (start != end) {
667 unsigned long block;
668
669 if (!tlb_start_valid) {
670 tlb_start = start;
671 tlb_start_valid = 1;
672 }
673
674 if (is_vm_hugetlb_page(vma)) {
675 block = end - start;
676 unmap_hugepage_range(vma, start, end);
677 } else {
678 block = min(zap_bytes, end - start);
679 unmap_page_range(*tlbp, vma, start,
680 start + block, details);
681 }
682
683 start += block;
684 zap_bytes -= block;
685 if ((long)zap_bytes > 0)
686 continue;
687
688 tlb_finish_mmu(*tlbp, tlb_start, start);
689
690 if (need_resched() ||
691 need_lockbreak(&mm->page_table_lock) ||
692 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
693 if (i_mmap_lock) {
694
695 *tlbp = tlb_gather_mmu(mm, fullmm);
696 details->break_addr = start;
697 goto out;
698 }
699 spin_unlock(&mm->page_table_lock);
700 cond_resched();
701 spin_lock(&mm->page_table_lock);
702 }
703
704 *tlbp = tlb_gather_mmu(mm, fullmm);
705 tlb_start_valid = 0;
706 zap_bytes = ZAP_BLOCK_SIZE;
707 }
708 }
709out:
710 return ret;
711}
712
713
714
715
716
717
718
719
720void zap_page_range(struct vm_area_struct *vma, unsigned long address,
721 unsigned long size, struct zap_details *details)
722{
723 struct mm_struct *mm = vma->vm_mm;
724 struct mmu_gather *tlb;
725 unsigned long end = address + size;
726 unsigned long nr_accounted = 0;
727
728 if (is_vm_hugetlb_page(vma)) {
729 zap_hugepage_range(vma, address, size);
730 return;
731 }
732
733 lru_add_drain();
734 spin_lock(&mm->page_table_lock);
735 tlb = tlb_gather_mmu(mm, 0);
736 unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
737 tlb_finish_mmu(tlb, address, end);
738 acct_update_integrals();
739 spin_unlock(&mm->page_table_lock);
740}
741
742
743
744
745
746static struct page *
747__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
748{
749 pgd_t *pgd;
750 pud_t *pud;
751 pmd_t *pmd;
752 pte_t *ptep, pte;
753 unsigned long pfn;
754 struct page *page;
755
756 page = follow_huge_addr(mm, address, write);
757 if (! IS_ERR(page))
758 return page;
759
760 pgd = pgd_offset(mm, address);
761 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
762 goto out;
763
764 pud = pud_offset(pgd, address);
765 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
766 goto out;
767
768 pmd = pmd_offset(pud, address);
769 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
770 goto out;
771 if (pmd_huge(*pmd))
772 return follow_huge_pmd(mm, address, pmd, write);
773
774 ptep = pte_offset_map(pmd, address);
775 if (!ptep)
776 goto out;
777
778 pte = *ptep;
779 pte_unmap(ptep);
780 if (pte_present(pte)) {
781 if (write && !pte_write(pte))
782 goto out;
783 if (read && !pte_read(pte))
784 goto out;
785 pfn = pte_pfn(pte);
786 if (pfn_valid(pfn)) {
787 page = pfn_to_page(pfn);
788 if (write && !pte_dirty(pte) && !PageDirty(page))
789 set_page_dirty(page);
790 mark_page_accessed(page);
791 return page;
792 }
793 }
794
795out:
796 return NULL;
797}
798
/*
 * Look up the page mapped at @address in @mm; see __follow_page().
 * No readability check is requested; @write is passed straight through.
 */
struct page *
follow_page(struct mm_struct *mm, unsigned long address, int write)
{
	struct page *found = __follow_page(mm, address, 0, write);

	return found;
}
804
805int
806check_user_page_readable(struct mm_struct *mm, unsigned long address)
807{
808 return __follow_page(mm, address, 1, 0) != NULL;
809}
810
811EXPORT_SYMBOL(check_user_page_readable);
812
813
814
815
816
817
818
819static inline struct page *get_page_map(struct page *page)
820{
821 if (!pfn_valid(page_to_pfn(page)))
822 return NULL;
823 return page;
824}
825
826
827static inline int
828untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
829 unsigned long address)
830{
831 pgd_t *pgd;
832 pud_t *pud;
833 pmd_t *pmd;
834
835
836 if (vma->vm_ops && vma->vm_ops->nopage)
837 return 0;
838
839
840 pgd = pgd_offset(mm, address);
841 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
842 return 1;
843
844 pud = pud_offset(pgd, address);
845 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
846 return 1;
847
848
849 pmd = pmd_offset(pud, address);
850 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
851 return 1;
852
853
854 return 0;
855}
856
857
858int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
859 unsigned long start, int len, int write, int force,
860 struct page **pages, struct vm_area_struct **vmas)
861{
862 int i;
863 unsigned int flags;
864
865
866
867
868
869 flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
870 flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
871 i = 0;
872
873 do {
874 struct vm_area_struct * vma;
875
876 vma = find_extend_vma(mm, start);
877 if (!vma && in_gate_area(tsk, start)) {
878 unsigned long pg = start & PAGE_MASK;
879 struct vm_area_struct *gate_vma = get_gate_vma(tsk);
880 pgd_t *pgd;
881 pud_t *pud;
882 pmd_t *pmd;
883 pte_t *pte;
884 if (write)
885 return i ? : -EFAULT;
886 if (pg > TASK_SIZE)
887 pgd = pgd_offset_k(pg);
888 else
889 pgd = pgd_offset_gate(mm, pg);
890 BUG_ON(pgd_none(*pgd));
891 pud = pud_offset(pgd, pg);
892 BUG_ON(pud_none(*pud));
893 pmd = pmd_offset(pud, pg);
894 BUG_ON(pmd_none(*pmd));
895 pte = pte_offset_map(pmd, pg);
896 BUG_ON(pte_none(*pte));
897 if (pages) {
898 pages[i] = pte_page(*pte);
899 get_page(pages[i]);
900 }
901 pte_unmap(pte);
902 if (vmas)
903 vmas[i] = gate_vma;
904 i++;
905 start += PAGE_SIZE;
906 len--;
907 continue;
908 }
909
910 if (!vma || (vma->vm_flags & VM_IO)
911 || !(flags & vma->vm_flags))
912 return i ? : -EFAULT;
913
914 if (is_vm_hugetlb_page(vma)) {
915 i = follow_hugetlb_page(mm, vma, pages, vmas,
916 &start, &len, i);
917 continue;
918 }
919 spin_lock(&mm->page_table_lock);
920 do {
921 struct page *map;
922 int lookup_write = write;
923
924 cond_resched_lock(&mm->page_table_lock);
925 while (!(map = follow_page(mm, start, lookup_write))) {
926
927
928
929
930
931
932
933 if (!lookup_write &&
934 untouched_anonymous_page(mm,vma,start)) {
935 map = ZERO_PAGE(start);
936 break;
937 }
938 spin_unlock(&mm->page_table_lock);
939 switch (handle_mm_fault(mm,vma,start,write)) {
940 case VM_FAULT_MINOR:
941 tsk->min_flt++;
942 break;
943 case VM_FAULT_MAJOR:
944 tsk->maj_flt++;
945 break;
946 case VM_FAULT_SIGBUS:
947 return i ? i : -EFAULT;
948 case VM_FAULT_OOM:
949 return i ? i : -ENOMEM;
950 default:
951 BUG();
952 }
953
954
955
956
957
958
959
960 lookup_write = write && !force;
961 spin_lock(&mm->page_table_lock);
962 }
963 if (pages) {
964 pages[i] = get_page_map(map);
965 if (!pages[i]) {
966 spin_unlock(&mm->page_table_lock);
967 while (i--)
968 page_cache_release(pages[i]);
969 i = -EFAULT;
970 goto out;
971 }
972 flush_dcache_page(pages[i]);
973 if (!PageReserved(pages[i]))
974 page_cache_get(pages[i]);
975 }
976 if (vmas)
977 vmas[i] = vma;
978 i++;
979 start += PAGE_SIZE;
980 len--;
981 } while(len && start < vma->vm_end);
982 spin_unlock(&mm->page_table_lock);
983 } while(len);
984out:
985 return i;
986}
987
988EXPORT_SYMBOL(get_user_pages);
989
990static void zeromap_pte_range(pte_t * pte, unsigned long address,
991 unsigned long size, pgprot_t prot)
992{
993 unsigned long end;
994
995 address &= ~PMD_MASK;
996 end = address + size;
997 if (end > PMD_SIZE)
998 end = PMD_SIZE;
999 do {
1000 pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
1001 BUG_ON(!pte_none(*pte));
1002 set_pte(pte, zero_pte);
1003 address += PAGE_SIZE;
1004 pte++;
1005 } while (address && (address < end));
1006}
1007
1008static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
1009 unsigned long address, unsigned long size, pgprot_t prot)
1010{
1011 unsigned long base, end;
1012
1013 base = address & PUD_MASK;
1014 address &= ~PUD_MASK;
1015 end = address + size;
1016 if (end > PUD_SIZE)
1017 end = PUD_SIZE;
1018 do {
1019 pte_t * pte = pte_alloc_map(mm, pmd, base + address);
1020 if (!pte)
1021 return -ENOMEM;
1022 zeromap_pte_range(pte, base + address, end - address, prot);
1023 pte_unmap(pte);
1024 address = (address + PMD_SIZE) & PMD_MASK;
1025 pmd++;
1026 } while (address && (address < end));
1027 return 0;
1028}
1029
1030static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
1031 unsigned long address,
1032 unsigned long size, pgprot_t prot)
1033{
1034 unsigned long base, end;
1035 int error = 0;
1036
1037 base = address & PGDIR_MASK;
1038 address &= ~PGDIR_MASK;
1039 end = address + size;
1040 if (end > PGDIR_SIZE)
1041 end = PGDIR_SIZE;
1042 do {
1043 pmd_t * pmd = pmd_alloc(mm, pud, base + address);
1044 error = -ENOMEM;
1045 if (!pmd)
1046 break;
1047 error = zeromap_pmd_range(mm, pmd, base + address,
1048 end - address, prot);
1049 if (error)
1050 break;
1051 address = (address + PUD_SIZE) & PUD_MASK;
1052 pud++;
1053 } while (address && (address < end));
1054 return 0;
1055}
1056
1057int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
1058 unsigned long size, pgprot_t prot)
1059{
1060 int i;
1061 int error = 0;
1062 pgd_t * pgd;
1063 unsigned long beg = address;
1064 unsigned long end = address + size;
1065 unsigned long next;
1066 struct mm_struct *mm = vma->vm_mm;
1067
1068 pgd = pgd_offset(mm, address);
1069 flush_cache_range(vma, beg, end);
1070 BUG_ON(address >= end);
1071 BUG_ON(end > vma->vm_end);
1072
1073 spin_lock(&mm->page_table_lock);
1074 for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
1075 pud_t *pud = pud_alloc(mm, pgd, address);
1076 error = -ENOMEM;
1077 if (!pud)
1078 break;
1079 next = (address + PGDIR_SIZE) & PGDIR_MASK;
1080 if (next <= beg || next > end)
1081 next = end;
1082 error = zeromap_pud_range(mm, pud, address,
1083 next - address, prot);
1084 if (error)
1085 break;
1086 address = next;
1087 pgd++;
1088 }
1089
1090
1091
1092 flush_tlb_range(vma, beg, end);
1093 spin_unlock(&mm->page_table_lock);
1094 return error;
1095}
1096
1097
1098
1099
1100
1101
1102static inline void
1103remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
1104 unsigned long pfn, pgprot_t prot)
1105{
1106 unsigned long end;
1107
1108 address &= ~PMD_MASK;
1109 end = address + size;
1110 if (end > PMD_SIZE)
1111 end = PMD_SIZE;
1112 do {
1113 BUG_ON(!pte_none(*pte));
1114 if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
1115 set_pte(pte, pfn_pte(pfn, prot));
1116 address += PAGE_SIZE;
1117 pfn++;
1118 pte++;
1119 } while (address && (address < end));
1120}
1121
1122static inline int
1123remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
1124 unsigned long size, unsigned long pfn, pgprot_t prot)
1125{
1126 unsigned long base, end;
1127
1128 base = address & PUD_MASK;
1129 address &= ~PUD_MASK;
1130 end = address + size;
1131 if (end > PUD_SIZE)
1132 end = PUD_SIZE;
1133 pfn -= (address >> PAGE_SHIFT);
1134 do {
1135 pte_t * pte = pte_alloc_map(mm, pmd, base + address);
1136 if (!pte)
1137 return -ENOMEM;
1138 remap_pte_range(pte, base + address, end - address,
1139 (address >> PAGE_SHIFT) + pfn, prot);
1140 pte_unmap(pte);
1141 address = (address + PMD_SIZE) & PMD_MASK;
1142 pmd++;
1143 } while (address && (address < end));
1144 return 0;
1145}
1146
1147static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
1148 unsigned long address, unsigned long size,
1149 unsigned long pfn, pgprot_t prot)
1150{
1151 unsigned long base, end;
1152 int error;
1153
1154 base = address & PGDIR_MASK;
1155 address &= ~PGDIR_MASK;
1156 end = address + size;
1157 if (end > PGDIR_SIZE)
1158 end = PGDIR_SIZE;
1159 pfn -= address >> PAGE_SHIFT;
1160 do {
1161 pmd_t *pmd = pmd_alloc(mm, pud, base+address);
1162 error = -ENOMEM;
1163 if (!pmd)
1164 break;
1165 error = remap_pmd_range(mm, pmd, base + address, end - address,
1166 (address >> PAGE_SHIFT) + pfn, prot);
1167 if (error)
1168 break;
1169 address = (address + PUD_SIZE) & PUD_MASK;
1170 pud++;
1171 } while (address && (address < end));
1172 return error;
1173}
1174
1175
1176int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1177 unsigned long pfn, unsigned long size, pgprot_t prot)
1178{
1179 int error = 0;
1180 pgd_t *pgd;
1181 unsigned long beg = from;
1182 unsigned long end = from + size;
1183 unsigned long next;
1184 struct mm_struct *mm = vma->vm_mm;
1185 int i;
1186
1187 pfn -= from >> PAGE_SHIFT;
1188 pgd = pgd_offset(mm, from);
1189 flush_cache_range(vma, beg, end);
1190 BUG_ON(from >= end);
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200 vma->vm_flags |= VM_IO | VM_RESERVED;
1201
1202 spin_lock(&mm->page_table_lock);
1203 for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
1204 pud_t *pud = pud_alloc(mm, pgd, from);
1205 error = -ENOMEM;
1206 if (!pud)
1207 break;
1208 next = (from + PGDIR_SIZE) & PGDIR_MASK;
1209 if (next > end || next <= from)
1210 next = end;
1211 error = remap_pud_range(mm, pud, from, end - from,
1212 pfn + (from >> PAGE_SHIFT), prot);
1213 if (error)
1214 break;
1215 from = next;
1216 pgd++;
1217 }
1218
1219
1220
1221 flush_tlb_range(vma, beg, end);
1222 spin_unlock(&mm->page_table_lock);
1223
1224 return error;
1225}
1226
1227EXPORT_SYMBOL(remap_pfn_range);
1228
1229
1230
1231
1232
1233
1234
1235static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
1236{
1237 if (likely(vma->vm_flags & VM_WRITE))
1238 pte = pte_mkwrite(pte);
1239 return pte;
1240}
1241
1242
1243
1244
1245static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address,
1246 pte_t *page_table)
1247{
1248 pte_t entry;
1249
1250 flush_cache_page(vma, address);
1251 entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
1252 vma);
1253 ptep_establish(vma, address, page_table, entry);
1254 update_mmu_cache(vma, address, entry);
1255}
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1278 unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
1279{
1280 struct page *old_page, *new_page;
1281 unsigned long pfn = pte_pfn(pte);
1282 pte_t entry;
1283
1284 if (unlikely(!pfn_valid(pfn))) {
1285
1286
1287
1288
1289
1290 pte_unmap(page_table);
1291 printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
1292 address);
1293 spin_unlock(&mm->page_table_lock);
1294 return VM_FAULT_OOM;
1295 }
1296 old_page = pfn_to_page(pfn);
1297
1298 if (!TestSetPageLocked(old_page)) {
1299 int reuse = can_share_swap_page(old_page);
1300 unlock_page(old_page);
1301 if (reuse) {
1302 flush_cache_page(vma, address);
1303 entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
1304 vma);
1305 ptep_set_access_flags(vma, address, page_table, entry, 1);
1306 update_mmu_cache(vma, address, entry);
1307 pte_unmap(page_table);
1308 spin_unlock(&mm->page_table_lock);
1309 return VM_FAULT_MINOR;
1310 }
1311 }
1312 pte_unmap(page_table);
1313
1314
1315
1316
1317 if (!PageReserved(old_page))
1318 page_cache_get(old_page);
1319 spin_unlock(&mm->page_table_lock);
1320
1321 if (unlikely(anon_vma_prepare(vma)))
1322 goto no_new_page;
1323 if (old_page == ZERO_PAGE(address)) {
1324 new_page = alloc_zeroed_user_highpage(vma, address);
1325 if (!new_page)
1326 goto no_new_page;
1327 } else {
1328 new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
1329 if (!new_page)
1330 goto no_new_page;
1331 copy_user_highpage(new_page, old_page, address);
1332 }
1333
1334
1335
1336 spin_lock(&mm->page_table_lock);
1337 page_table = pte_offset_map(pmd, address);
1338 if (likely(pte_same(*page_table, pte))) {
1339 if (PageAnon(old_page))
1340 mm->anon_rss--;
1341 if (PageReserved(old_page)) {
1342 ++mm->rss;
1343 acct_update_integrals();
1344 update_mem_hiwater();
1345 } else
1346 page_remove_rmap(old_page);
1347 break_cow(vma, new_page, address, page_table);
1348 lru_cache_add_active(new_page);
1349 page_add_anon_rmap(new_page, vma, address);
1350
1351
1352 new_page = old_page;
1353 }
1354 pte_unmap(page_table);
1355 page_cache_release(new_page);
1356 page_cache_release(old_page);
1357 spin_unlock(&mm->page_table_lock);
1358 return VM_FAULT_MINOR;
1359
1360no_new_page:
1361 page_cache_release(old_page);
1362 return VM_FAULT_OOM;
1363}
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
/*
 * True when @truncate_count has no bits set below the page boundary.
 * Callers in this file use that to tell a page-aligned restart address
 * stored in vm_truncate_count apart from other values.
 */
#define is_restart_addr(truncate_count) \
	(((truncate_count) & ~PAGE_MASK) == 0)
1398
1399static void reset_vma_truncate_counts(struct address_space *mapping)
1400{
1401 struct vm_area_struct *vma;
1402 struct prio_tree_iter iter;
1403
1404 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
1405 vma->vm_truncate_count = 0;
1406 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1407 vma->vm_truncate_count = 0;
1408}
1409
1410static int unmap_mapping_range_vma(struct vm_area_struct *vma,
1411 unsigned long start_addr, unsigned long end_addr,
1412 struct zap_details *details)
1413{
1414 unsigned long restart_addr;
1415 int need_break;
1416
1417again:
1418 restart_addr = vma->vm_truncate_count;
1419 if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
1420 start_addr = restart_addr;
1421 if (start_addr >= end_addr) {
1422
1423 vma->vm_truncate_count = details->truncate_count;
1424 return 0;
1425 }
1426 }
1427
1428 details->break_addr = end_addr;
1429 zap_page_range(vma, start_addr, end_addr - start_addr, details);
1430
1431
1432
1433
1434
1435
1436
1437 need_break = need_resched() ||
1438 need_lockbreak(details->i_mmap_lock);
1439
1440 if (details->break_addr >= end_addr) {
1441
1442 vma->vm_truncate_count = details->truncate_count;
1443 if (!need_break)
1444 return 0;
1445 } else {
1446
1447 vma->vm_truncate_count = details->break_addr;
1448 if (!need_break)
1449 goto again;
1450 }
1451
1452 spin_unlock(details->i_mmap_lock);
1453 cond_resched();
1454 spin_lock(details->i_mmap_lock);
1455 return -EINTR;
1456}
1457
1458static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
1459 struct zap_details *details)
1460{
1461 struct vm_area_struct *vma;
1462 struct prio_tree_iter iter;
1463 pgoff_t vba, vea, zba, zea;
1464
1465restart:
1466 vma_prio_tree_foreach(vma, &iter, root,
1467 details->first_index, details->last_index) {
1468
1469 if (vma->vm_truncate_count == details->truncate_count)
1470 continue;
1471
1472 vba = vma->vm_pgoff;
1473 vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
1474
1475 zba = details->first_index;
1476 if (zba < vba)
1477 zba = vba;
1478 zea = details->last_index;
1479 if (zea > vea)
1480 zea = vea;
1481
1482 if (unmap_mapping_range_vma(vma,
1483 ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
1484 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
1485 details) < 0)
1486 goto restart;
1487 }
1488}
1489
1490static inline void unmap_mapping_range_list(struct list_head *head,
1491 struct zap_details *details)
1492{
1493 struct vm_area_struct *vma;
1494
1495
1496
1497
1498
1499
1500
1501restart:
1502 list_for_each_entry(vma, head, shared.vm_set.list) {
1503
1504 if (vma->vm_truncate_count == details->truncate_count)
1505 continue;
1506 details->nonlinear_vma = vma;
1507 if (unmap_mapping_range_vma(vma, vma->vm_start,
1508 vma->vm_end, details) < 0)
1509 goto restart;
1510 }
1511}
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
/*
 * unmap_mapping_range - zap, in every vma mapping @mapping, the pages that
 * fall within a byte range of the file.
 * @mapping:   address space whose i_mmap tree and i_mmap_nonlinear list
 *             are walked
 * @holebegin: byte offset of the start of the range; it is shifted down by
 *             PAGE_SHIFT, i.e. rounded down to a page index
 * @holelen:   length in bytes, rounded up to whole pages.  A length of 0
 *             makes last_index wrap below first_index, which is then
 *             replaced by ULONG_MAX, i.e. the range extends to the end.
 * @even_cows: when non-zero, details.check_mapping is NULL; otherwise it is
 *             @mapping.  NOTE(review): presumably this controls whether
 *             private COWed pages are zapped too -- the consumer of
 *             check_mapping is not visible in this part of the file.
 */
void unmap_mapping_range(struct address_space *mapping,
		loff_t const holebegin, loff_t const holelen, int even_cows)
{
	struct zap_details details;
	pgoff_t hba = holebegin >> PAGE_SHIFT;
	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/*
	 * When loff_t is wider than pgoff_t the end index may not fit:
	 * clamp the length so that the range ends at index ULONG_MAX.
	 */
	if (sizeof(holelen) > sizeof(hlen)) {
		long long holeend =
			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (holeend & ~(long long)ULONG_MAX)
			hlen = ULONG_MAX - hba + 1;
	}

	details.check_mapping = even_cows? NULL: mapping;
	details.nonlinear_vma = NULL;
	details.first_index = hba;
	details.last_index = hba + hlen - 1;
	/* Wrapped (e.g. hlen == 0): treat as "up to the last index". */
	if (details.last_index < details.first_index)
		details.last_index = ULONG_MAX;
	details.i_mmap_lock = &mapping->i_mmap_lock;

	spin_lock(&mapping->i_mmap_lock);

	/* Order the caller's earlier stores before the truncate_count store. */
	smp_wmb();
	/* New generation: do_no_page() compares against this to retry. */
	mapping->truncate_count++;
	/*
	 * Full barrier between the truncate_count increment and the accesses
	 * made below while unmapping.
	 */
	smp_mb();
	if (unlikely(is_restart_addr(mapping->truncate_count))) {
		/*
		 * The generation value must not look like a restart address
		 * stored in vm_truncate_count.  On wrap to 0, clear every
		 * vma's count first; then step past the ambiguous value.
		 */
		if (mapping->truncate_count == 0)
			reset_vma_truncate_counts(mapping);
		mapping->truncate_count++;
	}
	details.truncate_count = mapping->truncate_count;

	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
		unmap_mapping_range_tree(&mapping->i_mmap, &details);
	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
	spin_unlock(&mapping->i_mmap_lock);
}
EXPORT_SYMBOL(unmap_mapping_range);
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588int vmtruncate(struct inode * inode, loff_t offset)
1589{
1590 struct address_space *mapping = inode->i_mapping;
1591 unsigned long limit;
1592
1593 if (inode->i_size < offset)
1594 goto do_expand;
1595
1596
1597
1598
1599 if (IS_SWAPFILE(inode))
1600 goto out_busy;
1601 i_size_write(inode, offset);
1602 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1603 truncate_inode_pages(mapping, offset);
1604 goto out_truncate;
1605
1606do_expand:
1607 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
1608 if (limit != RLIM_INFINITY && offset > limit)
1609 goto out_sig;
1610 if (offset > inode->i_sb->s_maxbytes)
1611 goto out_big;
1612 i_size_write(inode, offset);
1613
1614out_truncate:
1615 if (inode->i_op && inode->i_op->truncate)
1616 inode->i_op->truncate(inode);
1617 return 0;
1618out_sig:
1619 send_sig(SIGXFSZ, current, 0);
1620out_big:
1621 return -EFBIG;
1622out_busy:
1623 return -ETXTBSY;
1624}
1625
1626EXPORT_SYMBOL(vmtruncate);
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
1640{
1641#ifdef CONFIG_NUMA
1642 struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
1643#endif
1644 int i, num;
1645 struct page *new_page;
1646 unsigned long offset;
1647
1648
1649
1650
1651 num = valid_swaphandles(entry, &offset);
1652 for (i = 0; i < num; offset++, i++) {
1653
1654 new_page = read_swap_cache_async(swp_entry(swp_type(entry),
1655 offset), vma, addr);
1656 if (!new_page)
1657 break;
1658 page_cache_release(new_page);
1659#ifdef CONFIG_NUMA
1660
1661
1662
1663 addr += PAGE_SIZE;
1664 if (addr == 0)
1665 vma = NULL;
1666 if (vma) {
1667 if (addr >= vma->vm_end) {
1668 vma = next_vma;
1669 next_vma = vma ? vma->vm_next : NULL;
1670 }
1671 if (vma && addr < vma->vm_start)
1672 vma = NULL;
1673 } else {
1674 if (next_vma && addr >= next_vma->vm_start) {
1675 vma = next_vma;
1676 next_vma = vma->vm_next;
1677 }
1678 }
1679#endif
1680 }
1681 lru_add_drain();
1682}
1683
1684
1685
1686
1687
/*
 * Handle a fault on a pte that is neither present, none, nor a file pte
 * (see handle_pte_fault()): @orig_pte is decoded as a swap entry and the
 * page is brought back in.
 *
 * Entered with @page_table mapped and mm->page_table_lock held; both are
 * released first thing, and the lock is retaken before the pte is examined
 * again.  Returns VM_FAULT_MINOR, VM_FAULT_MAJOR (the page had to be read
 * in rather than found in the swap cache) or VM_FAULT_OOM.
 */
static int do_swap_page(struct mm_struct * mm,
	struct vm_area_struct * vma, unsigned long address,
	pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
{
	struct page *page;
	swp_entry_t entry = pte_to_swp_entry(orig_pte);
	pte_t pte;
	int ret = VM_FAULT_MINOR;

	pte_unmap(page_table);
	spin_unlock(&mm->page_table_lock);
	page = lookup_swap_cache(entry);
	if (!page) {
		swapin_readahead(entry, address, vma);
		page = read_swap_cache_async(entry, vma, address);
		if (!page) {
			/*
			 * No page could be obtained.  If the pte changed while
			 * the lock was dropped, someone else dealt with the
			 * fault and this is only a minor fault; otherwise
			 * report OOM.
			 */
			spin_lock(&mm->page_table_lock);
			page_table = pte_offset_map(pmd, address);
			if (likely(pte_same(*page_table, orig_pte)))
				ret = VM_FAULT_OOM;
			else
				ret = VM_FAULT_MINOR;
			pte_unmap(page_table);
			spin_unlock(&mm->page_table_lock);
			goto out;
		}

		/* Not in the swap cache: account this as a major fault. */
		ret = VM_FAULT_MAJOR;
		inc_page_state(pgmajfault);
		grab_swap_token();
	}

	mark_page_accessed(page);
	lock_page(page);

	/*
	 * Retake the lock and back out if the pte changed while it was
	 * released.
	 */
	spin_lock(&mm->page_table_lock);
	page_table = pte_offset_map(pmd, address);
	if (unlikely(!pte_same(*page_table, orig_pte))) {
		pte_unmap(page_table);
		spin_unlock(&mm->page_table_lock);
		unlock_page(page);
		page_cache_release(page);
		ret = VM_FAULT_MINOR;
		goto out;
	}

	/* The pte is unchanged: go ahead and install the page. */

	swap_free(entry);
	if (vm_swap_full())
		remove_exclusive_swap_page(page);

	mm->rss++;
	acct_update_integrals();
	update_mem_hiwater();

	pte = mk_pte(page, vma->vm_page_prot);
	if (write_access && can_share_swap_page(page)) {
		/* Map it dirty (and writable if the vma allows) right away. */
		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
		write_access = 0;
	}
	unlock_page(page);

	flush_icache_page(vma, page);
	set_pte(page_table, pte);
	page_add_anon_rmap(page, vma, address);

	if (write_access) {
		/*
		 * Write fault that could not be satisfied above: hand over to
		 * do_wp_page().  NOTE(review): this path does not unmap the
		 * pte or drop page_table_lock itself, so do_wp_page() is
		 * presumably responsible for both -- confirm.
		 */
		if (do_wp_page(mm, vma, address,
				page_table, pmd, pte) == VM_FAULT_OOM)
			ret = VM_FAULT_OOM;
		goto out;
	}

	/* Let the architecture update its MMU state for the new pte. */
	update_mmu_cache(vma, address, pte);
	pte_unmap(page_table);
	spin_unlock(&mm->page_table_lock);
out:
	return ret;
}
1778
1779
1780
1781
1782
1783
/*
 * Handle a fault on an empty pte in a vma without a ->nopage operation
 * (called from do_no_page()).
 *
 * Read access: the pte is set to a write-protected mapping of
 * ZERO_PAGE(addr).  Write access: a zeroed page is allocated and mapped
 * dirty (writable if maybe_mkwrite() allows it for this vma).
 *
 * Entered with @page_table mapped and mm->page_table_lock held.  Returns
 * VM_FAULT_MINOR with the lock released, or VM_FAULT_OOM (also with the
 * lock released) when anon_vma_prepare() or the page allocation fails.
 */
static int
do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		pte_t *page_table, pmd_t *pmd, int write_access,
		unsigned long addr)
{
	pte_t entry;
	struct page * page = ZERO_PAGE(addr);

	/* Default: read-only mapping of the zero page. */
	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* A write access gets a private page instead. */
	if (write_access) {
		/* Drop the lock around the preparation and allocation calls. */
		pte_unmap(page_table);
		spin_unlock(&mm->page_table_lock);

		if (unlikely(anon_vma_prepare(vma)))
			goto no_mem;
		page = alloc_zeroed_user_highpage(vma, addr);
		if (!page)
			goto no_mem;

		spin_lock(&mm->page_table_lock);
		page_table = pte_offset_map(pmd, addr);

		/* The pte was filled in while the lock was dropped: back out. */
		if (!pte_none(*page_table)) {
			pte_unmap(page_table);
			page_cache_release(page);
			spin_unlock(&mm->page_table_lock);
			goto out;
		}
		mm->rss++;
		acct_update_integrals();
		update_mem_hiwater();
		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
							 vma->vm_page_prot)),
				      vma);
		lru_cache_add_active(page);
		SetPageReferenced(page);
		page_add_anon_rmap(page, vma, addr);
	}

	set_pte(page_table, entry);
	pte_unmap(page_table);

	/* Let the architecture update its MMU state for the new pte. */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
out:
	return VM_FAULT_MINOR;
no_mem:
	return VM_FAULT_OOM;
}
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851static int
1852do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1853 unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
1854{
1855 struct page * new_page;
1856 struct address_space *mapping = NULL;
1857 pte_t entry;
1858 unsigned int sequence = 0;
1859 int ret = VM_FAULT_MINOR;
1860 int anon = 0;
1861
1862 if (!vma->vm_ops || !vma->vm_ops->nopage)
1863 return do_anonymous_page(mm, vma, page_table,
1864 pmd, write_access, address);
1865 pte_unmap(page_table);
1866 spin_unlock(&mm->page_table_lock);
1867
1868 if (vma->vm_file) {
1869 mapping = vma->vm_file->f_mapping;
1870 sequence = mapping->truncate_count;
1871 smp_rmb();
1872 }
1873retry:
1874 cond_resched();
1875 new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885 if (new_page == NOPAGE_SIGBUS)
1886 return VM_FAULT_SIGBUS;
1887 if (new_page == NOPAGE_OOM)
1888 return VM_FAULT_OOM;
1889
1890
1891
1892
1893 if (write_access && !(vma->vm_flags & VM_SHARED)) {
1894 struct page *page;
1895
1896 if (unlikely(anon_vma_prepare(vma)))
1897 goto oom;
1898 page = alloc_page_vma(GFP_HIGHUSER, vma, address);
1899 if (!page)
1900 goto oom;
1901 copy_user_highpage(page, new_page, address);
1902 page_cache_release(new_page);
1903 new_page = page;
1904 anon = 1;
1905 }
1906
1907 spin_lock(&mm->page_table_lock);
1908
1909
1910
1911
1912
1913 if (mapping && unlikely(sequence != mapping->truncate_count)) {
1914 sequence = mapping->truncate_count;
1915 spin_unlock(&mm->page_table_lock);
1916 page_cache_release(new_page);
1917 goto retry;
1918 }
1919 page_table = pte_offset_map(pmd, address);
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932 if (pte_none(*page_table)) {
1933 if (!PageReserved(new_page))
1934 ++mm->rss;
1935 acct_update_integrals();
1936 update_mem_hiwater();
1937
1938 flush_icache_page(vma, new_page);
1939 entry = mk_pte(new_page, vma->vm_page_prot);
1940 if (write_access)
1941 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1942 set_pte(page_table, entry);
1943 if (anon) {
1944 lru_cache_add_active(new_page);
1945 page_add_anon_rmap(new_page, vma, address);
1946 } else
1947 page_add_file_rmap(new_page);
1948 pte_unmap(page_table);
1949 } else {
1950
1951 pte_unmap(page_table);
1952 page_cache_release(new_page);
1953 spin_unlock(&mm->page_table_lock);
1954 goto out;
1955 }
1956
1957
1958 update_mmu_cache(vma, address, entry);
1959 spin_unlock(&mm->page_table_lock);
1960out:
1961 return ret;
1962oom:
1963 page_cache_release(new_page);
1964 ret = VM_FAULT_OOM;
1965 goto out;
1966}
1967
1968
1969
1970
1971
1972
1973static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
1974 unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
1975{
1976 unsigned long pgoff;
1977 int err;
1978
1979 BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
1980
1981
1982
1983
1984 if (!vma->vm_ops || !vma->vm_ops->populate ||
1985 (write_access && !(vma->vm_flags & VM_SHARED))) {
1986 pte_clear(pte);
1987 return do_no_page(mm, vma, address, write_access, pte, pmd);
1988 }
1989
1990 pgoff = pte_to_pgoff(*pte);
1991
1992 pte_unmap(pte);
1993 spin_unlock(&mm->page_table_lock);
1994
1995 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
1996 if (err == -ENOMEM)
1997 return VM_FAULT_OOM;
1998 if (err)
1999 return VM_FAULT_SIGBUS;
2000 return VM_FAULT_MAJOR;
2001}
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024static inline int handle_pte_fault(struct mm_struct *mm,
2025 struct vm_area_struct * vma, unsigned long address,
2026 int write_access, pte_t *pte, pmd_t *pmd)
2027{
2028 pte_t entry;
2029
2030 entry = *pte;
2031 if (!pte_present(entry)) {
2032
2033
2034
2035
2036
2037 if (pte_none(entry))
2038 return do_no_page(mm, vma, address, write_access, pte, pmd);
2039 if (pte_file(entry))
2040 return do_file_page(mm, vma, address, write_access, pte, pmd);
2041 return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
2042 }
2043
2044 if (write_access) {
2045 if (!pte_write(entry))
2046 return do_wp_page(mm, vma, address, pte, pmd, entry);
2047
2048 entry = pte_mkdirty(entry);
2049 }
2050 entry = pte_mkyoung(entry);
2051 ptep_set_access_flags(vma, address, pte, entry, write_access);
2052 update_mmu_cache(vma, address, entry);
2053 pte_unmap(pte);
2054 spin_unlock(&mm->page_table_lock);
2055 return VM_FAULT_MINOR;
2056}
2057
2058
2059
2060
2061int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
2062 unsigned long address, int write_access)
2063{
2064 pgd_t *pgd;
2065 pud_t *pud;
2066 pmd_t *pmd;
2067 pte_t *pte;
2068
2069 __set_current_state(TASK_RUNNING);
2070
2071 inc_page_state(pgfault);
2072
2073 if (is_vm_hugetlb_page(vma))
2074 return VM_FAULT_SIGBUS;
2075
2076
2077
2078
2079
2080 pgd = pgd_offset(mm, address);
2081 spin_lock(&mm->page_table_lock);
2082
2083 pud = pud_alloc(mm, pgd, address);
2084 if (!pud)
2085 goto oom;
2086
2087 pmd = pmd_alloc(mm, pud, address);
2088 if (!pmd)
2089 goto oom;
2090
2091 pte = pte_alloc_map(mm, pmd, address);
2092 if (!pte)
2093 goto oom;
2094
2095 return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
2096
2097 oom:
2098 spin_unlock(&mm->page_table_lock);
2099 return VM_FAULT_OOM;
2100}
2101
2102#ifndef __ARCH_HAS_4LEVEL_HACK
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
2113{
2114 pud_t *new;
2115
2116 spin_unlock(&mm->page_table_lock);
2117 new = pud_alloc_one(mm, address);
2118 spin_lock(&mm->page_table_lock);
2119 if (!new)
2120 return NULL;
2121
2122
2123
2124
2125
2126 if (pgd_present(*pgd)) {
2127 pud_free(new);
2128 goto out;
2129 }
2130 pgd_populate(mm, pgd, new);
2131 out:
2132 return pud_offset(pgd, address);
2133}
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
2145{
2146 pmd_t *new;
2147
2148 spin_unlock(&mm->page_table_lock);
2149 new = pmd_alloc_one(mm, address);
2150 spin_lock(&mm->page_table_lock);
2151 if (!new)
2152 return NULL;
2153
2154
2155
2156
2157
2158 if (pud_present(*pud)) {
2159 pmd_free(new);
2160 goto out;
2161 }
2162 pud_populate(mm, pud, new);
2163 out:
2164 return pmd_offset(pud, address);
2165}
2166#else
2167pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
2168{
2169 pmd_t *new;
2170
2171 spin_unlock(&mm->page_table_lock);
2172 new = pmd_alloc_one(mm, address);
2173 spin_lock(&mm->page_table_lock);
2174 if (!new)
2175 return NULL;
2176
2177
2178
2179
2180
2181 if (pgd_present(*pud)) {
2182 pmd_free(new);
2183 goto out;
2184 }
2185 pgd_populate(mm, pud, new);
2186out:
2187 return pmd_offset(pud, address);
2188}
2189#endif
2190
2191int make_pages_present(unsigned long addr, unsigned long end)
2192{
2193 int ret, len, write;
2194 struct vm_area_struct * vma;
2195
2196 vma = find_vma(current->mm, addr);
2197 if (!vma)
2198 return -1;
2199 write = (vma->vm_flags & VM_WRITE) != 0;
2200 if (addr >= end)
2201 BUG();
2202 if (end > vma->vm_end)
2203 BUG();
2204 len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
2205 ret = get_user_pages(current, current->mm, addr,
2206 len, write, 0, NULL, NULL);
2207 if (ret < 0)
2208 return ret;
2209 return ret == len ? 0 : -1;
2210}
2211
2212
2213
2214
2215struct page * vmalloc_to_page(void * vmalloc_addr)
2216{
2217 unsigned long addr = (unsigned long) vmalloc_addr;
2218 struct page *page = NULL;
2219 pgd_t *pgd = pgd_offset_k(addr);
2220 pud_t *pud;
2221 pmd_t *pmd;
2222 pte_t *ptep, pte;
2223
2224 if (!pgd_none(*pgd)) {
2225 pud = pud_offset(pgd, addr);
2226 if (!pud_none(*pud)) {
2227 pmd = pmd_offset(pud, addr);
2228 if (!pmd_none(*pmd)) {
2229 ptep = pte_offset_map(pmd, addr);
2230 pte = *ptep;
2231 if (pte_present(pte))
2232 page = pte_page(pte);
2233 pte_unmap(ptep);
2234 }
2235 }
2236 }
2237 return page;
2238}
2239
2240EXPORT_SYMBOL(vmalloc_to_page);
2241
2242
2243
2244
/*
 * Return the page frame number of the page that vmalloc_to_page() finds
 * for @vmalloc_addr.  The result of vmalloc_to_page() is passed to
 * page_to_pfn() without a NULL check.
 */
unsigned long vmalloc_to_pfn(void * vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}

EXPORT_SYMBOL(vmalloc_to_pfn);
2251
2252
2253
2254
2255
2256void update_mem_hiwater(void)
2257{
2258 struct task_struct *tsk = current;
2259
2260 if (tsk->mm) {
2261 if (tsk->mm->hiwater_rss < tsk->mm->rss)
2262 tsk->mm->hiwater_rss = tsk->mm->rss;
2263 if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
2264 tsk->mm->hiwater_vm = tsk->mm->total_vm;
2265 }
2266}
2267
2268#if !defined(__HAVE_ARCH_GATE_AREA)
2269
2270#if defined(AT_SYSINFO_EHDR)
2271struct vm_area_struct gate_vma;
2272
2273static int __init gate_vma_init(void)
2274{
2275 gate_vma.vm_mm = NULL;
2276 gate_vma.vm_start = FIXADDR_USER_START;
2277 gate_vma.vm_end = FIXADDR_USER_END;
2278 gate_vma.vm_page_prot = PAGE_READONLY;
2279 gate_vma.vm_flags = 0;
2280 return 0;
2281}
2282__initcall(gate_vma_init);
2283#endif
2284
2285struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
2286{
2287#ifdef AT_SYSINFO_EHDR
2288 return &gate_vma;
2289#else
2290 return NULL;
2291#endif
2292}
2293
/*
 * Return 1 if @addr lies in [FIXADDR_USER_START, FIXADDR_USER_END), else 0.
 * Always returns 0 when AT_SYSINFO_EHDR is not defined.
 */
int in_gate_area_no_task(unsigned long addr)
{
	int inside = 0;

#ifdef AT_SYSINFO_EHDR
	if (addr >= FIXADDR_USER_START && addr < FIXADDR_USER_END)
		inside = 1;
#endif
	return inside;
}
2302
2303#endif
2304