1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <linux/mm.h>
46#include <linux/pagemap.h>
47#include <linux/swap.h>
48#include <linux/swapops.h>
49#include <linux/slab.h>
50#include <linux/init.h>
51#include <linux/ksm.h>
52#include <linux/rmap.h>
53#include <linux/rcupdate.h>
54#include <linux/module.h>
55#include <linux/memcontrol.h>
56#include <linux/mmu_notifier.h>
57#include <linux/migrate.h>
58#include <linux/hugetlb.h>
59
60#include <asm/tlbflush.h>
61
62#include "internal.h"
63
64static struct kmem_cache *anon_vma_cachep;
65static struct kmem_cache *anon_vma_chain_cachep;
66
67static inline struct anon_vma *anon_vma_alloc(void)
68{
69 struct anon_vma *anon_vma;
70
71 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
72 if (anon_vma) {
73 atomic_set(&anon_vma->refcount, 1);
74
75
76
77
78 anon_vma->root = anon_vma;
79 }
80
81 return anon_vma;
82}
83
84static inline void anon_vma_free(struct anon_vma *anon_vma)
85{
86 VM_BUG_ON(atomic_read(&anon_vma->refcount));
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 if (mutex_is_locked(&anon_vma->root->mutex)) {
106 anon_vma_lock(anon_vma);
107 anon_vma_unlock(anon_vma);
108 }
109
110 kmem_cache_free(anon_vma_cachep, anon_vma);
111}
112
113static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
114{
115 return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
116}
117
118static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
119{
120 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
121}
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150int anon_vma_prepare(struct vm_area_struct *vma)
151{
152 struct anon_vma *anon_vma = vma->anon_vma;
153 struct anon_vma_chain *avc;
154
155 might_sleep();
156 if (unlikely(!anon_vma)) {
157 struct mm_struct *mm = vma->vm_mm;
158 struct anon_vma *allocated;
159
160 avc = anon_vma_chain_alloc(GFP_KERNEL);
161 if (!avc)
162 goto out_enomem;
163
164 anon_vma = find_mergeable_anon_vma(vma);
165 allocated = NULL;
166 if (!anon_vma) {
167 anon_vma = anon_vma_alloc();
168 if (unlikely(!anon_vma))
169 goto out_enomem_free_avc;
170 allocated = anon_vma;
171 }
172
173 anon_vma_lock(anon_vma);
174
175 spin_lock(&mm->page_table_lock);
176 if (likely(!vma->anon_vma)) {
177 vma->anon_vma = anon_vma;
178 avc->anon_vma = anon_vma;
179 avc->vma = vma;
180 list_add(&avc->same_vma, &vma->anon_vma_chain);
181 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
182 allocated = NULL;
183 avc = NULL;
184 }
185 spin_unlock(&mm->page_table_lock);
186 anon_vma_unlock(anon_vma);
187
188 if (unlikely(allocated))
189 put_anon_vma(allocated);
190 if (unlikely(avc))
191 anon_vma_chain_free(avc);
192 }
193 return 0;
194
195 out_enomem_free_avc:
196 anon_vma_chain_free(avc);
197 out_enomem:
198 return -ENOMEM;
199}
200
201
202
203
204
205
206
207
208
209static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
210{
211 struct anon_vma *new_root = anon_vma->root;
212 if (new_root != root) {
213 if (WARN_ON_ONCE(root))
214 mutex_unlock(&root->mutex);
215 root = new_root;
216 mutex_lock(&root->mutex);
217 }
218 return root;
219}
220
221static inline void unlock_anon_vma_root(struct anon_vma *root)
222{
223 if (root)
224 mutex_unlock(&root->mutex);
225}
226
227static void anon_vma_chain_link(struct vm_area_struct *vma,
228 struct anon_vma_chain *avc,
229 struct anon_vma *anon_vma)
230{
231 avc->vma = vma;
232 avc->anon_vma = anon_vma;
233 list_add(&avc->same_vma, &vma->anon_vma_chain);
234
235
236
237
238
239 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
240}
241
242
243
244
245
246int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
247{
248 struct anon_vma_chain *avc, *pavc;
249 struct anon_vma *root = NULL;
250
251 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
252 struct anon_vma *anon_vma;
253
254 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
255 if (unlikely(!avc)) {
256 unlock_anon_vma_root(root);
257 root = NULL;
258 avc = anon_vma_chain_alloc(GFP_KERNEL);
259 if (!avc)
260 goto enomem_failure;
261 }
262 anon_vma = pavc->anon_vma;
263 root = lock_anon_vma_root(root, anon_vma);
264 anon_vma_chain_link(dst, avc, anon_vma);
265 }
266 unlock_anon_vma_root(root);
267 return 0;
268
269 enomem_failure:
270 unlink_anon_vmas(dst);
271 return -ENOMEM;
272}
273
274
275
276
277
278
279int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
280{
281 struct anon_vma_chain *avc;
282 struct anon_vma *anon_vma;
283
284
285 if (!pvma->anon_vma)
286 return 0;
287
288
289
290
291
292 if (anon_vma_clone(vma, pvma))
293 return -ENOMEM;
294
295
296 anon_vma = anon_vma_alloc();
297 if (!anon_vma)
298 goto out_error;
299 avc = anon_vma_chain_alloc(GFP_KERNEL);
300 if (!avc)
301 goto out_error_free_anon_vma;
302
303
304
305
306
307 anon_vma->root = pvma->anon_vma->root;
308
309
310
311
312
313 get_anon_vma(anon_vma->root);
314
315 vma->anon_vma = anon_vma;
316 anon_vma_lock(anon_vma);
317 anon_vma_chain_link(vma, avc, anon_vma);
318 anon_vma_unlock(anon_vma);
319
320 return 0;
321
322 out_error_free_anon_vma:
323 put_anon_vma(anon_vma);
324 out_error:
325 unlink_anon_vmas(vma);
326 return -ENOMEM;
327}
328
329void unlink_anon_vmas(struct vm_area_struct *vma)
330{
331 struct anon_vma_chain *avc, *next;
332 struct anon_vma *root = NULL;
333
334
335
336
337
338 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
339 struct anon_vma *anon_vma = avc->anon_vma;
340
341 root = lock_anon_vma_root(root, anon_vma);
342 list_del(&avc->same_anon_vma);
343
344
345
346
347
348 if (list_empty(&anon_vma->head))
349 continue;
350
351 list_del(&avc->same_vma);
352 anon_vma_chain_free(avc);
353 }
354 unlock_anon_vma_root(root);
355
356
357
358
359
360
361 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
362 struct anon_vma *anon_vma = avc->anon_vma;
363
364 put_anon_vma(anon_vma);
365
366 list_del(&avc->same_vma);
367 anon_vma_chain_free(avc);
368 }
369}
370
371static void anon_vma_ctor(void *data)
372{
373 struct anon_vma *anon_vma = data;
374
375 mutex_init(&anon_vma->mutex);
376 atomic_set(&anon_vma->refcount, 0);
377 INIT_LIST_HEAD(&anon_vma->head);
378}
379
380void __init anon_vma_init(void)
381{
382 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
383 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
384 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
385}
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410struct anon_vma *page_get_anon_vma(struct page *page)
411{
412 struct anon_vma *anon_vma = NULL;
413 unsigned long anon_mapping;
414
415 rcu_read_lock();
416 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
417 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
418 goto out;
419 if (!page_mapped(page))
420 goto out;
421
422 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
423 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
424 anon_vma = NULL;
425 goto out;
426 }
427
428
429
430
431
432
433
434
435 if (!page_mapped(page)) {
436 put_anon_vma(anon_vma);
437 anon_vma = NULL;
438 }
439out:
440 rcu_read_unlock();
441
442 return anon_vma;
443}
444
445
446
447
448
449
450
451
452struct anon_vma *page_lock_anon_vma(struct page *page)
453{
454 struct anon_vma *anon_vma = NULL;
455 struct anon_vma *root_anon_vma;
456 unsigned long anon_mapping;
457
458 rcu_read_lock();
459 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
460 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
461 goto out;
462 if (!page_mapped(page))
463 goto out;
464
465 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
466 root_anon_vma = ACCESS_ONCE(anon_vma->root);
467 if (mutex_trylock(&root_anon_vma->mutex)) {
468
469
470
471
472
473 if (!page_mapped(page)) {
474 mutex_unlock(&root_anon_vma->mutex);
475 anon_vma = NULL;
476 }
477 goto out;
478 }
479
480
481 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
482 anon_vma = NULL;
483 goto out;
484 }
485
486 if (!page_mapped(page)) {
487 put_anon_vma(anon_vma);
488 anon_vma = NULL;
489 goto out;
490 }
491
492
493 rcu_read_unlock();
494 anon_vma_lock(anon_vma);
495
496 if (atomic_dec_and_test(&anon_vma->refcount)) {
497
498
499
500
501
502 anon_vma_unlock(anon_vma);
503 __put_anon_vma(anon_vma);
504 anon_vma = NULL;
505 }
506
507 return anon_vma;
508
509out:
510 rcu_read_unlock();
511 return anon_vma;
512}
513
/* Counterpart of page_lock_anon_vma(): drop the lock held on @anon_vma. */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	struct anon_vma *locked = anon_vma;

	anon_vma_unlock(locked);
}
518
519
520
521
522
523
524inline unsigned long
525vma_address(struct page *page, struct vm_area_struct *vma)
526{
527 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
528 unsigned long address;
529
530 if (unlikely(is_vm_hugetlb_page(vma)))
531 pgoff = page->index << huge_page_order(page_hstate(page));
532 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
533 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
534
535 return -EFAULT;
536 }
537 return address;
538}
539
540
541
542
543
544unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
545{
546 if (PageAnon(page)) {
547 struct anon_vma *page__anon_vma = page_anon_vma(page);
548
549
550
551
552 if (!vma->anon_vma || !page__anon_vma ||
553 vma->anon_vma->root != page__anon_vma->root)
554 return -EFAULT;
555 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
556 if (!vma->vm_file ||
557 vma->vm_file->f_mapping != page->mapping)
558 return -EFAULT;
559 } else
560 return -EFAULT;
561 return vma_address(page, vma);
562}
563
564
565
566
567
568
569
570
571
572
573pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
574 unsigned long address, spinlock_t **ptlp, int sync)
575{
576 pgd_t *pgd;
577 pud_t *pud;
578 pmd_t *pmd;
579 pte_t *pte;
580 spinlock_t *ptl;
581
582 if (unlikely(PageHuge(page))) {
583 pte = huge_pte_offset(mm, address);
584 ptl = &mm->page_table_lock;
585 goto check;
586 }
587
588 pgd = pgd_offset(mm, address);
589 if (!pgd_present(*pgd))
590 return NULL;
591
592 pud = pud_offset(pgd, address);
593 if (!pud_present(*pud))
594 return NULL;
595
596 pmd = pmd_offset(pud, address);
597 if (!pmd_present(*pmd))
598 return NULL;
599 if (pmd_trans_huge(*pmd))
600 return NULL;
601
602 pte = pte_offset_map(pmd, address);
603
604 if (!sync && !pte_present(*pte)) {
605 pte_unmap(pte);
606 return NULL;
607 }
608
609 ptl = pte_lockptr(mm, pmd);
610check:
611 spin_lock(ptl);
612 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
613 *ptlp = ptl;
614 return pte;
615 }
616 pte_unmap_unlock(pte, ptl);
617 return NULL;
618}
619
620
621
622
623
624
625
626
627
628
629int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
630{
631 unsigned long address;
632 pte_t *pte;
633 spinlock_t *ptl;
634
635 address = vma_address(page, vma);
636 if (address == -EFAULT)
637 return 0;
638 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
639 if (!pte)
640 return 0;
641 pte_unmap_unlock(pte, ptl);
642
643 return 1;
644}
645
646
647
648
649
650int page_referenced_one(struct page *page, struct vm_area_struct *vma,
651 unsigned long address, unsigned int *mapcount,
652 unsigned long *vm_flags)
653{
654 struct mm_struct *mm = vma->vm_mm;
655 int referenced = 0;
656
657 if (unlikely(PageTransHuge(page))) {
658 pmd_t *pmd;
659
660 spin_lock(&mm->page_table_lock);
661
662
663
664
665 pmd = page_check_address_pmd(page, mm, address,
666 PAGE_CHECK_ADDRESS_PMD_FLAG);
667 if (!pmd) {
668 spin_unlock(&mm->page_table_lock);
669 goto out;
670 }
671
672 if (vma->vm_flags & VM_LOCKED) {
673 spin_unlock(&mm->page_table_lock);
674 *mapcount = 0;
675 *vm_flags |= VM_LOCKED;
676 goto out;
677 }
678
679
680 if (pmdp_clear_flush_young_notify(vma, address, pmd))
681 referenced++;
682 spin_unlock(&mm->page_table_lock);
683 } else {
684 pte_t *pte;
685 spinlock_t *ptl;
686
687
688
689
690
691 pte = page_check_address(page, mm, address, &ptl, 0);
692 if (!pte)
693 goto out;
694
695 if (vma->vm_flags & VM_LOCKED) {
696 pte_unmap_unlock(pte, ptl);
697 *mapcount = 0;
698 *vm_flags |= VM_LOCKED;
699 goto out;
700 }
701
702 if (ptep_clear_flush_young_notify(vma, address, pte)) {
703
704
705
706
707
708
709
710 if (likely(!VM_SequentialReadHint(vma)))
711 referenced++;
712 }
713 pte_unmap_unlock(pte, ptl);
714 }
715
716
717
718 if (mm != current->mm && has_swap_token(mm) &&
719 rwsem_is_locked(&mm->mmap_sem))
720 referenced++;
721
722 (*mapcount)--;
723
724 if (referenced)
725 *vm_flags |= vma->vm_flags;
726out:
727 return referenced;
728}
729
730static int page_referenced_anon(struct page *page,
731 struct mem_cgroup *mem_cont,
732 unsigned long *vm_flags)
733{
734 unsigned int mapcount;
735 struct anon_vma *anon_vma;
736 struct anon_vma_chain *avc;
737 int referenced = 0;
738
739 anon_vma = page_lock_anon_vma(page);
740 if (!anon_vma)
741 return referenced;
742
743 mapcount = page_mapcount(page);
744 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
745 struct vm_area_struct *vma = avc->vma;
746 unsigned long address = vma_address(page, vma);
747 if (address == -EFAULT)
748 continue;
749
750
751
752
753
754 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
755 continue;
756 referenced += page_referenced_one(page, vma, address,
757 &mapcount, vm_flags);
758 if (!mapcount)
759 break;
760 }
761
762 page_unlock_anon_vma(anon_vma);
763 return referenced;
764}
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779static int page_referenced_file(struct page *page,
780 struct mem_cgroup *mem_cont,
781 unsigned long *vm_flags)
782{
783 unsigned int mapcount;
784 struct address_space *mapping = page->mapping;
785 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
786 struct vm_area_struct *vma;
787 struct prio_tree_iter iter;
788 int referenced = 0;
789
790
791
792
793
794
795 BUG_ON(PageAnon(page));
796
797
798
799
800
801
802
803 BUG_ON(!PageLocked(page));
804
805 mutex_lock(&mapping->i_mmap_mutex);
806
807
808
809
810
811 mapcount = page_mapcount(page);
812
813 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
814 unsigned long address = vma_address(page, vma);
815 if (address == -EFAULT)
816 continue;
817
818
819
820
821
822 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
823 continue;
824 referenced += page_referenced_one(page, vma, address,
825 &mapcount, vm_flags);
826 if (!mapcount)
827 break;
828 }
829
830 mutex_unlock(&mapping->i_mmap_mutex);
831 return referenced;
832}
833
834
835
836
837
838
839
840
841
842
843
844int page_referenced(struct page *page,
845 int is_locked,
846 struct mem_cgroup *mem_cont,
847 unsigned long *vm_flags)
848{
849 int referenced = 0;
850 int we_locked = 0;
851
852 *vm_flags = 0;
853 if (page_mapped(page) && page_rmapping(page)) {
854 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
855 we_locked = trylock_page(page);
856 if (!we_locked) {
857 referenced++;
858 goto out;
859 }
860 }
861 if (unlikely(PageKsm(page)))
862 referenced += page_referenced_ksm(page, mem_cont,
863 vm_flags);
864 else if (PageAnon(page))
865 referenced += page_referenced_anon(page, mem_cont,
866 vm_flags);
867 else if (page->mapping)
868 referenced += page_referenced_file(page, mem_cont,
869 vm_flags);
870 if (we_locked)
871 unlock_page(page);
872
873 if (page_test_and_clear_young(page_to_pfn(page)))
874 referenced++;
875 }
876out:
877 return referenced;
878}
879
880static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
881 unsigned long address)
882{
883 struct mm_struct *mm = vma->vm_mm;
884 pte_t *pte;
885 spinlock_t *ptl;
886 int ret = 0;
887
888 pte = page_check_address(page, mm, address, &ptl, 1);
889 if (!pte)
890 goto out;
891
892 if (pte_dirty(*pte) || pte_write(*pte)) {
893 pte_t entry;
894
895 flush_cache_page(vma, address, pte_pfn(*pte));
896 entry = ptep_clear_flush_notify(vma, address, pte);
897 entry = pte_wrprotect(entry);
898 entry = pte_mkclean(entry);
899 set_pte_at(mm, address, pte, entry);
900 ret = 1;
901 }
902
903 pte_unmap_unlock(pte, ptl);
904out:
905 return ret;
906}
907
908static int page_mkclean_file(struct address_space *mapping, struct page *page)
909{
910 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
911 struct vm_area_struct *vma;
912 struct prio_tree_iter iter;
913 int ret = 0;
914
915 BUG_ON(PageAnon(page));
916
917 mutex_lock(&mapping->i_mmap_mutex);
918 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
919 if (vma->vm_flags & VM_SHARED) {
920 unsigned long address = vma_address(page, vma);
921 if (address == -EFAULT)
922 continue;
923 ret += page_mkclean_one(page, vma, address);
924 }
925 }
926 mutex_unlock(&mapping->i_mmap_mutex);
927 return ret;
928}
929
/*
 * Clean and write-protect all shared mappings of a locked @page.
 *
 * Returns 0 for unmapped pages and pages without a mapping.  Otherwise
 * returns the count from page_mkclean_file(), or 1 if
 * page_test_and_clear_dirty() reports the page itself as dirty.
 * The page must be locked (asserted).
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;
	int ret;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	ret = page_mkclean_file(mapping, page);
	if (page_test_and_clear_dirty(page_to_pfn(page), 1))
		ret = 1;

	return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);
948
949
950
951
952
953
954
955
956
957
958
959
960void page_move_anon_rmap(struct page *page,
961 struct vm_area_struct *vma, unsigned long address)
962{
963 struct anon_vma *anon_vma = vma->anon_vma;
964
965 VM_BUG_ON(!PageLocked(page));
966 VM_BUG_ON(!anon_vma);
967 VM_BUG_ON(page->index != linear_page_index(vma, address));
968
969 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
970 page->mapping = (struct address_space *) anon_vma;
971}
972
973
974
975
976
977
978
979
980static void __page_set_anon_rmap(struct page *page,
981 struct vm_area_struct *vma, unsigned long address, int exclusive)
982{
983 struct anon_vma *anon_vma = vma->anon_vma;
984
985 BUG_ON(!anon_vma);
986
987 if (PageAnon(page))
988 return;
989
990
991
992
993
994
995 if (!exclusive)
996 anon_vma = anon_vma->root;
997
998 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
999 page->mapping = (struct address_space *) anon_vma;
1000 page->index = linear_page_index(vma, address);
1001}
1002
1003
1004
1005
1006
1007
1008
/*
 * Debug-only sanity check (CONFIG_DEBUG_VM): @page must share its anon_vma
 * root with @vma, and its index must match @address within @vma.
 * Compiles to nothing otherwise.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	struct anon_vma *page_root = page_anon_vma(page)->root;

	BUG_ON(page_root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
/*
 * Add an anonymous mapping of @page at @address in @vma.
 * Shorthand for do_page_add_anon_rmap() with exclusive == 0.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	const int exclusive = 0;

	do_page_add_anon_rmap(page, vma, address, exclusive);
}
1046
1047
1048
1049
1050
1051
1052void do_page_add_anon_rmap(struct page *page,
1053 struct vm_area_struct *vma, unsigned long address, int exclusive)
1054{
1055 int first = atomic_inc_and_test(&page->_mapcount);
1056 if (first) {
1057 if (!PageTransHuge(page))
1058 __inc_zone_page_state(page, NR_ANON_PAGES);
1059 else
1060 __inc_zone_page_state(page,
1061 NR_ANON_TRANSPARENT_HUGEPAGES);
1062 }
1063 if (unlikely(PageKsm(page)))
1064 return;
1065
1066 VM_BUG_ON(!PageLocked(page));
1067
1068 if (first)
1069 __page_set_anon_rmap(page, vma, address, exclusive);
1070 else
1071 __page_check_anon_rmap(page, vma, address);
1072}
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084void page_add_new_anon_rmap(struct page *page,
1085 struct vm_area_struct *vma, unsigned long address)
1086{
1087 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1088 SetPageSwapBacked(page);
1089 atomic_set(&page->_mapcount, 0);
1090 if (!PageTransHuge(page))
1091 __inc_zone_page_state(page, NR_ANON_PAGES);
1092 else
1093 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1094 __page_set_anon_rmap(page, vma, address, 1);
1095 if (page_evictable(page, vma))
1096 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
1097 else
1098 add_page_to_unevictable_list(page);
1099}
1100
1101
1102
1103
1104
1105
1106
1107void page_add_file_rmap(struct page *page)
1108{
1109 if (atomic_inc_and_test(&page->_mapcount)) {
1110 __inc_zone_page_state(page, NR_FILE_MAPPED);
1111 mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
1112 }
1113}
1114
1115
1116
1117
1118
1119
1120
1121void page_remove_rmap(struct page *page)
1122{
1123
1124 if (!atomic_add_negative(-1, &page->_mapcount))
1125 return;
1126
1127
1128
1129
1130
1131
1132
1133
1134 if ((!PageAnon(page) || PageSwapCache(page)) &&
1135 page_test_and_clear_dirty(page_to_pfn(page), 1))
1136 set_page_dirty(page);
1137
1138
1139
1140
1141 if (unlikely(PageHuge(page)))
1142 return;
1143 if (PageAnon(page)) {
1144 mem_cgroup_uncharge_page(page);
1145 if (!PageTransHuge(page))
1146 __dec_zone_page_state(page, NR_ANON_PAGES);
1147 else
1148 __dec_zone_page_state(page,
1149 NR_ANON_TRANSPARENT_HUGEPAGES);
1150 } else {
1151 __dec_zone_page_state(page, NR_FILE_MAPPED);
1152 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1153 }
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163}
1164
1165
1166
1167
1168
1169int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1170 unsigned long address, enum ttu_flags flags)
1171{
1172 struct mm_struct *mm = vma->vm_mm;
1173 pte_t *pte;
1174 pte_t pteval;
1175 spinlock_t *ptl;
1176 int ret = SWAP_AGAIN;
1177
1178 pte = page_check_address(page, mm, address, &ptl, 0);
1179 if (!pte)
1180 goto out;
1181
1182
1183
1184
1185
1186
1187 if (!(flags & TTU_IGNORE_MLOCK)) {
1188 if (vma->vm_flags & VM_LOCKED)
1189 goto out_mlock;
1190
1191 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1192 goto out_unmap;
1193 }
1194 if (!(flags & TTU_IGNORE_ACCESS)) {
1195 if (ptep_clear_flush_young_notify(vma, address, pte)) {
1196 ret = SWAP_FAIL;
1197 goto out_unmap;
1198 }
1199 }
1200
1201
1202 flush_cache_page(vma, address, page_to_pfn(page));
1203 pteval = ptep_clear_flush_notify(vma, address, pte);
1204
1205
1206 if (pte_dirty(pteval))
1207 set_page_dirty(page);
1208
1209
1210 update_hiwater_rss(mm);
1211
1212 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1213 if (PageAnon(page))
1214 dec_mm_counter(mm, MM_ANONPAGES);
1215 else
1216 dec_mm_counter(mm, MM_FILEPAGES);
1217 set_pte_at(mm, address, pte,
1218 swp_entry_to_pte(make_hwpoison_entry(page)));
1219 } else if (PageAnon(page)) {
1220 swp_entry_t entry = { .val = page_private(page) };
1221
1222 if (PageSwapCache(page)) {
1223
1224
1225
1226
1227 if (swap_duplicate(entry) < 0) {
1228 set_pte_at(mm, address, pte, pteval);
1229 ret = SWAP_FAIL;
1230 goto out_unmap;
1231 }
1232 if (list_empty(&mm->mmlist)) {
1233 spin_lock(&mmlist_lock);
1234 if (list_empty(&mm->mmlist))
1235 list_add(&mm->mmlist, &init_mm.mmlist);
1236 spin_unlock(&mmlist_lock);
1237 }
1238 dec_mm_counter(mm, MM_ANONPAGES);
1239 inc_mm_counter(mm, MM_SWAPENTS);
1240 } else if (PAGE_MIGRATION) {
1241
1242
1243
1244
1245
1246 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1247 entry = make_migration_entry(page, pte_write(pteval));
1248 }
1249 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1250 BUG_ON(pte_file(*pte));
1251 } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
1252
1253 swp_entry_t entry;
1254 entry = make_migration_entry(page, pte_write(pteval));
1255 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1256 } else
1257 dec_mm_counter(mm, MM_FILEPAGES);
1258
1259 page_remove_rmap(page);
1260 page_cache_release(page);
1261
1262out_unmap:
1263 pte_unmap_unlock(pte, ptl);
1264out:
1265 return ret;
1266
1267out_mlock:
1268 pte_unmap_unlock(pte, ptl);
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1280 if (vma->vm_flags & VM_LOCKED) {
1281 mlock_vma_page(page);
1282 ret = SWAP_MLOCK;
1283 }
1284 up_read(&vma->vm_mm->mmap_sem);
1285 }
1286 return ret;
1287}
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
1314#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
1315
1316static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1317 struct vm_area_struct *vma, struct page *check_page)
1318{
1319 struct mm_struct *mm = vma->vm_mm;
1320 pgd_t *pgd;
1321 pud_t *pud;
1322 pmd_t *pmd;
1323 pte_t *pte;
1324 pte_t pteval;
1325 spinlock_t *ptl;
1326 struct page *page;
1327 unsigned long address;
1328 unsigned long end;
1329 int ret = SWAP_AGAIN;
1330 int locked_vma = 0;
1331
1332 address = (vma->vm_start + cursor) & CLUSTER_MASK;
1333 end = address + CLUSTER_SIZE;
1334 if (address < vma->vm_start)
1335 address = vma->vm_start;
1336 if (end > vma->vm_end)
1337 end = vma->vm_end;
1338
1339 pgd = pgd_offset(mm, address);
1340 if (!pgd_present(*pgd))
1341 return ret;
1342
1343 pud = pud_offset(pgd, address);
1344 if (!pud_present(*pud))
1345 return ret;
1346
1347 pmd = pmd_offset(pud, address);
1348 if (!pmd_present(*pmd))
1349 return ret;
1350
1351
1352
1353
1354
1355 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1356 locked_vma = (vma->vm_flags & VM_LOCKED);
1357 if (!locked_vma)
1358 up_read(&vma->vm_mm->mmap_sem);
1359 }
1360
1361 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1362
1363
1364 update_hiwater_rss(mm);
1365
1366 for (; address < end; pte++, address += PAGE_SIZE) {
1367 if (!pte_present(*pte))
1368 continue;
1369 page = vm_normal_page(vma, address, *pte);
1370 BUG_ON(!page || PageAnon(page));
1371
1372 if (locked_vma) {
1373 mlock_vma_page(page);
1374 if (page == check_page)
1375 ret = SWAP_MLOCK;
1376 continue;
1377 }
1378
1379 if (ptep_clear_flush_young_notify(vma, address, pte))
1380 continue;
1381
1382
1383 flush_cache_page(vma, address, pte_pfn(*pte));
1384 pteval = ptep_clear_flush_notify(vma, address, pte);
1385
1386
1387 if (page->index != linear_page_index(vma, address))
1388 set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
1389
1390
1391 if (pte_dirty(pteval))
1392 set_page_dirty(page);
1393
1394 page_remove_rmap(page);
1395 page_cache_release(page);
1396 dec_mm_counter(mm, MM_FILEPAGES);
1397 (*mapcount)--;
1398 }
1399 pte_unmap_unlock(pte - 1, ptl);
1400 if (locked_vma)
1401 up_read(&vma->vm_mm->mmap_sem);
1402 return ret;
1403}
1404
1405bool is_vma_temporary_stack(struct vm_area_struct *vma)
1406{
1407 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1408
1409 if (!maybe_stack)
1410 return false;
1411
1412 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1413 VM_STACK_INCOMPLETE_SETUP)
1414 return true;
1415
1416 return false;
1417}
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1436{
1437 struct anon_vma *anon_vma;
1438 struct anon_vma_chain *avc;
1439 int ret = SWAP_AGAIN;
1440
1441 anon_vma = page_lock_anon_vma(page);
1442 if (!anon_vma)
1443 return ret;
1444
1445 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1446 struct vm_area_struct *vma = avc->vma;
1447 unsigned long address;
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457 if (PAGE_MIGRATION && (flags & TTU_MIGRATION) &&
1458 is_vma_temporary_stack(vma))
1459 continue;
1460
1461 address = vma_address(page, vma);
1462 if (address == -EFAULT)
1463 continue;
1464 ret = try_to_unmap_one(page, vma, address, flags);
1465 if (ret != SWAP_AGAIN || !page_mapped(page))
1466 break;
1467 }
1468
1469 page_unlock_anon_vma(anon_vma);
1470 return ret;
1471}
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1489{
1490 struct address_space *mapping = page->mapping;
1491 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1492 struct vm_area_struct *vma;
1493 struct prio_tree_iter iter;
1494 int ret = SWAP_AGAIN;
1495 unsigned long cursor;
1496 unsigned long max_nl_cursor = 0;
1497 unsigned long max_nl_size = 0;
1498 unsigned int mapcount;
1499
1500 mutex_lock(&mapping->i_mmap_mutex);
1501 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1502 unsigned long address = vma_address(page, vma);
1503 if (address == -EFAULT)
1504 continue;
1505 ret = try_to_unmap_one(page, vma, address, flags);
1506 if (ret != SWAP_AGAIN || !page_mapped(page))
1507 goto out;
1508 }
1509
1510 if (list_empty(&mapping->i_mmap_nonlinear))
1511 goto out;
1512
1513
1514
1515
1516
1517
1518 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1519 goto out;
1520
1521 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1522 shared.vm_set.list) {
1523 cursor = (unsigned long) vma->vm_private_data;
1524 if (cursor > max_nl_cursor)
1525 max_nl_cursor = cursor;
1526 cursor = vma->vm_end - vma->vm_start;
1527 if (cursor > max_nl_size)
1528 max_nl_size = cursor;
1529 }
1530
1531 if (max_nl_size == 0) {
1532 ret = SWAP_FAIL;
1533 goto out;
1534 }
1535
1536
1537
1538
1539
1540
1541
1542
1543 mapcount = page_mapcount(page);
1544 if (!mapcount)
1545 goto out;
1546 cond_resched();
1547
1548 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1549 if (max_nl_cursor == 0)
1550 max_nl_cursor = CLUSTER_SIZE;
1551
1552 do {
1553 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1554 shared.vm_set.list) {
1555 cursor = (unsigned long) vma->vm_private_data;
1556 while ( cursor < max_nl_cursor &&
1557 cursor < vma->vm_end - vma->vm_start) {
1558 if (try_to_unmap_cluster(cursor, &mapcount,
1559 vma, page) == SWAP_MLOCK)
1560 ret = SWAP_MLOCK;
1561 cursor += CLUSTER_SIZE;
1562 vma->vm_private_data = (void *) cursor;
1563 if ((int)mapcount <= 0)
1564 goto out;
1565 }
1566 vma->vm_private_data = (void *) max_nl_cursor;
1567 }
1568 cond_resched();
1569 max_nl_cursor += CLUSTER_SIZE;
1570 } while (max_nl_cursor <= max_nl_size);
1571
1572
1573
1574
1575
1576
1577 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1578 vma->vm_private_data = NULL;
1579out:
1580 mutex_unlock(&mapping->i_mmap_mutex);
1581 return ret;
1582}
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598int try_to_unmap(struct page *page, enum ttu_flags flags)
1599{
1600 int ret;
1601
1602 BUG_ON(!PageLocked(page));
1603 VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
1604
1605 if (unlikely(PageKsm(page)))
1606 ret = try_to_unmap_ksm(page, flags);
1607 else if (PageAnon(page))
1608 ret = try_to_unmap_anon(page, flags);
1609 else
1610 ret = try_to_unmap_file(page, flags);
1611 if (ret != SWAP_MLOCK && !page_mapped(page))
1612 ret = SWAP_SUCCESS;
1613 return ret;
1614}
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631int try_to_munlock(struct page *page)
1632{
1633 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1634
1635 if (unlikely(PageKsm(page)))
1636 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1637 else if (PageAnon(page))
1638 return try_to_unmap_anon(page, TTU_MUNLOCK);
1639 else
1640 return try_to_unmap_file(page, TTU_MUNLOCK);
1641}
1642
1643void __put_anon_vma(struct anon_vma *anon_vma)
1644{
1645 struct anon_vma *root = anon_vma->root;
1646
1647 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1648 anon_vma_free(root);
1649
1650 anon_vma_free(anon_vma);
1651}
1652
1653#ifdef CONFIG_MIGRATION
1654
1655
1656
1657
1658static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1659 struct vm_area_struct *, unsigned long, void *), void *arg)
1660{
1661 struct anon_vma *anon_vma;
1662 struct anon_vma_chain *avc;
1663 int ret = SWAP_AGAIN;
1664
1665
1666
1667
1668
1669
1670
1671 anon_vma = page_anon_vma(page);
1672 if (!anon_vma)
1673 return ret;
1674 anon_vma_lock(anon_vma);
1675 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1676 struct vm_area_struct *vma = avc->vma;
1677 unsigned long address = vma_address(page, vma);
1678 if (address == -EFAULT)
1679 continue;
1680 ret = rmap_one(page, vma, address, arg);
1681 if (ret != SWAP_AGAIN)
1682 break;
1683 }
1684 anon_vma_unlock(anon_vma);
1685 return ret;
1686}
1687
1688static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1689 struct vm_area_struct *, unsigned long, void *), void *arg)
1690{
1691 struct address_space *mapping = page->mapping;
1692 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1693 struct vm_area_struct *vma;
1694 struct prio_tree_iter iter;
1695 int ret = SWAP_AGAIN;
1696
1697 if (!mapping)
1698 return ret;
1699 mutex_lock(&mapping->i_mmap_mutex);
1700 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1701 unsigned long address = vma_address(page, vma);
1702 if (address == -EFAULT)
1703 continue;
1704 ret = rmap_one(page, vma, address, arg);
1705 if (ret != SWAP_AGAIN)
1706 break;
1707 }
1708
1709
1710
1711
1712
1713 mutex_unlock(&mapping->i_mmap_mutex);
1714 return ret;
1715}
1716
/*
 * Invoke @rmap_one(page, vma, address, arg) for each mapping of a locked
 * @page, dispatching to the KSM, anonymous or file walker.  Returns the
 * walker's result.  The page must be locked (debug assertion).
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);

	if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);

	return rmap_walk_file(page, rmap_one, arg);
}
1729#endif
1730
1731#ifdef CONFIG_HUGETLB_PAGE
1732
1733
1734
1735
1736
1737static void __hugepage_set_anon_rmap(struct page *page,
1738 struct vm_area_struct *vma, unsigned long address, int exclusive)
1739{
1740 struct anon_vma *anon_vma = vma->anon_vma;
1741
1742 BUG_ON(!anon_vma);
1743
1744 if (PageAnon(page))
1745 return;
1746 if (!exclusive)
1747 anon_vma = anon_vma->root;
1748
1749 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1750 page->mapping = (struct address_space *) anon_vma;
1751 page->index = linear_page_index(vma, address);
1752}
1753
1754void hugepage_add_anon_rmap(struct page *page,
1755 struct vm_area_struct *vma, unsigned long address)
1756{
1757 struct anon_vma *anon_vma = vma->anon_vma;
1758 int first;
1759
1760 BUG_ON(!PageLocked(page));
1761 BUG_ON(!anon_vma);
1762
1763 first = atomic_inc_and_test(&page->_mapcount);
1764 if (first)
1765 __hugepage_set_anon_rmap(page, vma, address, 0);
1766}
1767
1768void hugepage_add_new_anon_rmap(struct page *page,
1769 struct vm_area_struct *vma, unsigned long address)
1770{
1771 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1772 atomic_set(&page->_mapcount, 0);
1773 __hugepage_set_anon_rmap(page, vma, address, 1);
1774}
1775#endif
1776