1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <linux/mm.h>
46#include <linux/pagemap.h>
47#include <linux/swap.h>
48#include <linux/swapops.h>
49#include <linux/slab.h>
50#include <linux/init.h>
51#include <linux/ksm.h>
52#include <linux/rmap.h>
53#include <linux/rcupdate.h>
54#include <linux/export.h>
55#include <linux/memcontrol.h>
56#include <linux/mmu_notifier.h>
57#include <linux/migrate.h>
58#include <linux/hugetlb.h>
59#include <linux/backing-dev.h>
60
61#include <asm/tlbflush.h>
62
63#include "internal.h"
64
65static struct kmem_cache *anon_vma_cachep;
66static struct kmem_cache *anon_vma_chain_cachep;
67
68static inline struct anon_vma *anon_vma_alloc(void)
69{
70 struct anon_vma *anon_vma;
71
72 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
73 if (anon_vma) {
74 atomic_set(&anon_vma->refcount, 1);
75
76
77
78
79 anon_vma->root = anon_vma;
80 }
81
82 return anon_vma;
83}
84
85static inline void anon_vma_free(struct anon_vma *anon_vma)
86{
87 VM_BUG_ON(atomic_read(&anon_vma->refcount));
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106 if (mutex_is_locked(&anon_vma->root->mutex)) {
107 anon_vma_lock(anon_vma);
108 anon_vma_unlock(anon_vma);
109 }
110
111 kmem_cache_free(anon_vma_cachep, anon_vma);
112}
113
114static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
115{
116 return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
117}
118
119static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
120{
121 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
122}
123
124static void anon_vma_chain_link(struct vm_area_struct *vma,
125 struct anon_vma_chain *avc,
126 struct anon_vma *anon_vma)
127{
128 avc->vma = vma;
129 avc->anon_vma = anon_vma;
130 list_add(&avc->same_vma, &vma->anon_vma_chain);
131
132
133
134
135
136 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
137}
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166int anon_vma_prepare(struct vm_area_struct *vma)
167{
168 struct anon_vma *anon_vma = vma->anon_vma;
169 struct anon_vma_chain *avc;
170
171 might_sleep();
172 if (unlikely(!anon_vma)) {
173 struct mm_struct *mm = vma->vm_mm;
174 struct anon_vma *allocated;
175
176 avc = anon_vma_chain_alloc(GFP_KERNEL);
177 if (!avc)
178 goto out_enomem;
179
180 anon_vma = find_mergeable_anon_vma(vma);
181 allocated = NULL;
182 if (!anon_vma) {
183 anon_vma = anon_vma_alloc();
184 if (unlikely(!anon_vma))
185 goto out_enomem_free_avc;
186 allocated = anon_vma;
187 }
188
189 anon_vma_lock(anon_vma);
190
191 spin_lock(&mm->page_table_lock);
192 if (likely(!vma->anon_vma)) {
193 vma->anon_vma = anon_vma;
194 anon_vma_chain_link(vma, avc, anon_vma);
195 allocated = NULL;
196 avc = NULL;
197 }
198 spin_unlock(&mm->page_table_lock);
199 anon_vma_unlock(anon_vma);
200
201 if (unlikely(allocated))
202 put_anon_vma(allocated);
203 if (unlikely(avc))
204 anon_vma_chain_free(avc);
205 }
206 return 0;
207
208 out_enomem_free_avc:
209 anon_vma_chain_free(avc);
210 out_enomem:
211 return -ENOMEM;
212}
213
214
215
216
217
218
219
220
221
222static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
223{
224 struct anon_vma *new_root = anon_vma->root;
225 if (new_root != root) {
226 if (WARN_ON_ONCE(root))
227 mutex_unlock(&root->mutex);
228 root = new_root;
229 mutex_lock(&root->mutex);
230 }
231 return root;
232}
233
234static inline void unlock_anon_vma_root(struct anon_vma *root)
235{
236 if (root)
237 mutex_unlock(&root->mutex);
238}
239
240
241
242
243
244int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
245{
246 struct anon_vma_chain *avc, *pavc;
247 struct anon_vma *root = NULL;
248
249 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
250 struct anon_vma *anon_vma;
251
252 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
253 if (unlikely(!avc)) {
254 unlock_anon_vma_root(root);
255 root = NULL;
256 avc = anon_vma_chain_alloc(GFP_KERNEL);
257 if (!avc)
258 goto enomem_failure;
259 }
260 anon_vma = pavc->anon_vma;
261 root = lock_anon_vma_root(root, anon_vma);
262 anon_vma_chain_link(dst, avc, anon_vma);
263 }
264 unlock_anon_vma_root(root);
265 return 0;
266
267 enomem_failure:
268 unlink_anon_vmas(dst);
269 return -ENOMEM;
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302void anon_vma_moveto_tail(struct vm_area_struct *dst)
303{
304 struct anon_vma_chain *pavc;
305 struct anon_vma *root = NULL;
306
307 list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
308 struct anon_vma *anon_vma = pavc->anon_vma;
309 VM_BUG_ON(pavc->vma != dst);
310 root = lock_anon_vma_root(root, anon_vma);
311 list_del(&pavc->same_anon_vma);
312 list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
313 }
314 unlock_anon_vma_root(root);
315}
316
317
318
319
320
321
322int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
323{
324 struct anon_vma_chain *avc;
325 struct anon_vma *anon_vma;
326
327
328 if (!pvma->anon_vma)
329 return 0;
330
331
332
333
334
335 if (anon_vma_clone(vma, pvma))
336 return -ENOMEM;
337
338
339 anon_vma = anon_vma_alloc();
340 if (!anon_vma)
341 goto out_error;
342 avc = anon_vma_chain_alloc(GFP_KERNEL);
343 if (!avc)
344 goto out_error_free_anon_vma;
345
346
347
348
349
350 anon_vma->root = pvma->anon_vma->root;
351
352
353
354
355
356 get_anon_vma(anon_vma->root);
357
358 vma->anon_vma = anon_vma;
359 anon_vma_lock(anon_vma);
360 anon_vma_chain_link(vma, avc, anon_vma);
361 anon_vma_unlock(anon_vma);
362
363 return 0;
364
365 out_error_free_anon_vma:
366 put_anon_vma(anon_vma);
367 out_error:
368 unlink_anon_vmas(vma);
369 return -ENOMEM;
370}
371
372void unlink_anon_vmas(struct vm_area_struct *vma)
373{
374 struct anon_vma_chain *avc, *next;
375 struct anon_vma *root = NULL;
376
377
378
379
380
381 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
382 struct anon_vma *anon_vma = avc->anon_vma;
383
384 root = lock_anon_vma_root(root, anon_vma);
385 list_del(&avc->same_anon_vma);
386
387
388
389
390
391 if (list_empty(&anon_vma->head))
392 continue;
393
394 list_del(&avc->same_vma);
395 anon_vma_chain_free(avc);
396 }
397 unlock_anon_vma_root(root);
398
399
400
401
402
403
404 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
405 struct anon_vma *anon_vma = avc->anon_vma;
406
407 put_anon_vma(anon_vma);
408
409 list_del(&avc->same_vma);
410 anon_vma_chain_free(avc);
411 }
412}
413
414static void anon_vma_ctor(void *data)
415{
416 struct anon_vma *anon_vma = data;
417
418 mutex_init(&anon_vma->mutex);
419 atomic_set(&anon_vma->refcount, 0);
420 INIT_LIST_HEAD(&anon_vma->head);
421}
422
423void __init anon_vma_init(void)
424{
425 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
426 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
427 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
428}
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453struct anon_vma *page_get_anon_vma(struct page *page)
454{
455 struct anon_vma *anon_vma = NULL;
456 unsigned long anon_mapping;
457
458 rcu_read_lock();
459 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
460 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
461 goto out;
462 if (!page_mapped(page))
463 goto out;
464
465 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
466 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
467 anon_vma = NULL;
468 goto out;
469 }
470
471
472
473
474
475
476
477
478 if (!page_mapped(page)) {
479 put_anon_vma(anon_vma);
480 anon_vma = NULL;
481 }
482out:
483 rcu_read_unlock();
484
485 return anon_vma;
486}
487
488
489
490
491
492
493
494
495struct anon_vma *page_lock_anon_vma(struct page *page)
496{
497 struct anon_vma *anon_vma = NULL;
498 struct anon_vma *root_anon_vma;
499 unsigned long anon_mapping;
500
501 rcu_read_lock();
502 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
503 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
504 goto out;
505 if (!page_mapped(page))
506 goto out;
507
508 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
509 root_anon_vma = ACCESS_ONCE(anon_vma->root);
510 if (mutex_trylock(&root_anon_vma->mutex)) {
511
512
513
514
515
516 if (!page_mapped(page)) {
517 mutex_unlock(&root_anon_vma->mutex);
518 anon_vma = NULL;
519 }
520 goto out;
521 }
522
523
524 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
525 anon_vma = NULL;
526 goto out;
527 }
528
529 if (!page_mapped(page)) {
530 put_anon_vma(anon_vma);
531 anon_vma = NULL;
532 goto out;
533 }
534
535
536 rcu_read_unlock();
537 anon_vma_lock(anon_vma);
538
539 if (atomic_dec_and_test(&anon_vma->refcount)) {
540
541
542
543
544
545 anon_vma_unlock(anon_vma);
546 __put_anon_vma(anon_vma);
547 anon_vma = NULL;
548 }
549
550 return anon_vma;
551
552out:
553 rcu_read_unlock();
554 return anon_vma;
555}
556
/*
 * page_unlock_anon_vma - undo page_lock_anon_vma()
 * @anon_vma: the locked anon_vma that page_lock_anon_vma() returned
 */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	anon_vma_unlock(anon_vma);
}
561
562
563
564
565
566
567inline unsigned long
568vma_address(struct page *page, struct vm_area_struct *vma)
569{
570 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
571 unsigned long address;
572
573 if (unlikely(is_vm_hugetlb_page(vma)))
574 pgoff = page->index << huge_page_order(page_hstate(page));
575 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
576 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
577
578 return -EFAULT;
579 }
580 return address;
581}
582
583
584
585
586
587unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
588{
589 if (PageAnon(page)) {
590 struct anon_vma *page__anon_vma = page_anon_vma(page);
591
592
593
594
595 if (!vma->anon_vma || !page__anon_vma ||
596 vma->anon_vma->root != page__anon_vma->root)
597 return -EFAULT;
598 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
599 if (!vma->vm_file ||
600 vma->vm_file->f_mapping != page->mapping)
601 return -EFAULT;
602 } else
603 return -EFAULT;
604 return vma_address(page, vma);
605}
606
607
608
609
610
611
612
613
614
615
616pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
617 unsigned long address, spinlock_t **ptlp, int sync)
618{
619 pgd_t *pgd;
620 pud_t *pud;
621 pmd_t *pmd;
622 pte_t *pte;
623 spinlock_t *ptl;
624
625 if (unlikely(PageHuge(page))) {
626 pte = huge_pte_offset(mm, address);
627 ptl = &mm->page_table_lock;
628 goto check;
629 }
630
631 pgd = pgd_offset(mm, address);
632 if (!pgd_present(*pgd))
633 return NULL;
634
635 pud = pud_offset(pgd, address);
636 if (!pud_present(*pud))
637 return NULL;
638
639 pmd = pmd_offset(pud, address);
640 if (!pmd_present(*pmd))
641 return NULL;
642 if (pmd_trans_huge(*pmd))
643 return NULL;
644
645 pte = pte_offset_map(pmd, address);
646
647 if (!sync && !pte_present(*pte)) {
648 pte_unmap(pte);
649 return NULL;
650 }
651
652 ptl = pte_lockptr(mm, pmd);
653check:
654 spin_lock(ptl);
655 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
656 *ptlp = ptl;
657 return pte;
658 }
659 pte_unmap_unlock(pte, ptl);
660 return NULL;
661}
662
663
664
665
666
667
668
669
670
671
672int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
673{
674 unsigned long address;
675 pte_t *pte;
676 spinlock_t *ptl;
677
678 address = vma_address(page, vma);
679 if (address == -EFAULT)
680 return 0;
681 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
682 if (!pte)
683 return 0;
684 pte_unmap_unlock(pte, ptl);
685
686 return 1;
687}
688
689
690
691
692
693int page_referenced_one(struct page *page, struct vm_area_struct *vma,
694 unsigned long address, unsigned int *mapcount,
695 unsigned long *vm_flags)
696{
697 struct mm_struct *mm = vma->vm_mm;
698 int referenced = 0;
699
700 if (unlikely(PageTransHuge(page))) {
701 pmd_t *pmd;
702
703 spin_lock(&mm->page_table_lock);
704
705
706
707
708 pmd = page_check_address_pmd(page, mm, address,
709 PAGE_CHECK_ADDRESS_PMD_FLAG);
710 if (!pmd) {
711 spin_unlock(&mm->page_table_lock);
712 goto out;
713 }
714
715 if (vma->vm_flags & VM_LOCKED) {
716 spin_unlock(&mm->page_table_lock);
717 *mapcount = 0;
718 *vm_flags |= VM_LOCKED;
719 goto out;
720 }
721
722
723 if (pmdp_clear_flush_young_notify(vma, address, pmd))
724 referenced++;
725 spin_unlock(&mm->page_table_lock);
726 } else {
727 pte_t *pte;
728 spinlock_t *ptl;
729
730
731
732
733
734 pte = page_check_address(page, mm, address, &ptl, 0);
735 if (!pte)
736 goto out;
737
738 if (vma->vm_flags & VM_LOCKED) {
739 pte_unmap_unlock(pte, ptl);
740 *mapcount = 0;
741 *vm_flags |= VM_LOCKED;
742 goto out;
743 }
744
745 if (ptep_clear_flush_young_notify(vma, address, pte)) {
746
747
748
749
750
751
752
753 if (likely(!VM_SequentialReadHint(vma)))
754 referenced++;
755 }
756 pte_unmap_unlock(pte, ptl);
757 }
758
759 (*mapcount)--;
760
761 if (referenced)
762 *vm_flags |= vma->vm_flags;
763out:
764 return referenced;
765}
766
767static int page_referenced_anon(struct page *page,
768 struct mem_cgroup *memcg,
769 unsigned long *vm_flags)
770{
771 unsigned int mapcount;
772 struct anon_vma *anon_vma;
773 struct anon_vma_chain *avc;
774 int referenced = 0;
775
776 anon_vma = page_lock_anon_vma(page);
777 if (!anon_vma)
778 return referenced;
779
780 mapcount = page_mapcount(page);
781 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
782 struct vm_area_struct *vma = avc->vma;
783 unsigned long address = vma_address(page, vma);
784 if (address == -EFAULT)
785 continue;
786
787
788
789
790
791 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
792 continue;
793 referenced += page_referenced_one(page, vma, address,
794 &mapcount, vm_flags);
795 if (!mapcount)
796 break;
797 }
798
799 page_unlock_anon_vma(anon_vma);
800 return referenced;
801}
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816static int page_referenced_file(struct page *page,
817 struct mem_cgroup *memcg,
818 unsigned long *vm_flags)
819{
820 unsigned int mapcount;
821 struct address_space *mapping = page->mapping;
822 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
823 struct vm_area_struct *vma;
824 struct prio_tree_iter iter;
825 int referenced = 0;
826
827
828
829
830
831
832 BUG_ON(PageAnon(page));
833
834
835
836
837
838
839
840 BUG_ON(!PageLocked(page));
841
842 mutex_lock(&mapping->i_mmap_mutex);
843
844
845
846
847
848 mapcount = page_mapcount(page);
849
850 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
851 unsigned long address = vma_address(page, vma);
852 if (address == -EFAULT)
853 continue;
854
855
856
857
858
859 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
860 continue;
861 referenced += page_referenced_one(page, vma, address,
862 &mapcount, vm_flags);
863 if (!mapcount)
864 break;
865 }
866
867 mutex_unlock(&mapping->i_mmap_mutex);
868 return referenced;
869}
870
871
872
873
874
875
876
877
878
879
880
881int page_referenced(struct page *page,
882 int is_locked,
883 struct mem_cgroup *memcg,
884 unsigned long *vm_flags)
885{
886 int referenced = 0;
887 int we_locked = 0;
888
889 *vm_flags = 0;
890 if (page_mapped(page) && page_rmapping(page)) {
891 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
892 we_locked = trylock_page(page);
893 if (!we_locked) {
894 referenced++;
895 goto out;
896 }
897 }
898 if (unlikely(PageKsm(page)))
899 referenced += page_referenced_ksm(page, memcg,
900 vm_flags);
901 else if (PageAnon(page))
902 referenced += page_referenced_anon(page, memcg,
903 vm_flags);
904 else if (page->mapping)
905 referenced += page_referenced_file(page, memcg,
906 vm_flags);
907 if (we_locked)
908 unlock_page(page);
909
910 if (page_test_and_clear_young(page_to_pfn(page)))
911 referenced++;
912 }
913out:
914 return referenced;
915}
916
917static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
918 unsigned long address)
919{
920 struct mm_struct *mm = vma->vm_mm;
921 pte_t *pte;
922 spinlock_t *ptl;
923 int ret = 0;
924
925 pte = page_check_address(page, mm, address, &ptl, 1);
926 if (!pte)
927 goto out;
928
929 if (pte_dirty(*pte) || pte_write(*pte)) {
930 pte_t entry;
931
932 flush_cache_page(vma, address, pte_pfn(*pte));
933 entry = ptep_clear_flush_notify(vma, address, pte);
934 entry = pte_wrprotect(entry);
935 entry = pte_mkclean(entry);
936 set_pte_at(mm, address, pte, entry);
937 ret = 1;
938 }
939
940 pte_unmap_unlock(pte, ptl);
941out:
942 return ret;
943}
944
945static int page_mkclean_file(struct address_space *mapping, struct page *page)
946{
947 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
948 struct vm_area_struct *vma;
949 struct prio_tree_iter iter;
950 int ret = 0;
951
952 BUG_ON(PageAnon(page));
953
954 mutex_lock(&mapping->i_mmap_mutex);
955 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
956 if (vma->vm_flags & VM_SHARED) {
957 unsigned long address = vma_address(page, vma);
958 if (address == -EFAULT)
959 continue;
960 ret += page_mkclean_one(page, vma, address);
961 }
962 }
963 mutex_unlock(&mapping->i_mmap_mutex);
964 return ret;
965}
966
/*
 * page_mkclean - write-protect and clean all shared mappings of a page
 * @page: the page; must be locked
 *
 * Returns the number of PTEs changed.  That is 0 when the page is not
 * mapped or page_mapping() yields no address_space for it.
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	return page_mkclean_file(mapping, page);
}
EXPORT_SYMBOL_GPL(page_mkclean);
982
983
984
985
986
987
988
989
990
991
992
993
994void page_move_anon_rmap(struct page *page,
995 struct vm_area_struct *vma, unsigned long address)
996{
997 struct anon_vma *anon_vma = vma->anon_vma;
998
999 VM_BUG_ON(!PageLocked(page));
1000 VM_BUG_ON(!anon_vma);
1001 VM_BUG_ON(page->index != linear_page_index(vma, address));
1002
1003 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1004 page->mapping = (struct address_space *) anon_vma;
1005}
1006
1007
1008
1009
1010
1011
1012
1013
1014static void __page_set_anon_rmap(struct page *page,
1015 struct vm_area_struct *vma, unsigned long address, int exclusive)
1016{
1017 struct anon_vma *anon_vma = vma->anon_vma;
1018
1019 BUG_ON(!anon_vma);
1020
1021 if (PageAnon(page))
1022 return;
1023
1024
1025
1026
1027
1028
1029 if (!exclusive)
1030 anon_vma = anon_vma->root;
1031
1032 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1033 page->mapping = (struct address_space *) anon_vma;
1034 page->index = linear_page_index(vma, address);
1035}
1036
1037
1038
1039
1040
1041
1042
/*
 * __page_check_anon_rmap - sanity check an already-anonymous page
 * @page:    the page
 * @vma:     VMA it is being mapped into
 * @address: user address of the mapping
 *
 * With CONFIG_DEBUG_VM, BUGs unless the page's anon_vma shares a root
 * with @vma's and page->index matches @address.  Without it this is an
 * empty function.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	/* Page and VMA must belong to the same anon_vma tree. */
	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
	/* The recorded index must agree with where we are mapping it. */
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
/*
 * page_add_anon_rmap - add a non-exclusive anonymous mapping of a page
 * @page:    the page
 * @vma:     VMA the page is mapped into
 * @address: user address of the mapping
 *
 * Shorthand for do_page_add_anon_rmap() with @exclusive set to 0.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	const int exclusive = 0;

	do_page_add_anon_rmap(page, vma, address, exclusive);
}
1080
1081
1082
1083
1084
1085
1086void do_page_add_anon_rmap(struct page *page,
1087 struct vm_area_struct *vma, unsigned long address, int exclusive)
1088{
1089 int first = atomic_inc_and_test(&page->_mapcount);
1090 if (first) {
1091 if (!PageTransHuge(page))
1092 __inc_zone_page_state(page, NR_ANON_PAGES);
1093 else
1094 __inc_zone_page_state(page,
1095 NR_ANON_TRANSPARENT_HUGEPAGES);
1096 }
1097 if (unlikely(PageKsm(page)))
1098 return;
1099
1100 VM_BUG_ON(!PageLocked(page));
1101
1102 if (first)
1103 __page_set_anon_rmap(page, vma, address, exclusive);
1104 else
1105 __page_check_anon_rmap(page, vma, address);
1106}
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118void page_add_new_anon_rmap(struct page *page,
1119 struct vm_area_struct *vma, unsigned long address)
1120{
1121 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1122 SetPageSwapBacked(page);
1123 atomic_set(&page->_mapcount, 0);
1124 if (!PageTransHuge(page))
1125 __inc_zone_page_state(page, NR_ANON_PAGES);
1126 else
1127 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1128 __page_set_anon_rmap(page, vma, address, 1);
1129 if (page_evictable(page, vma))
1130 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
1131 else
1132 add_page_to_unevictable_list(page);
1133}
1134
1135
1136
1137
1138
1139
1140
1141void page_add_file_rmap(struct page *page)
1142{
1143 bool locked;
1144 unsigned long flags;
1145
1146 mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1147 if (atomic_inc_and_test(&page->_mapcount)) {
1148 __inc_zone_page_state(page, NR_FILE_MAPPED);
1149 mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
1150 }
1151 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1152}
1153
1154
1155
1156
1157
1158
1159
1160void page_remove_rmap(struct page *page)
1161{
1162 struct address_space *mapping = page_mapping(page);
1163 bool anon = PageAnon(page);
1164 bool locked;
1165 unsigned long flags;
1166
1167
1168
1169
1170
1171
1172 if (!anon)
1173 mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1174
1175
1176 if (!atomic_add_negative(-1, &page->_mapcount))
1177 goto out;
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197 if (mapping && !mapping_cap_account_dirty(mapping) &&
1198 page_test_and_clear_dirty(page_to_pfn(page), 1))
1199 set_page_dirty(page);
1200
1201
1202
1203
1204 if (unlikely(PageHuge(page)))
1205 goto out;
1206 if (anon) {
1207 mem_cgroup_uncharge_page(page);
1208 if (!PageTransHuge(page))
1209 __dec_zone_page_state(page, NR_ANON_PAGES);
1210 else
1211 __dec_zone_page_state(page,
1212 NR_ANON_TRANSPARENT_HUGEPAGES);
1213 } else {
1214 __dec_zone_page_state(page, NR_FILE_MAPPED);
1215 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1216 }
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226out:
1227 if (!anon)
1228 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1229}
1230
1231
1232
1233
1234
1235int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1236 unsigned long address, enum ttu_flags flags)
1237{
1238 struct mm_struct *mm = vma->vm_mm;
1239 pte_t *pte;
1240 pte_t pteval;
1241 spinlock_t *ptl;
1242 int ret = SWAP_AGAIN;
1243
1244 pte = page_check_address(page, mm, address, &ptl, 0);
1245 if (!pte)
1246 goto out;
1247
1248
1249
1250
1251
1252
1253 if (!(flags & TTU_IGNORE_MLOCK)) {
1254 if (vma->vm_flags & VM_LOCKED)
1255 goto out_mlock;
1256
1257 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1258 goto out_unmap;
1259 }
1260 if (!(flags & TTU_IGNORE_ACCESS)) {
1261 if (ptep_clear_flush_young_notify(vma, address, pte)) {
1262 ret = SWAP_FAIL;
1263 goto out_unmap;
1264 }
1265 }
1266
1267
1268 flush_cache_page(vma, address, page_to_pfn(page));
1269 pteval = ptep_clear_flush_notify(vma, address, pte);
1270
1271
1272 if (pte_dirty(pteval))
1273 set_page_dirty(page);
1274
1275
1276 update_hiwater_rss(mm);
1277
1278 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1279 if (PageAnon(page))
1280 dec_mm_counter(mm, MM_ANONPAGES);
1281 else
1282 dec_mm_counter(mm, MM_FILEPAGES);
1283 set_pte_at(mm, address, pte,
1284 swp_entry_to_pte(make_hwpoison_entry(page)));
1285 } else if (PageAnon(page)) {
1286 swp_entry_t entry = { .val = page_private(page) };
1287
1288 if (PageSwapCache(page)) {
1289
1290
1291
1292
1293 if (swap_duplicate(entry) < 0) {
1294 set_pte_at(mm, address, pte, pteval);
1295 ret = SWAP_FAIL;
1296 goto out_unmap;
1297 }
1298 if (list_empty(&mm->mmlist)) {
1299 spin_lock(&mmlist_lock);
1300 if (list_empty(&mm->mmlist))
1301 list_add(&mm->mmlist, &init_mm.mmlist);
1302 spin_unlock(&mmlist_lock);
1303 }
1304 dec_mm_counter(mm, MM_ANONPAGES);
1305 inc_mm_counter(mm, MM_SWAPENTS);
1306 } else if (IS_ENABLED(CONFIG_MIGRATION)) {
1307
1308
1309
1310
1311
1312 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1313 entry = make_migration_entry(page, pte_write(pteval));
1314 }
1315 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1316 BUG_ON(pte_file(*pte));
1317 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1318 (TTU_ACTION(flags) == TTU_MIGRATION)) {
1319
1320 swp_entry_t entry;
1321 entry = make_migration_entry(page, pte_write(pteval));
1322 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1323 } else
1324 dec_mm_counter(mm, MM_FILEPAGES);
1325
1326 page_remove_rmap(page);
1327 page_cache_release(page);
1328
1329out_unmap:
1330 pte_unmap_unlock(pte, ptl);
1331out:
1332 return ret;
1333
1334out_mlock:
1335 pte_unmap_unlock(pte, ptl);
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1347 if (vma->vm_flags & VM_LOCKED) {
1348 mlock_vma_page(page);
1349 ret = SWAP_MLOCK;
1350 }
1351 up_read(&vma->vm_mm->mmap_sem);
1352 }
1353 return ret;
1354}
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
1381#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
1382
1383static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1384 struct vm_area_struct *vma, struct page *check_page)
1385{
1386 struct mm_struct *mm = vma->vm_mm;
1387 pgd_t *pgd;
1388 pud_t *pud;
1389 pmd_t *pmd;
1390 pte_t *pte;
1391 pte_t pteval;
1392 spinlock_t *ptl;
1393 struct page *page;
1394 unsigned long address;
1395 unsigned long end;
1396 int ret = SWAP_AGAIN;
1397 int locked_vma = 0;
1398
1399 address = (vma->vm_start + cursor) & CLUSTER_MASK;
1400 end = address + CLUSTER_SIZE;
1401 if (address < vma->vm_start)
1402 address = vma->vm_start;
1403 if (end > vma->vm_end)
1404 end = vma->vm_end;
1405
1406 pgd = pgd_offset(mm, address);
1407 if (!pgd_present(*pgd))
1408 return ret;
1409
1410 pud = pud_offset(pgd, address);
1411 if (!pud_present(*pud))
1412 return ret;
1413
1414 pmd = pmd_offset(pud, address);
1415 if (!pmd_present(*pmd))
1416 return ret;
1417
1418
1419
1420
1421
1422 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1423 locked_vma = (vma->vm_flags & VM_LOCKED);
1424 if (!locked_vma)
1425 up_read(&vma->vm_mm->mmap_sem);
1426 }
1427
1428 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1429
1430
1431 update_hiwater_rss(mm);
1432
1433 for (; address < end; pte++, address += PAGE_SIZE) {
1434 if (!pte_present(*pte))
1435 continue;
1436 page = vm_normal_page(vma, address, *pte);
1437 BUG_ON(!page || PageAnon(page));
1438
1439 if (locked_vma) {
1440 mlock_vma_page(page);
1441 if (page == check_page)
1442 ret = SWAP_MLOCK;
1443 continue;
1444 }
1445
1446 if (ptep_clear_flush_young_notify(vma, address, pte))
1447 continue;
1448
1449
1450 flush_cache_page(vma, address, pte_pfn(*pte));
1451 pteval = ptep_clear_flush_notify(vma, address, pte);
1452
1453
1454 if (page->index != linear_page_index(vma, address))
1455 set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
1456
1457
1458 if (pte_dirty(pteval))
1459 set_page_dirty(page);
1460
1461 page_remove_rmap(page);
1462 page_cache_release(page);
1463 dec_mm_counter(mm, MM_FILEPAGES);
1464 (*mapcount)--;
1465 }
1466 pte_unmap_unlock(pte - 1, ptl);
1467 if (locked_vma)
1468 up_read(&vma->vm_mm->mmap_sem);
1469 return ret;
1470}
1471
1472bool is_vma_temporary_stack(struct vm_area_struct *vma)
1473{
1474 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1475
1476 if (!maybe_stack)
1477 return false;
1478
1479 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1480 VM_STACK_INCOMPLETE_SETUP)
1481 return true;
1482
1483 return false;
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1503{
1504 struct anon_vma *anon_vma;
1505 struct anon_vma_chain *avc;
1506 int ret = SWAP_AGAIN;
1507
1508 anon_vma = page_lock_anon_vma(page);
1509 if (!anon_vma)
1510 return ret;
1511
1512 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1513 struct vm_area_struct *vma = avc->vma;
1514 unsigned long address;
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524 if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
1525 is_vma_temporary_stack(vma))
1526 continue;
1527
1528 address = vma_address(page, vma);
1529 if (address == -EFAULT)
1530 continue;
1531 ret = try_to_unmap_one(page, vma, address, flags);
1532 if (ret != SWAP_AGAIN || !page_mapped(page))
1533 break;
1534 }
1535
1536 page_unlock_anon_vma(anon_vma);
1537 return ret;
1538}
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1556{
1557 struct address_space *mapping = page->mapping;
1558 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1559 struct vm_area_struct *vma;
1560 struct prio_tree_iter iter;
1561 int ret = SWAP_AGAIN;
1562 unsigned long cursor;
1563 unsigned long max_nl_cursor = 0;
1564 unsigned long max_nl_size = 0;
1565 unsigned int mapcount;
1566
1567 mutex_lock(&mapping->i_mmap_mutex);
1568 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1569 unsigned long address = vma_address(page, vma);
1570 if (address == -EFAULT)
1571 continue;
1572 ret = try_to_unmap_one(page, vma, address, flags);
1573 if (ret != SWAP_AGAIN || !page_mapped(page))
1574 goto out;
1575 }
1576
1577 if (list_empty(&mapping->i_mmap_nonlinear))
1578 goto out;
1579
1580
1581
1582
1583
1584
1585 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1586 goto out;
1587
1588 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1589 shared.vm_set.list) {
1590 cursor = (unsigned long) vma->vm_private_data;
1591 if (cursor > max_nl_cursor)
1592 max_nl_cursor = cursor;
1593 cursor = vma->vm_end - vma->vm_start;
1594 if (cursor > max_nl_size)
1595 max_nl_size = cursor;
1596 }
1597
1598 if (max_nl_size == 0) {
1599 ret = SWAP_FAIL;
1600 goto out;
1601 }
1602
1603
1604
1605
1606
1607
1608
1609
1610 mapcount = page_mapcount(page);
1611 if (!mapcount)
1612 goto out;
1613 cond_resched();
1614
1615 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1616 if (max_nl_cursor == 0)
1617 max_nl_cursor = CLUSTER_SIZE;
1618
1619 do {
1620 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1621 shared.vm_set.list) {
1622 cursor = (unsigned long) vma->vm_private_data;
1623 while ( cursor < max_nl_cursor &&
1624 cursor < vma->vm_end - vma->vm_start) {
1625 if (try_to_unmap_cluster(cursor, &mapcount,
1626 vma, page) == SWAP_MLOCK)
1627 ret = SWAP_MLOCK;
1628 cursor += CLUSTER_SIZE;
1629 vma->vm_private_data = (void *) cursor;
1630 if ((int)mapcount <= 0)
1631 goto out;
1632 }
1633 vma->vm_private_data = (void *) max_nl_cursor;
1634 }
1635 cond_resched();
1636 max_nl_cursor += CLUSTER_SIZE;
1637 } while (max_nl_cursor <= max_nl_size);
1638
1639
1640
1641
1642
1643
1644 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1645 vma->vm_private_data = NULL;
1646out:
1647 mutex_unlock(&mapping->i_mmap_mutex);
1648 return ret;
1649}
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665int try_to_unmap(struct page *page, enum ttu_flags flags)
1666{
1667 int ret;
1668
1669 BUG_ON(!PageLocked(page));
1670 VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
1671
1672 if (unlikely(PageKsm(page)))
1673 ret = try_to_unmap_ksm(page, flags);
1674 else if (PageAnon(page))
1675 ret = try_to_unmap_anon(page, flags);
1676 else
1677 ret = try_to_unmap_file(page, flags);
1678 if (ret != SWAP_MLOCK && !page_mapped(page))
1679 ret = SWAP_SUCCESS;
1680 return ret;
1681}
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698int try_to_munlock(struct page *page)
1699{
1700 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1701
1702 if (unlikely(PageKsm(page)))
1703 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1704 else if (PageAnon(page))
1705 return try_to_unmap_anon(page, TTU_MUNLOCK);
1706 else
1707 return try_to_unmap_file(page, TTU_MUNLOCK);
1708}
1709
1710void __put_anon_vma(struct anon_vma *anon_vma)
1711{
1712 struct anon_vma *root = anon_vma->root;
1713
1714 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1715 anon_vma_free(root);
1716
1717 anon_vma_free(anon_vma);
1718}
1719
1720#ifdef CONFIG_MIGRATION
1721
1722
1723
1724
1725static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1726 struct vm_area_struct *, unsigned long, void *), void *arg)
1727{
1728 struct anon_vma *anon_vma;
1729 struct anon_vma_chain *avc;
1730 int ret = SWAP_AGAIN;
1731
1732
1733
1734
1735
1736
1737
1738 anon_vma = page_anon_vma(page);
1739 if (!anon_vma)
1740 return ret;
1741 anon_vma_lock(anon_vma);
1742 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1743 struct vm_area_struct *vma = avc->vma;
1744 unsigned long address = vma_address(page, vma);
1745 if (address == -EFAULT)
1746 continue;
1747 ret = rmap_one(page, vma, address, arg);
1748 if (ret != SWAP_AGAIN)
1749 break;
1750 }
1751 anon_vma_unlock(anon_vma);
1752 return ret;
1753}
1754
1755static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1756 struct vm_area_struct *, unsigned long, void *), void *arg)
1757{
1758 struct address_space *mapping = page->mapping;
1759 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1760 struct vm_area_struct *vma;
1761 struct prio_tree_iter iter;
1762 int ret = SWAP_AGAIN;
1763
1764 if (!mapping)
1765 return ret;
1766 mutex_lock(&mapping->i_mmap_mutex);
1767 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1768 unsigned long address = vma_address(page, vma);
1769 if (address == -EFAULT)
1770 continue;
1771 ret = rmap_one(page, vma, address, arg);
1772 if (ret != SWAP_AGAIN)
1773 break;
1774 }
1775
1776
1777
1778
1779
1780 mutex_unlock(&mapping->i_mmap_mutex);
1781 return ret;
1782}
1783
/*
 * rmap_walk - call @rmap_one for every mapping of a locked page
 * @page:     the page; must be locked
 * @rmap_one: callback, invoked with (page, vma, address, arg)
 * @arg:      opaque argument handed to @rmap_one
 *
 * Dispatches to the KSM, anon or file walker and returns its result.
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);

	if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);

	return rmap_walk_file(page, rmap_one, arg);
}
1796#endif
1797
1798#ifdef CONFIG_HUGETLB_PAGE
1799
1800
1801
1802
1803
1804static void __hugepage_set_anon_rmap(struct page *page,
1805 struct vm_area_struct *vma, unsigned long address, int exclusive)
1806{
1807 struct anon_vma *anon_vma = vma->anon_vma;
1808
1809 BUG_ON(!anon_vma);
1810
1811 if (PageAnon(page))
1812 return;
1813 if (!exclusive)
1814 anon_vma = anon_vma->root;
1815
1816 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1817 page->mapping = (struct address_space *) anon_vma;
1818 page->index = linear_page_index(vma, address);
1819}
1820
1821void hugepage_add_anon_rmap(struct page *page,
1822 struct vm_area_struct *vma, unsigned long address)
1823{
1824 struct anon_vma *anon_vma = vma->anon_vma;
1825 int first;
1826
1827 BUG_ON(!PageLocked(page));
1828 BUG_ON(!anon_vma);
1829
1830 first = atomic_inc_and_test(&page->_mapcount);
1831 if (first)
1832 __hugepage_set_anon_rmap(page, vma, address, 0);
1833}
1834
1835void hugepage_add_new_anon_rmap(struct page *page,
1836 struct vm_area_struct *vma, unsigned long address)
1837{
1838 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1839 atomic_set(&page->_mapcount, 0);
1840 __hugepage_set_anon_rmap(page, vma, address, 1);
1841}
1842#endif
1843