1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <linux/mm.h>
46#include <linux/pagemap.h>
47#include <linux/swap.h>
48#include <linux/swapops.h>
49#include <linux/slab.h>
50#include <linux/init.h>
51#include <linux/ksm.h>
52#include <linux/rmap.h>
53#include <linux/rcupdate.h>
54#include <linux/export.h>
55#include <linux/memcontrol.h>
56#include <linux/mmu_notifier.h>
57#include <linux/migrate.h>
58#include <linux/hugetlb.h>
59
60#include <asm/tlbflush.h>
61
62#include "internal.h"
63
/* Slab cache from which struct anon_vma objects are allocated and freed. */
static struct kmem_cache *anon_vma_cachep;
/* Slab cache from which struct anon_vma_chain objects are allocated and freed. */
static struct kmem_cache *anon_vma_chain_cachep;
66
67static inline struct anon_vma *anon_vma_alloc(void)
68{
69 struct anon_vma *anon_vma;
70
71 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
72 if (anon_vma) {
73 atomic_set(&anon_vma->refcount, 1);
74
75
76
77
78 anon_vma->root = anon_vma;
79 }
80
81 return anon_vma;
82}
83
84static inline void anon_vma_free(struct anon_vma *anon_vma)
85{
86 VM_BUG_ON(atomic_read(&anon_vma->refcount));
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 if (mutex_is_locked(&anon_vma->root->mutex)) {
106 anon_vma_lock(anon_vma);
107 anon_vma_unlock(anon_vma);
108 }
109
110 kmem_cache_free(anon_vma_cachep, anon_vma);
111}
112
113static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
114{
115 return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
116}
117
/* Return @anon_vma_chain to anon_vma_chain_cachep. */
static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}
122
123static void anon_vma_chain_link(struct vm_area_struct *vma,
124 struct anon_vma_chain *avc,
125 struct anon_vma *anon_vma)
126{
127 avc->vma = vma;
128 avc->anon_vma = anon_vma;
129 list_add(&avc->same_vma, &vma->anon_vma_chain);
130
131
132
133
134
135 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
136}
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165int anon_vma_prepare(struct vm_area_struct *vma)
166{
167 struct anon_vma *anon_vma = vma->anon_vma;
168 struct anon_vma_chain *avc;
169
170 might_sleep();
171 if (unlikely(!anon_vma)) {
172 struct mm_struct *mm = vma->vm_mm;
173 struct anon_vma *allocated;
174
175 avc = anon_vma_chain_alloc(GFP_KERNEL);
176 if (!avc)
177 goto out_enomem;
178
179 anon_vma = find_mergeable_anon_vma(vma);
180 allocated = NULL;
181 if (!anon_vma) {
182 anon_vma = anon_vma_alloc();
183 if (unlikely(!anon_vma))
184 goto out_enomem_free_avc;
185 allocated = anon_vma;
186 }
187
188 anon_vma_lock(anon_vma);
189
190 spin_lock(&mm->page_table_lock);
191 if (likely(!vma->anon_vma)) {
192 vma->anon_vma = anon_vma;
193 anon_vma_chain_link(vma, avc, anon_vma);
194 allocated = NULL;
195 avc = NULL;
196 }
197 spin_unlock(&mm->page_table_lock);
198 anon_vma_unlock(anon_vma);
199
200 if (unlikely(allocated))
201 put_anon_vma(allocated);
202 if (unlikely(avc))
203 anon_vma_chain_free(avc);
204 }
205 return 0;
206
207 out_enomem_free_avc:
208 anon_vma_chain_free(avc);
209 out_enomem:
210 return -ENOMEM;
211}
212
213
214
215
216
217
218
219
220
221static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
222{
223 struct anon_vma *new_root = anon_vma->root;
224 if (new_root != root) {
225 if (WARN_ON_ONCE(root))
226 mutex_unlock(&root->mutex);
227 root = new_root;
228 mutex_lock(&root->mutex);
229 }
230 return root;
231}
232
233static inline void unlock_anon_vma_root(struct anon_vma *root)
234{
235 if (root)
236 mutex_unlock(&root->mutex);
237}
238
239
240
241
242
243int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
244{
245 struct anon_vma_chain *avc, *pavc;
246 struct anon_vma *root = NULL;
247
248 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
249 struct anon_vma *anon_vma;
250
251 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
252 if (unlikely(!avc)) {
253 unlock_anon_vma_root(root);
254 root = NULL;
255 avc = anon_vma_chain_alloc(GFP_KERNEL);
256 if (!avc)
257 goto enomem_failure;
258 }
259 anon_vma = pavc->anon_vma;
260 root = lock_anon_vma_root(root, anon_vma);
261 anon_vma_chain_link(dst, avc, anon_vma);
262 }
263 unlock_anon_vma_root(root);
264 return 0;
265
266 enomem_failure:
267 unlink_anon_vmas(dst);
268 return -ENOMEM;
269}
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301void anon_vma_moveto_tail(struct vm_area_struct *dst)
302{
303 struct anon_vma_chain *pavc;
304 struct anon_vma *root = NULL;
305
306 list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
307 struct anon_vma *anon_vma = pavc->anon_vma;
308 VM_BUG_ON(pavc->vma != dst);
309 root = lock_anon_vma_root(root, anon_vma);
310 list_del(&pavc->same_anon_vma);
311 list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
312 }
313 unlock_anon_vma_root(root);
314}
315
316
317
318
319
320
321int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
322{
323 struct anon_vma_chain *avc;
324 struct anon_vma *anon_vma;
325
326
327 if (!pvma->anon_vma)
328 return 0;
329
330
331
332
333
334 if (anon_vma_clone(vma, pvma))
335 return -ENOMEM;
336
337
338 anon_vma = anon_vma_alloc();
339 if (!anon_vma)
340 goto out_error;
341 avc = anon_vma_chain_alloc(GFP_KERNEL);
342 if (!avc)
343 goto out_error_free_anon_vma;
344
345
346
347
348
349 anon_vma->root = pvma->anon_vma->root;
350
351
352
353
354
355 get_anon_vma(anon_vma->root);
356
357 vma->anon_vma = anon_vma;
358 anon_vma_lock(anon_vma);
359 anon_vma_chain_link(vma, avc, anon_vma);
360 anon_vma_unlock(anon_vma);
361
362 return 0;
363
364 out_error_free_anon_vma:
365 put_anon_vma(anon_vma);
366 out_error:
367 unlink_anon_vmas(vma);
368 return -ENOMEM;
369}
370
371void unlink_anon_vmas(struct vm_area_struct *vma)
372{
373 struct anon_vma_chain *avc, *next;
374 struct anon_vma *root = NULL;
375
376
377
378
379
380 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
381 struct anon_vma *anon_vma = avc->anon_vma;
382
383 root = lock_anon_vma_root(root, anon_vma);
384 list_del(&avc->same_anon_vma);
385
386
387
388
389
390 if (list_empty(&anon_vma->head))
391 continue;
392
393 list_del(&avc->same_vma);
394 anon_vma_chain_free(avc);
395 }
396 unlock_anon_vma_root(root);
397
398
399
400
401
402
403 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
404 struct anon_vma *anon_vma = avc->anon_vma;
405
406 put_anon_vma(anon_vma);
407
408 list_del(&avc->same_vma);
409 anon_vma_chain_free(avc);
410 }
411}
412
413static void anon_vma_ctor(void *data)
414{
415 struct anon_vma *anon_vma = data;
416
417 mutex_init(&anon_vma->mutex);
418 atomic_set(&anon_vma->refcount, 0);
419 INIT_LIST_HEAD(&anon_vma->head);
420}
421
422void __init anon_vma_init(void)
423{
424 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
425 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
426 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
427}
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452struct anon_vma *page_get_anon_vma(struct page *page)
453{
454 struct anon_vma *anon_vma = NULL;
455 unsigned long anon_mapping;
456
457 rcu_read_lock();
458 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
459 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
460 goto out;
461 if (!page_mapped(page))
462 goto out;
463
464 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
465 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
466 anon_vma = NULL;
467 goto out;
468 }
469
470
471
472
473
474
475
476
477 if (!page_mapped(page)) {
478 put_anon_vma(anon_vma);
479 anon_vma = NULL;
480 }
481out:
482 rcu_read_unlock();
483
484 return anon_vma;
485}
486
487
488
489
490
491
492
493
494struct anon_vma *page_lock_anon_vma(struct page *page)
495{
496 struct anon_vma *anon_vma = NULL;
497 struct anon_vma *root_anon_vma;
498 unsigned long anon_mapping;
499
500 rcu_read_lock();
501 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
502 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
503 goto out;
504 if (!page_mapped(page))
505 goto out;
506
507 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
508 root_anon_vma = ACCESS_ONCE(anon_vma->root);
509 if (mutex_trylock(&root_anon_vma->mutex)) {
510
511
512
513
514
515 if (!page_mapped(page)) {
516 mutex_unlock(&root_anon_vma->mutex);
517 anon_vma = NULL;
518 }
519 goto out;
520 }
521
522
523 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
524 anon_vma = NULL;
525 goto out;
526 }
527
528 if (!page_mapped(page)) {
529 put_anon_vma(anon_vma);
530 anon_vma = NULL;
531 goto out;
532 }
533
534
535 rcu_read_unlock();
536 anon_vma_lock(anon_vma);
537
538 if (atomic_dec_and_test(&anon_vma->refcount)) {
539
540
541
542
543
544 anon_vma_unlock(anon_vma);
545 __put_anon_vma(anon_vma);
546 anon_vma = NULL;
547 }
548
549 return anon_vma;
550
551out:
552 rcu_read_unlock();
553 return anon_vma;
554}
555
/*
 * Release the lock on @anon_vma, e.g. one obtained through
 * page_lock_anon_vma().  Thin wrapper around anon_vma_unlock().
 */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	anon_vma_unlock(anon_vma);
}
560
561
562
563
564
565
566inline unsigned long
567vma_address(struct page *page, struct vm_area_struct *vma)
568{
569 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
570 unsigned long address;
571
572 if (unlikely(is_vm_hugetlb_page(vma)))
573 pgoff = page->index << huge_page_order(page_hstate(page));
574 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
575 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
576
577 return -EFAULT;
578 }
579 return address;
580}
581
582
583
584
585
586unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
587{
588 if (PageAnon(page)) {
589 struct anon_vma *page__anon_vma = page_anon_vma(page);
590
591
592
593
594 if (!vma->anon_vma || !page__anon_vma ||
595 vma->anon_vma->root != page__anon_vma->root)
596 return -EFAULT;
597 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
598 if (!vma->vm_file ||
599 vma->vm_file->f_mapping != page->mapping)
600 return -EFAULT;
601 } else
602 return -EFAULT;
603 return vma_address(page, vma);
604}
605
606
607
608
609
610
611
612
613
614
615pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
616 unsigned long address, spinlock_t **ptlp, int sync)
617{
618 pgd_t *pgd;
619 pud_t *pud;
620 pmd_t *pmd;
621 pte_t *pte;
622 spinlock_t *ptl;
623
624 if (unlikely(PageHuge(page))) {
625 pte = huge_pte_offset(mm, address);
626 ptl = &mm->page_table_lock;
627 goto check;
628 }
629
630 pgd = pgd_offset(mm, address);
631 if (!pgd_present(*pgd))
632 return NULL;
633
634 pud = pud_offset(pgd, address);
635 if (!pud_present(*pud))
636 return NULL;
637
638 pmd = pmd_offset(pud, address);
639 if (!pmd_present(*pmd))
640 return NULL;
641 if (pmd_trans_huge(*pmd))
642 return NULL;
643
644 pte = pte_offset_map(pmd, address);
645
646 if (!sync && !pte_present(*pte)) {
647 pte_unmap(pte);
648 return NULL;
649 }
650
651 ptl = pte_lockptr(mm, pmd);
652check:
653 spin_lock(ptl);
654 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
655 *ptlp = ptl;
656 return pte;
657 }
658 pte_unmap_unlock(pte, ptl);
659 return NULL;
660}
661
662
663
664
665
666
667
668
669
670
671int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
672{
673 unsigned long address;
674 pte_t *pte;
675 spinlock_t *ptl;
676
677 address = vma_address(page, vma);
678 if (address == -EFAULT)
679 return 0;
680 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
681 if (!pte)
682 return 0;
683 pte_unmap_unlock(pte, ptl);
684
685 return 1;
686}
687
688
689
690
691
692int page_referenced_one(struct page *page, struct vm_area_struct *vma,
693 unsigned long address, unsigned int *mapcount,
694 unsigned long *vm_flags)
695{
696 struct mm_struct *mm = vma->vm_mm;
697 int referenced = 0;
698
699 if (unlikely(PageTransHuge(page))) {
700 pmd_t *pmd;
701
702 spin_lock(&mm->page_table_lock);
703
704
705
706
707 pmd = page_check_address_pmd(page, mm, address,
708 PAGE_CHECK_ADDRESS_PMD_FLAG);
709 if (!pmd) {
710 spin_unlock(&mm->page_table_lock);
711 goto out;
712 }
713
714 if (vma->vm_flags & VM_LOCKED) {
715 spin_unlock(&mm->page_table_lock);
716 *mapcount = 0;
717 *vm_flags |= VM_LOCKED;
718 goto out;
719 }
720
721
722 if (pmdp_clear_flush_young_notify(vma, address, pmd))
723 referenced++;
724 spin_unlock(&mm->page_table_lock);
725 } else {
726 pte_t *pte;
727 spinlock_t *ptl;
728
729
730
731
732
733 pte = page_check_address(page, mm, address, &ptl, 0);
734 if (!pte)
735 goto out;
736
737 if (vma->vm_flags & VM_LOCKED) {
738 pte_unmap_unlock(pte, ptl);
739 *mapcount = 0;
740 *vm_flags |= VM_LOCKED;
741 goto out;
742 }
743
744 if (ptep_clear_flush_young_notify(vma, address, pte)) {
745
746
747
748
749
750
751
752 if (likely(!VM_SequentialReadHint(vma)))
753 referenced++;
754 }
755 pte_unmap_unlock(pte, ptl);
756 }
757
758 (*mapcount)--;
759
760 if (referenced)
761 *vm_flags |= vma->vm_flags;
762out:
763 return referenced;
764}
765
766static int page_referenced_anon(struct page *page,
767 struct mem_cgroup *memcg,
768 unsigned long *vm_flags)
769{
770 unsigned int mapcount;
771 struct anon_vma *anon_vma;
772 struct anon_vma_chain *avc;
773 int referenced = 0;
774
775 anon_vma = page_lock_anon_vma(page);
776 if (!anon_vma)
777 return referenced;
778
779 mapcount = page_mapcount(page);
780 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
781 struct vm_area_struct *vma = avc->vma;
782 unsigned long address = vma_address(page, vma);
783 if (address == -EFAULT)
784 continue;
785
786
787
788
789
790 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
791 continue;
792 referenced += page_referenced_one(page, vma, address,
793 &mapcount, vm_flags);
794 if (!mapcount)
795 break;
796 }
797
798 page_unlock_anon_vma(anon_vma);
799 return referenced;
800}
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815static int page_referenced_file(struct page *page,
816 struct mem_cgroup *memcg,
817 unsigned long *vm_flags)
818{
819 unsigned int mapcount;
820 struct address_space *mapping = page->mapping;
821 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
822 struct vm_area_struct *vma;
823 struct prio_tree_iter iter;
824 int referenced = 0;
825
826
827
828
829
830
831 BUG_ON(PageAnon(page));
832
833
834
835
836
837
838
839 BUG_ON(!PageLocked(page));
840
841 mutex_lock(&mapping->i_mmap_mutex);
842
843
844
845
846
847 mapcount = page_mapcount(page);
848
849 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
850 unsigned long address = vma_address(page, vma);
851 if (address == -EFAULT)
852 continue;
853
854
855
856
857
858 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
859 continue;
860 referenced += page_referenced_one(page, vma, address,
861 &mapcount, vm_flags);
862 if (!mapcount)
863 break;
864 }
865
866 mutex_unlock(&mapping->i_mmap_mutex);
867 return referenced;
868}
869
870
871
872
873
874
875
876
877
878
879
880int page_referenced(struct page *page,
881 int is_locked,
882 struct mem_cgroup *memcg,
883 unsigned long *vm_flags)
884{
885 int referenced = 0;
886 int we_locked = 0;
887
888 *vm_flags = 0;
889 if (page_mapped(page) && page_rmapping(page)) {
890 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
891 we_locked = trylock_page(page);
892 if (!we_locked) {
893 referenced++;
894 goto out;
895 }
896 }
897 if (unlikely(PageKsm(page)))
898 referenced += page_referenced_ksm(page, memcg,
899 vm_flags);
900 else if (PageAnon(page))
901 referenced += page_referenced_anon(page, memcg,
902 vm_flags);
903 else if (page->mapping)
904 referenced += page_referenced_file(page, memcg,
905 vm_flags);
906 if (we_locked)
907 unlock_page(page);
908
909 if (page_test_and_clear_young(page_to_pfn(page)))
910 referenced++;
911 }
912out:
913 return referenced;
914}
915
916static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
917 unsigned long address)
918{
919 struct mm_struct *mm = vma->vm_mm;
920 pte_t *pte;
921 spinlock_t *ptl;
922 int ret = 0;
923
924 pte = page_check_address(page, mm, address, &ptl, 1);
925 if (!pte)
926 goto out;
927
928 if (pte_dirty(*pte) || pte_write(*pte)) {
929 pte_t entry;
930
931 flush_cache_page(vma, address, pte_pfn(*pte));
932 entry = ptep_clear_flush_notify(vma, address, pte);
933 entry = pte_wrprotect(entry);
934 entry = pte_mkclean(entry);
935 set_pte_at(mm, address, pte, entry);
936 ret = 1;
937 }
938
939 pte_unmap_unlock(pte, ptl);
940out:
941 return ret;
942}
943
944static int page_mkclean_file(struct address_space *mapping, struct page *page)
945{
946 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
947 struct vm_area_struct *vma;
948 struct prio_tree_iter iter;
949 int ret = 0;
950
951 BUG_ON(PageAnon(page));
952
953 mutex_lock(&mapping->i_mmap_mutex);
954 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
955 if (vma->vm_flags & VM_SHARED) {
956 unsigned long address = vma_address(page, vma);
957 if (address == -EFAULT)
958 continue;
959 ret += page_mkclean_one(page, vma, address);
960 }
961 }
962 mutex_unlock(&mapping->i_mmap_mutex);
963 return ret;
964}
965
/*
 * Clean and write-protect all shared mappings of a locked @page.
 *
 * Nothing is done for pages that are not mapped or have no mapping.
 * Returns non-zero if any pte was cleaned or if
 * page_test_and_clear_dirty() reported the page dirty, 0 otherwise.
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;
	int ret;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	ret = page_mkclean_file(mapping, page);
	if (page_test_and_clear_dirty(page_to_pfn(page), 1))
		ret = 1;

	return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);
984
985
986
987
988
989
990
991
992
993
994
995
996void page_move_anon_rmap(struct page *page,
997 struct vm_area_struct *vma, unsigned long address)
998{
999 struct anon_vma *anon_vma = vma->anon_vma;
1000
1001 VM_BUG_ON(!PageLocked(page));
1002 VM_BUG_ON(!anon_vma);
1003 VM_BUG_ON(page->index != linear_page_index(vma, address));
1004
1005 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1006 page->mapping = (struct address_space *) anon_vma;
1007}
1008
1009
1010
1011
1012
1013
1014
1015
1016static void __page_set_anon_rmap(struct page *page,
1017 struct vm_area_struct *vma, unsigned long address, int exclusive)
1018{
1019 struct anon_vma *anon_vma = vma->anon_vma;
1020
1021 BUG_ON(!anon_vma);
1022
1023 if (PageAnon(page))
1024 return;
1025
1026
1027
1028
1029
1030
1031 if (!exclusive)
1032 anon_vma = anon_vma->root;
1033
1034 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1035 page->mapping = (struct address_space *) anon_vma;
1036 page->index = linear_page_index(vma, address);
1037}
1038
1039
1040
1041
1042
1043
1044
/*
 * Debug-only sanity check for an already-anonymous page: its anon_vma
 * must share the root of @vma's anon_vma and its index must match
 * @address.  Compiles to nothing without CONFIG_DEBUG_VM.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	struct anon_vma *page_root = page_anon_vma(page)->root;

	BUG_ON(page_root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
/*
 * Add an anonymous mapping of @page at @address in @vma.
 *
 * Convenience wrapper around do_page_add_anon_rmap() with the
 * "exclusive" argument set to 0.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	do_page_add_anon_rmap(page, vma, address, 0);
}
1082
1083
1084
1085
1086
1087
1088void do_page_add_anon_rmap(struct page *page,
1089 struct vm_area_struct *vma, unsigned long address, int exclusive)
1090{
1091 int first = atomic_inc_and_test(&page->_mapcount);
1092 if (first) {
1093 if (!PageTransHuge(page))
1094 __inc_zone_page_state(page, NR_ANON_PAGES);
1095 else
1096 __inc_zone_page_state(page,
1097 NR_ANON_TRANSPARENT_HUGEPAGES);
1098 }
1099 if (unlikely(PageKsm(page)))
1100 return;
1101
1102 VM_BUG_ON(!PageLocked(page));
1103
1104 if (first)
1105 __page_set_anon_rmap(page, vma, address, exclusive);
1106 else
1107 __page_check_anon_rmap(page, vma, address);
1108}
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120void page_add_new_anon_rmap(struct page *page,
1121 struct vm_area_struct *vma, unsigned long address)
1122{
1123 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1124 SetPageSwapBacked(page);
1125 atomic_set(&page->_mapcount, 0);
1126 if (!PageTransHuge(page))
1127 __inc_zone_page_state(page, NR_ANON_PAGES);
1128 else
1129 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1130 __page_set_anon_rmap(page, vma, address, 1);
1131 if (page_evictable(page, vma))
1132 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
1133 else
1134 add_page_to_unevictable_list(page);
1135}
1136
1137
1138
1139
1140
1141
1142
1143void page_add_file_rmap(struct page *page)
1144{
1145 bool locked;
1146 unsigned long flags;
1147
1148 mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1149 if (atomic_inc_and_test(&page->_mapcount)) {
1150 __inc_zone_page_state(page, NR_FILE_MAPPED);
1151 mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
1152 }
1153 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1154}
1155
1156
1157
1158
1159
1160
1161
1162void page_remove_rmap(struct page *page)
1163{
1164 bool anon = PageAnon(page);
1165 bool locked;
1166 unsigned long flags;
1167
1168
1169
1170
1171
1172
1173 if (!anon)
1174 mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1175
1176
1177 if (!atomic_add_negative(-1, &page->_mapcount))
1178 goto out;
1179
1180
1181
1182
1183
1184
1185
1186
1187 if ((!anon || PageSwapCache(page)) &&
1188 page_test_and_clear_dirty(page_to_pfn(page), 1))
1189 set_page_dirty(page);
1190
1191
1192
1193
1194 if (unlikely(PageHuge(page)))
1195 goto out;
1196 if (anon) {
1197 mem_cgroup_uncharge_page(page);
1198 if (!PageTransHuge(page))
1199 __dec_zone_page_state(page, NR_ANON_PAGES);
1200 else
1201 __dec_zone_page_state(page,
1202 NR_ANON_TRANSPARENT_HUGEPAGES);
1203 } else {
1204 __dec_zone_page_state(page, NR_FILE_MAPPED);
1205 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1206 }
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216out:
1217 if (!anon)
1218 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1219}
1220
1221
1222
1223
1224
1225int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1226 unsigned long address, enum ttu_flags flags)
1227{
1228 struct mm_struct *mm = vma->vm_mm;
1229 pte_t *pte;
1230 pte_t pteval;
1231 spinlock_t *ptl;
1232 int ret = SWAP_AGAIN;
1233
1234 pte = page_check_address(page, mm, address, &ptl, 0);
1235 if (!pte)
1236 goto out;
1237
1238
1239
1240
1241
1242
1243 if (!(flags & TTU_IGNORE_MLOCK)) {
1244 if (vma->vm_flags & VM_LOCKED)
1245 goto out_mlock;
1246
1247 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1248 goto out_unmap;
1249 }
1250 if (!(flags & TTU_IGNORE_ACCESS)) {
1251 if (ptep_clear_flush_young_notify(vma, address, pte)) {
1252 ret = SWAP_FAIL;
1253 goto out_unmap;
1254 }
1255 }
1256
1257
1258 flush_cache_page(vma, address, page_to_pfn(page));
1259 pteval = ptep_clear_flush_notify(vma, address, pte);
1260
1261
1262 if (pte_dirty(pteval))
1263 set_page_dirty(page);
1264
1265
1266 update_hiwater_rss(mm);
1267
1268 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1269 if (PageAnon(page))
1270 dec_mm_counter(mm, MM_ANONPAGES);
1271 else
1272 dec_mm_counter(mm, MM_FILEPAGES);
1273 set_pte_at(mm, address, pte,
1274 swp_entry_to_pte(make_hwpoison_entry(page)));
1275 } else if (PageAnon(page)) {
1276 swp_entry_t entry = { .val = page_private(page) };
1277
1278 if (PageSwapCache(page)) {
1279
1280
1281
1282
1283 if (swap_duplicate(entry) < 0) {
1284 set_pte_at(mm, address, pte, pteval);
1285 ret = SWAP_FAIL;
1286 goto out_unmap;
1287 }
1288 if (list_empty(&mm->mmlist)) {
1289 spin_lock(&mmlist_lock);
1290 if (list_empty(&mm->mmlist))
1291 list_add(&mm->mmlist, &init_mm.mmlist);
1292 spin_unlock(&mmlist_lock);
1293 }
1294 dec_mm_counter(mm, MM_ANONPAGES);
1295 inc_mm_counter(mm, MM_SWAPENTS);
1296 } else if (IS_ENABLED(CONFIG_MIGRATION)) {
1297
1298
1299
1300
1301
1302 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1303 entry = make_migration_entry(page, pte_write(pteval));
1304 }
1305 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1306 BUG_ON(pte_file(*pte));
1307 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1308 (TTU_ACTION(flags) == TTU_MIGRATION)) {
1309
1310 swp_entry_t entry;
1311 entry = make_migration_entry(page, pte_write(pteval));
1312 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1313 } else
1314 dec_mm_counter(mm, MM_FILEPAGES);
1315
1316 page_remove_rmap(page);
1317 page_cache_release(page);
1318
1319out_unmap:
1320 pte_unmap_unlock(pte, ptl);
1321out:
1322 return ret;
1323
1324out_mlock:
1325 pte_unmap_unlock(pte, ptl);
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1337 if (vma->vm_flags & VM_LOCKED) {
1338 mlock_vma_page(page);
1339 ret = SWAP_MLOCK;
1340 }
1341 up_read(&vma->vm_mm->mmap_sem);
1342 }
1343 return ret;
1344}
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
/*
 * Size in bytes of the address window that try_to_unmap_cluster() scans
 * per call: 32 pages, capped at PMD_SIZE.  CLUSTER_MASK rounds an
 * address down to a multiple of CLUSTER_SIZE.
 */
#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
1372
1373static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1374 struct vm_area_struct *vma, struct page *check_page)
1375{
1376 struct mm_struct *mm = vma->vm_mm;
1377 pgd_t *pgd;
1378 pud_t *pud;
1379 pmd_t *pmd;
1380 pte_t *pte;
1381 pte_t pteval;
1382 spinlock_t *ptl;
1383 struct page *page;
1384 unsigned long address;
1385 unsigned long end;
1386 int ret = SWAP_AGAIN;
1387 int locked_vma = 0;
1388
1389 address = (vma->vm_start + cursor) & CLUSTER_MASK;
1390 end = address + CLUSTER_SIZE;
1391 if (address < vma->vm_start)
1392 address = vma->vm_start;
1393 if (end > vma->vm_end)
1394 end = vma->vm_end;
1395
1396 pgd = pgd_offset(mm, address);
1397 if (!pgd_present(*pgd))
1398 return ret;
1399
1400 pud = pud_offset(pgd, address);
1401 if (!pud_present(*pud))
1402 return ret;
1403
1404 pmd = pmd_offset(pud, address);
1405 if (!pmd_present(*pmd))
1406 return ret;
1407
1408
1409
1410
1411
1412 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1413 locked_vma = (vma->vm_flags & VM_LOCKED);
1414 if (!locked_vma)
1415 up_read(&vma->vm_mm->mmap_sem);
1416 }
1417
1418 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1419
1420
1421 update_hiwater_rss(mm);
1422
1423 for (; address < end; pte++, address += PAGE_SIZE) {
1424 if (!pte_present(*pte))
1425 continue;
1426 page = vm_normal_page(vma, address, *pte);
1427 BUG_ON(!page || PageAnon(page));
1428
1429 if (locked_vma) {
1430 mlock_vma_page(page);
1431 if (page == check_page)
1432 ret = SWAP_MLOCK;
1433 continue;
1434 }
1435
1436 if (ptep_clear_flush_young_notify(vma, address, pte))
1437 continue;
1438
1439
1440 flush_cache_page(vma, address, pte_pfn(*pte));
1441 pteval = ptep_clear_flush_notify(vma, address, pte);
1442
1443
1444 if (page->index != linear_page_index(vma, address))
1445 set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
1446
1447
1448 if (pte_dirty(pteval))
1449 set_page_dirty(page);
1450
1451 page_remove_rmap(page);
1452 page_cache_release(page);
1453 dec_mm_counter(mm, MM_FILEPAGES);
1454 (*mapcount)--;
1455 }
1456 pte_unmap_unlock(pte - 1, ptl);
1457 if (locked_vma)
1458 up_read(&vma->vm_mm->mmap_sem);
1459 return ret;
1460}
1461
1462bool is_vma_temporary_stack(struct vm_area_struct *vma)
1463{
1464 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1465
1466 if (!maybe_stack)
1467 return false;
1468
1469 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1470 VM_STACK_INCOMPLETE_SETUP)
1471 return true;
1472
1473 return false;
1474}
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1493{
1494 struct anon_vma *anon_vma;
1495 struct anon_vma_chain *avc;
1496 int ret = SWAP_AGAIN;
1497
1498 anon_vma = page_lock_anon_vma(page);
1499 if (!anon_vma)
1500 return ret;
1501
1502 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1503 struct vm_area_struct *vma = avc->vma;
1504 unsigned long address;
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514 if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
1515 is_vma_temporary_stack(vma))
1516 continue;
1517
1518 address = vma_address(page, vma);
1519 if (address == -EFAULT)
1520 continue;
1521 ret = try_to_unmap_one(page, vma, address, flags);
1522 if (ret != SWAP_AGAIN || !page_mapped(page))
1523 break;
1524 }
1525
1526 page_unlock_anon_vma(anon_vma);
1527 return ret;
1528}
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1546{
1547 struct address_space *mapping = page->mapping;
1548 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1549 struct vm_area_struct *vma;
1550 struct prio_tree_iter iter;
1551 int ret = SWAP_AGAIN;
1552 unsigned long cursor;
1553 unsigned long max_nl_cursor = 0;
1554 unsigned long max_nl_size = 0;
1555 unsigned int mapcount;
1556
1557 mutex_lock(&mapping->i_mmap_mutex);
1558 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1559 unsigned long address = vma_address(page, vma);
1560 if (address == -EFAULT)
1561 continue;
1562 ret = try_to_unmap_one(page, vma, address, flags);
1563 if (ret != SWAP_AGAIN || !page_mapped(page))
1564 goto out;
1565 }
1566
1567 if (list_empty(&mapping->i_mmap_nonlinear))
1568 goto out;
1569
1570
1571
1572
1573
1574
1575 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1576 goto out;
1577
1578 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1579 shared.vm_set.list) {
1580 cursor = (unsigned long) vma->vm_private_data;
1581 if (cursor > max_nl_cursor)
1582 max_nl_cursor = cursor;
1583 cursor = vma->vm_end - vma->vm_start;
1584 if (cursor > max_nl_size)
1585 max_nl_size = cursor;
1586 }
1587
1588 if (max_nl_size == 0) {
1589 ret = SWAP_FAIL;
1590 goto out;
1591 }
1592
1593
1594
1595
1596
1597
1598
1599
1600 mapcount = page_mapcount(page);
1601 if (!mapcount)
1602 goto out;
1603 cond_resched();
1604
1605 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1606 if (max_nl_cursor == 0)
1607 max_nl_cursor = CLUSTER_SIZE;
1608
1609 do {
1610 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1611 shared.vm_set.list) {
1612 cursor = (unsigned long) vma->vm_private_data;
1613 while ( cursor < max_nl_cursor &&
1614 cursor < vma->vm_end - vma->vm_start) {
1615 if (try_to_unmap_cluster(cursor, &mapcount,
1616 vma, page) == SWAP_MLOCK)
1617 ret = SWAP_MLOCK;
1618 cursor += CLUSTER_SIZE;
1619 vma->vm_private_data = (void *) cursor;
1620 if ((int)mapcount <= 0)
1621 goto out;
1622 }
1623 vma->vm_private_data = (void *) max_nl_cursor;
1624 }
1625 cond_resched();
1626 max_nl_cursor += CLUSTER_SIZE;
1627 } while (max_nl_cursor <= max_nl_size);
1628
1629
1630
1631
1632
1633
1634 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1635 vma->vm_private_data = NULL;
1636out:
1637 mutex_unlock(&mapping->i_mmap_mutex);
1638 return ret;
1639}
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655int try_to_unmap(struct page *page, enum ttu_flags flags)
1656{
1657 int ret;
1658
1659 BUG_ON(!PageLocked(page));
1660 VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
1661
1662 if (unlikely(PageKsm(page)))
1663 ret = try_to_unmap_ksm(page, flags);
1664 else if (PageAnon(page))
1665 ret = try_to_unmap_anon(page, flags);
1666 else
1667 ret = try_to_unmap_file(page, flags);
1668 if (ret != SWAP_MLOCK && !page_mapped(page))
1669 ret = SWAP_SUCCESS;
1670 return ret;
1671}
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688int try_to_munlock(struct page *page)
1689{
1690 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1691
1692 if (unlikely(PageKsm(page)))
1693 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1694 else if (PageAnon(page))
1695 return try_to_unmap_anon(page, TTU_MUNLOCK);
1696 else
1697 return try_to_unmap_file(page, TTU_MUNLOCK);
1698}
1699
1700void __put_anon_vma(struct anon_vma *anon_vma)
1701{
1702 struct anon_vma *root = anon_vma->root;
1703
1704 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1705 anon_vma_free(root);
1706
1707 anon_vma_free(anon_vma);
1708}
1709
1710#ifdef CONFIG_MIGRATION
1711
1712
1713
1714
1715static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1716 struct vm_area_struct *, unsigned long, void *), void *arg)
1717{
1718 struct anon_vma *anon_vma;
1719 struct anon_vma_chain *avc;
1720 int ret = SWAP_AGAIN;
1721
1722
1723
1724
1725
1726
1727
1728 anon_vma = page_anon_vma(page);
1729 if (!anon_vma)
1730 return ret;
1731 anon_vma_lock(anon_vma);
1732 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1733 struct vm_area_struct *vma = avc->vma;
1734 unsigned long address = vma_address(page, vma);
1735 if (address == -EFAULT)
1736 continue;
1737 ret = rmap_one(page, vma, address, arg);
1738 if (ret != SWAP_AGAIN)
1739 break;
1740 }
1741 anon_vma_unlock(anon_vma);
1742 return ret;
1743}
1744
1745static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1746 struct vm_area_struct *, unsigned long, void *), void *arg)
1747{
1748 struct address_space *mapping = page->mapping;
1749 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1750 struct vm_area_struct *vma;
1751 struct prio_tree_iter iter;
1752 int ret = SWAP_AGAIN;
1753
1754 if (!mapping)
1755 return ret;
1756 mutex_lock(&mapping->i_mmap_mutex);
1757 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1758 unsigned long address = vma_address(page, vma);
1759 if (address == -EFAULT)
1760 continue;
1761 ret = rmap_one(page, vma, address, arg);
1762 if (ret != SWAP_AGAIN)
1763 break;
1764 }
1765
1766
1767
1768
1769
1770 mutex_unlock(&mapping->i_mmap_mutex);
1771 return ret;
1772}
1773
/*
 * Invoke @rmap_one(page, vma, address, arg) for each mapping of the
 * locked @page, dispatching to the KSM, anonymous or file walker.
 * Returns whatever the chosen walker returns.
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);

	if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);

	return rmap_walk_file(page, rmap_one, arg);
}
1786#endif
1787
1788#ifdef CONFIG_HUGETLB_PAGE
1789
1790
1791
1792
1793
1794static void __hugepage_set_anon_rmap(struct page *page,
1795 struct vm_area_struct *vma, unsigned long address, int exclusive)
1796{
1797 struct anon_vma *anon_vma = vma->anon_vma;
1798
1799 BUG_ON(!anon_vma);
1800
1801 if (PageAnon(page))
1802 return;
1803 if (!exclusive)
1804 anon_vma = anon_vma->root;
1805
1806 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1807 page->mapping = (struct address_space *) anon_vma;
1808 page->index = linear_page_index(vma, address);
1809}
1810
1811void hugepage_add_anon_rmap(struct page *page,
1812 struct vm_area_struct *vma, unsigned long address)
1813{
1814 struct anon_vma *anon_vma = vma->anon_vma;
1815 int first;
1816
1817 BUG_ON(!PageLocked(page));
1818 BUG_ON(!anon_vma);
1819
1820 first = atomic_inc_and_test(&page->_mapcount);
1821 if (first)
1822 __hugepage_set_anon_rmap(page, vma, address, 0);
1823}
1824
1825void hugepage_add_new_anon_rmap(struct page *page,
1826 struct vm_area_struct *vma, unsigned long address)
1827{
1828 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1829 atomic_set(&page->_mapcount, 0);
1830 __hugepage_set_anon_rmap(page, vma, address, 1);
1831}
1832#endif
1833