1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <linux/mm.h>
46#include <linux/pagemap.h>
47#include <linux/swap.h>
48#include <linux/swapops.h>
49#include <linux/slab.h>
50#include <linux/init.h>
51#include <linux/ksm.h>
52#include <linux/rmap.h>
53#include <linux/rcupdate.h>
54#include <linux/export.h>
55#include <linux/memcontrol.h>
56#include <linux/mmu_notifier.h>
57#include <linux/migrate.h>
58#include <linux/hugetlb.h>
59#include <linux/backing-dev.h>
60
61#include <asm/tlbflush.h>
62
63#include "internal.h"
64
/* Slab cache backing struct anon_vma; created in anon_vma_init(). */
static struct kmem_cache *anon_vma_cachep;
/* Slab cache backing struct anon_vma_chain; created in anon_vma_init(). */
static struct kmem_cache *anon_vma_chain_cachep;
67
68static inline struct anon_vma *anon_vma_alloc(void)
69{
70 struct anon_vma *anon_vma;
71
72 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
73 if (anon_vma) {
74 atomic_set(&anon_vma->refcount, 1);
75
76
77
78
79 anon_vma->root = anon_vma;
80 }
81
82 return anon_vma;
83}
84
/*
 * Return @anon_vma to anon_vma_cachep.
 *
 * The caller must already have dropped the last reference; VM_BUG_ON
 * checks that the refcount is zero.
 */
static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	VM_BUG_ON(atomic_read(&anon_vma->refcount));

	/*
	 * If someone currently holds the root mutex, acquire and release it
	 * once so that the holder is done before the object goes back to the
	 * slab.
	 * NOTE(review): presumably this pairs with the mutex_trylock() fast
	 * path in page_lock_anon_vma(), which locks the root without taking
	 * a reference first — confirm.
	 */
	if (mutex_is_locked(&anon_vma->root->mutex)) {
		anon_vma_lock(anon_vma);
		anon_vma_unlock(anon_vma);
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
}
113
/*
 * Allocate an anon_vma_chain from anon_vma_chain_cachep using the
 * caller-supplied @gfp flags.  Returns NULL on failure.
 */
static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}
118
/* Return @anon_vma_chain to anon_vma_chain_cachep. */
static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}
123
/*
 * Connect @vma and @anon_vma through @avc: the chain entry is added to
 * the vma's anon_vma_chain list and inserted into the anon_vma's interval
 * tree.
 *
 * All callers in this file hold the anon_vma (root) lock around the call.
 */
static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
 * Make sure @vma has an anon_vma attached.
 *
 * If vma->anon_vma is already set nothing is done.  Otherwise a chain
 * entry is allocated, an anon_vma is obtained (reusing the one offered by
 * find_mergeable_anon_vma() if any, else allocating a new one), and both
 * are linked to the vma while holding the anon_vma lock and
 * mm->page_table_lock.  If vma->anon_vma turned out to be set by the time
 * the locks are held, whatever was allocated here is released again.
 *
 * May sleep.  Returns 0 on success, -ENOMEM if an allocation fails.
 */
int anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	struct anon_vma_chain *avc;

	might_sleep();
	if (unlikely(!anon_vma)) {
		struct mm_struct *mm = vma->vm_mm;
		struct anon_vma *allocated;

		avc = anon_vma_chain_alloc(GFP_KERNEL);
		if (!avc)
			goto out_enomem;

		/* Prefer sharing an existing anon_vma over allocating one. */
		anon_vma = find_mergeable_anon_vma(vma);
		allocated = NULL;
		if (!anon_vma) {
			anon_vma = anon_vma_alloc();
			if (unlikely(!anon_vma))
				goto out_enomem_free_avc;
			/* Remember it so it can be dropped if we lose the race. */
			allocated = anon_vma;
		}

		anon_vma_lock(anon_vma);
		/* Recheck vma->anon_vma now that both locks are held. */
		spin_lock(&mm->page_table_lock);
		if (likely(!vma->anon_vma)) {
			vma->anon_vma = anon_vma;
			anon_vma_chain_link(vma, avc, anon_vma);
			/* Ownership passed to the vma; nothing left to free. */
			allocated = NULL;
			avc = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		anon_vma_unlock(anon_vma);

		/* Still set only if the vma already had an anon_vma above. */
		if (unlikely(allocated))
			put_anon_vma(allocated);
		if (unlikely(avc))
			anon_vma_chain_free(avc);
	}
	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}
208
209
210
211
212
213
214
215
216
217static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
218{
219 struct anon_vma *new_root = anon_vma->root;
220 if (new_root != root) {
221 if (WARN_ON_ONCE(root))
222 mutex_unlock(&root->mutex);
223 root = new_root;
224 mutex_lock(&root->mutex);
225 }
226 return root;
227}
228
229static inline void unlock_anon_vma_root(struct anon_vma *root)
230{
231 if (root)
232 mutex_unlock(&root->mutex);
233}
234
235
236
237
238
/*
 * Give @dst a chain entry for every anon_vma that @src is chained to.
 * @src's chain is walked in reverse order.
 *
 * Returns 0 on success.  If a chain entry cannot be allocated, everything
 * linked to @dst so far is undone via unlink_anon_vmas() and -ENOMEM is
 * returned.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;
	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma;

		/*
		 * The root mutex may be held from a previous iteration, so
		 * try a non-sleeping allocation first.  Only if that fails
		 * drop the lock and retry with GFP_KERNEL.
		 */
		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!avc)) {
			unlock_anon_vma_root(root);
			root = NULL;
			avc = anon_vma_chain_alloc(GFP_KERNEL);
			if (!avc)
				goto enomem_failure;
		}
		anon_vma = pavc->anon_vma;
		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_chain_link(dst, avc, anon_vma);
	}
	unlock_anon_vma_root(root);
	return 0;

 enomem_failure:
	/* root is NULL here: the lock was dropped before the failed retry. */
	unlink_anon_vmas(dst);
	return -ENOMEM;
}
266
267
268
269
270
271
/*
 * Set up the anon_vma state of child @vma from parent @pvma.
 *
 * @vma is first chained to all of the parent's anon_vmas through
 * anon_vma_clone(), then receives a freshly allocated anon_vma of its own
 * which shares the parent's root.
 *
 * Returns 0 on success (including when the parent has no anon_vma), or
 * -ENOMEM on allocation failure, in which case @vma's chain is torn down.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma_chain *avc;
	struct anon_vma *anon_vma;

	/* Nothing to inherit if the parent has no anon_vma. */
	if (!pvma->anon_vma)
		return 0;

	/* First, attach the child vma to every anon_vma of the parent. */
	if (anon_vma_clone(vma, pvma))
		return -ENOMEM;

	/* Then give the child an anon_vma of its own. */
	anon_vma = anon_vma_alloc();
	if (!anon_vma)
		goto out_error;
	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_error_free_anon_vma;

	/* The new anon_vma joins the parent's tree: same root, same lock. */
	anon_vma->root = pvma->anon_vma->root;

	/*
	 * Take a reference on the root on behalf of the new anon_vma;
	 * __put_anon_vma() drops it again when a non-root anon_vma is freed.
	 */
	get_anon_vma(anon_vma->root);

	vma->anon_vma = anon_vma;
	anon_vma_lock(anon_vma);
	anon_vma_chain_link(vma, avc, anon_vma);
	anon_vma_unlock(anon_vma);

	return 0;

 out_error_free_anon_vma:
	put_anon_vma(anon_vma);
 out_error:
	unlink_anon_vmas(vma);
	return -ENOMEM;
}
321
/*
 * Detach @vma from every anon_vma on its anon_vma_chain and free the chain
 * entries.  Done in two passes: the first runs under the root mutex, the
 * second runs unlocked and drops anon_vma references.
 */
void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc, *next;
	struct anon_vma *root = NULL;

	/*
	 * Pass 1 (root mutex held): remove each chain entry from its
	 * anon_vma's interval tree.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);

		/*
		 * If the tree is now empty, leave the entry on the list so
		 * pass 2 can drop the anon_vma reference after the lock has
		 * been released.
		 */
		if (RB_EMPTY_ROOT(&anon_vma->rb_root))
			continue;

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
	unlock_anon_vma_root(root);

	/*
	 * Pass 2 (no lock held): only the entries skipped above remain.
	 * Drop the anon_vma reference for each and free the chain entry.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		put_anon_vma(anon_vma);

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
}
363
364static void anon_vma_ctor(void *data)
365{
366 struct anon_vma *anon_vma = data;
367
368 mutex_init(&anon_vma->mutex);
369 atomic_set(&anon_vma->refcount, 0);
370 anon_vma->rb_root = RB_ROOT;
371}
372
/*
 * Boot-time setup: create the slab caches for anon_vma and anon_vma_chain.
 * Both are created with SLAB_PANIC.  The anon_vma cache additionally uses
 * SLAB_DESTROY_BY_RCU and anon_vma_ctor() as its constructor.
 */
void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
/*
 * Get a reference on the anon_vma of an anonymous, mapped page.
 *
 * Returns the anon_vma with its refcount raised (drop it with
 * put_anon_vma()), or NULL if the page is not anonymous, is not mapped,
 * or the anon_vma's refcount had already reached zero.
 */
struct anon_vma *page_get_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	unsigned long anon_mapping;

	rcu_read_lock();
	/* Sample page->mapping exactly once; it may change under us. */
	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	/* Strip the flag bit to recover the anon_vma pointer. */
	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	/*
	 * Recheck after taking the reference: if the page got unmapped in
	 * the meantime, drop the reference again and report failure.
	 * NOTE(review): presumably this guards against the anon_vma having
	 * been freed and reused (the cache is SLAB_DESTROY_BY_RCU) — confirm.
	 */
	if (!page_mapped(page)) {
		put_anon_vma(anon_vma);
		anon_vma = NULL;
	}
out:
	rcu_read_unlock();

	return anon_vma;
}
437
438
439
440
441
442
443
444
/*
 * Look up and lock the anon_vma of an anonymous, mapped page.
 *
 * Returns the anon_vma with its lock held (release it with
 * page_unlock_anon_vma()), or NULL if the page is not anonymous, is no
 * longer mapped, or the anon_vma could not be pinned.
 */
struct anon_vma *page_lock_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	struct anon_vma *root_anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	root_anon_vma = ACCESS_ONCE(anon_vma->root);
	if (mutex_trylock(&root_anon_vma->mutex)) {
		/*
		 * Fast path: the root mutex was taken without sleeping and
		 * without a reference.  Recheck that the page is still
		 * mapped; if not, back out.
		 */
		if (!page_mapped(page)) {
			mutex_unlock(&root_anon_vma->mutex);
			anon_vma = NULL;
		}
		goto out;
	}

	/* Slow path: we will have to sleep, so pin the anon_vma first. */
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	if (!page_mapped(page)) {
		put_anon_vma(anon_vma);
		anon_vma = NULL;
		goto out;
	}

	/* The reference keeps the anon_vma alive outside the RCU section. */
	rcu_read_unlock();
	anon_vma_lock(anon_vma);

	if (atomic_dec_and_test(&anon_vma->refcount)) {
		/*
		 * Ours was the last reference, so the anon_vma is dead.
		 * Unlock before freeing and report failure.
		 */
		anon_vma_unlock(anon_vma);
		__put_anon_vma(anon_vma);
		anon_vma = NULL;
	}

	return anon_vma;

out:
	rcu_read_unlock();
	return anon_vma;
}
506
/* Release the lock taken by a successful page_lock_anon_vma(). */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	anon_vma_unlock(anon_vma);
}
511
512
513
514
515static inline unsigned long
516__vma_address(struct page *page, struct vm_area_struct *vma)
517{
518 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
519
520 if (unlikely(is_vm_hugetlb_page(vma)))
521 pgoff = page->index << huge_page_order(page_hstate(page));
522
523 return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
524}
525
526inline unsigned long
527vma_address(struct page *page, struct vm_area_struct *vma)
528{
529 unsigned long address = __vma_address(page, vma);
530
531
532 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
533
534 return address;
535}
536
537
538
539
540
541unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
542{
543 unsigned long address;
544 if (PageAnon(page)) {
545 struct anon_vma *page__anon_vma = page_anon_vma(page);
546
547
548
549
550 if (!vma->anon_vma || !page__anon_vma ||
551 vma->anon_vma->root != page__anon_vma->root)
552 return -EFAULT;
553 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
554 if (!vma->vm_file ||
555 vma->vm_file->f_mapping != page->mapping)
556 return -EFAULT;
557 } else
558 return -EFAULT;
559 address = __vma_address(page, vma);
560 if (unlikely(address < vma->vm_start || address >= vma->vm_end))
561 return -EFAULT;
562 return address;
563}
564
565
566
567
568
569
570
571
572
573
574pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
575 unsigned long address, spinlock_t **ptlp, int sync)
576{
577 pgd_t *pgd;
578 pud_t *pud;
579 pmd_t *pmd;
580 pte_t *pte;
581 spinlock_t *ptl;
582
583 if (unlikely(PageHuge(page))) {
584 pte = huge_pte_offset(mm, address);
585 ptl = &mm->page_table_lock;
586 goto check;
587 }
588
589 pgd = pgd_offset(mm, address);
590 if (!pgd_present(*pgd))
591 return NULL;
592
593 pud = pud_offset(pgd, address);
594 if (!pud_present(*pud))
595 return NULL;
596
597 pmd = pmd_offset(pud, address);
598 if (!pmd_present(*pmd))
599 return NULL;
600 if (pmd_trans_huge(*pmd))
601 return NULL;
602
603 pte = pte_offset_map(pmd, address);
604
605 if (!sync && !pte_present(*pte)) {
606 pte_unmap(pte);
607 return NULL;
608 }
609
610 ptl = pte_lockptr(mm, pmd);
611check:
612 spin_lock(ptl);
613 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
614 *ptlp = ptl;
615 return pte;
616 }
617 pte_unmap_unlock(pte, ptl);
618 return NULL;
619}
620
621
622
623
624
625
626
627
628
629
630int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
631{
632 unsigned long address;
633 pte_t *pte;
634 spinlock_t *ptl;
635
636 address = __vma_address(page, vma);
637 if (unlikely(address < vma->vm_start || address >= vma->vm_end))
638 return 0;
639 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
640 if (!pte)
641 return 0;
642 pte_unmap_unlock(pte, ptl);
643
644 return 1;
645}
646
647
648
649
650
/*
 * Test and clear the "young" (accessed) bit of @page's mapping at
 * @address in @vma.
 *
 * @mapcount: count of mappings still to visit; decremented when a mapping
 *            was examined, forced to 0 when a VM_LOCKED vma is found so
 *            that the caller's loop stops.
 * @vm_flags: accumulates vma->vm_flags of vmas in which the page was found
 *            referenced, or VM_LOCKED if the vma is mlocked.
 *
 * Returns the number of references found here (0 or 1).
 */
int page_referenced_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, unsigned int *mapcount,
			unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int referenced = 0;

	if (unlikely(PageTransHuge(page))) {
		pmd_t *pmd;

		/* Huge pmd lookups are done under mm->page_table_lock. */
		spin_lock(&mm->page_table_lock);
		pmd = page_check_address_pmd(page, mm, address,
					     PAGE_CHECK_ADDRESS_PMD_FLAG);
		if (!pmd) {
			spin_unlock(&mm->page_table_lock);
			goto out;
		}

		if (vma->vm_flags & VM_LOCKED) {
			spin_unlock(&mm->page_table_lock);
			*mapcount = 0;	/* make the caller's loop stop */
			*vm_flags |= VM_LOCKED;
			goto out;
		}

		if (pmdp_clear_flush_young_notify(vma, address, pmd))
			referenced++;
		spin_unlock(&mm->page_table_lock);
	} else {
		pte_t *pte;
		spinlock_t *ptl;

		/* sync=0: allow the unlocked quick check for a missing pte. */
		pte = page_check_address(page, mm, address, &ptl, 0);
		if (!pte)
			goto out;

		if (vma->vm_flags & VM_LOCKED) {
			pte_unmap_unlock(pte, ptl);
			*mapcount = 0;	/* make the caller's loop stop */
			*vm_flags |= VM_LOCKED;
			goto out;
		}

		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			/*
			 * The young bit is cleared either way, but it is
			 * not counted as a reference when the vma carries
			 * the sequential-read hint.
			 */
			if (likely(!VM_SequentialReadHint(vma)))
				referenced++;
		}
		pte_unmap_unlock(pte, ptl);
	}

	/* One mapping of the page has been accounted for. */
	(*mapcount)--;

	if (referenced)
		*vm_flags |= vma->vm_flags;
out:
	return referenced;
}
724
/*
 * Count references to an anonymous @page by walking the vmas in its
 * anon_vma's interval tree that cover the page's offset.
 *
 * @memcg:    if non-NULL, only vmas whose mm matches this cgroup count.
 * @vm_flags: passed through to page_referenced_one().
 *
 * Returns the number of referencing mappings found, or 0 if the anon_vma
 * could not be locked.
 */
static int page_referenced_anon(struct page *page,
				struct mem_cgroup *memcg,
				unsigned long *vm_flags)
{
	unsigned int mapcount;
	struct anon_vma *anon_vma;
	pgoff_t pgoff;
	struct anon_vma_chain *avc;
	int referenced = 0;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return referenced;

	mapcount = page_mapcount(page);
	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		/* Skip mappings outside the cgroup being scanned, if any. */
		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
			continue;
		referenced += page_referenced_one(page, vma, address,
						  &mapcount, vm_flags);
		/* All mappings seen, or an mlocked vma was hit. */
		if (!mapcount)
			break;
	}

	page_unlock_anon_vma(anon_vma);
	return referenced;
}
760
761
762
763
764
765
766
767
768
769
770
771
772
773
/*
 * Count references to a file-backed @page by walking the vmas in the
 * mapping's i_mmap interval tree that cover the page's offset.
 *
 * @memcg:    if non-NULL, only vmas whose mm matches this cgroup count.
 * @vm_flags: passed through to page_referenced_one().
 *
 * The page must be locked and must not be anonymous (both enforced with
 * BUG_ON).  Returns the number of referencing mappings found.
 */
static int page_referenced_file(struct page *page,
				struct mem_cgroup *memcg,
				unsigned long *vm_flags)
{
	unsigned int mapcount;
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	int referenced = 0;

	/* The caller dispatches anonymous pages elsewhere. */
	BUG_ON(PageAnon(page));

	/*
	 * NOTE(review): presumably the page lock is what keeps
	 * page->mapping stable while it is dereferenced below — confirm.
	 */
	BUG_ON(!PageLocked(page));

	mutex_lock(&mapping->i_mmap_mutex);

	/* Sample the mapcount only after i_mmap_mutex is held. */
	mapcount = page_mapcount(page);

	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);

		/* Skip mappings outside the cgroup being scanned, if any. */
		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
			continue;
		referenced += page_referenced_one(page, vma, address,
						  &mapcount, vm_flags);
		/* All mappings seen, or an mlocked vma was hit. */
		if (!mapcount)
			break;
	}

	mutex_unlock(&mapping->i_mmap_mutex);
	return referenced;
}
825
826
827
828
829
830
831
832
833
834
835
/*
 * Test whether @page was referenced and clear the reference bits found.
 *
 * @is_locked: non-zero if the caller already holds the page lock.
 * @memcg:     if non-NULL, restrict the scan to mappings in this cgroup.
 * @vm_flags:  out parameter; reset to 0 and then filled in by the
 *             per-mapping helpers.
 *
 * Returns the number of references found.  If the page lock is needed but
 * cannot be taken without blocking, the page is simply reported as
 * referenced once.
 */
int page_referenced(struct page *page,
		    int is_locked,
		    struct mem_cgroup *memcg,
		    unsigned long *vm_flags)
{
	int referenced = 0;
	int we_locked = 0;

	*vm_flags = 0;
	if (page_mapped(page) && page_rmapping(page)) {
		/* File and KSM pages are walked with the page lock held. */
		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
			we_locked = trylock_page(page);
			if (!we_locked) {
				/* Can't inspect it: count one reference. */
				referenced++;
				goto out;
			}
		}
		if (unlikely(PageKsm(page)))
			referenced += page_referenced_ksm(page, memcg,
								vm_flags);
		else if (PageAnon(page))
			referenced += page_referenced_anon(page, memcg,
								vm_flags);
		else if (page->mapping)
			referenced += page_referenced_file(page, memcg,
								vm_flags);
		if (we_locked)
			unlock_page(page);

		/* Also test and clear the young state tracked by pfn. */
		if (page_test_and_clear_young(page_to_pfn(page)))
			referenced++;
	}
out:
	return referenced;
}
871
/*
 * Write-protect and clean the pte mapping @page at @address in @vma.
 *
 * Returns 1 if the pte was dirty or writable and has been rewritten,
 * 0 if the page is not mapped there or the pte was already clean and
 * read-only.
 */
static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
			    unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = 0;

	/* sync=1: always check the pte under the page-table lock. */
	pte = page_check_address(page, mm, address, &ptl, 1);
	if (!pte)
		goto out;

	if (pte_dirty(*pte) || pte_write(*pte)) {
		pte_t entry;

		/*
		 * Clear and flush the old pte first, then install a
		 * write-protected, clean copy.
		 */
		flush_cache_page(vma, address, pte_pfn(*pte));
		entry = ptep_clear_flush(vma, address, pte);
		entry = pte_wrprotect(entry);
		entry = pte_mkclean(entry);
		set_pte_at(mm, address, pte, entry);
		ret = 1;
	}

	pte_unmap_unlock(pte, ptl);

	/* The notifier is called after the page-table lock is released. */
	if (ret)
		mmu_notifier_invalidate_page(mm, address);
out:
	return ret;
}
902
903static int page_mkclean_file(struct address_space *mapping, struct page *page)
904{
905 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
906 struct vm_area_struct *vma;
907 int ret = 0;
908
909 BUG_ON(PageAnon(page));
910
911 mutex_lock(&mapping->i_mmap_mutex);
912 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
913 if (vma->vm_flags & VM_SHARED) {
914 unsigned long address = vma_address(page, vma);
915 ret += page_mkclean_one(page, vma, address);
916 }
917 }
918 mutex_unlock(&mapping->i_mmap_mutex);
919 return ret;
920}
921
/*
 * Write-protect and clean all shared mappings of a locked page.
 *
 * Returns the number of ptes cleaned, or 0 when the page is not mapped or
 * page_mapping() reports no address_space for it.  BUGs if the page is
 * not locked.
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	return page_mkclean_file(mapping, page);
}
EXPORT_SYMBOL_GPL(page_mkclean);
937
938
939
940
941
942
943
944
945
946
947
948
/*
 * Repoint an anonymous @page at @vma's own anon_vma by rewriting
 * page->mapping.
 *
 * Under DEBUG_VM this asserts that the page is locked, that the vma has an
 * anon_vma, and that page->index already matches @address within @vma.
 */
void page_move_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(!anon_vma);
	VM_BUG_ON(page->index != linear_page_index(vma, address));

	/* Encode the anon_vma pointer with the PAGE_MAPPING_ANON tag. */
	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
}
961
962
963
964
965
966
967
968
/*
 * Initialise the reverse-mapping fields of a page being mapped
 * anonymously: page->mapping (tagged anon_vma pointer) and page->index.
 *
 * @exclusive: non-zero to point the page at @vma's own anon_vma; zero to
 *             point it at the root of the anon_vma tree instead.
 *
 * Does nothing if the page is already marked anonymous.
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	/* Already set up by an earlier mapping. */
	if (PageAnon(page))
		return;

	/*
	 * Unless the caller asserts exclusivity, use the root anon_vma.
	 * NOTE(review): presumably because a non-exclusive page may be
	 * mapped by other vmas in the same tree — confirm.
	 */
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}
991
992
993
994
995
996
997
/*
 * Debug-only sanity check for an additional anonymous mapping of @page:
 * its anon_vma must share a root with @vma's, and page->index must match
 * @address within @vma.  Compiles to nothing without CONFIG_DEBUG_VM.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	/* Only the roots are compared, not the anon_vmas themselves. */
	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
/*
 * Add a pte mapping of anonymous @page at @address in @vma.
 * Shorthand for do_page_add_anon_rmap() with exclusive == 0.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	do_page_add_anon_rmap(page, vma, address, 0);
}
1035
1036
1037
1038
1039
1040
/*
 * Account a new pte mapping of anonymous @page.
 *
 * Increments the mapcount.  On the first mapping the NR_ANON_PAGES (or
 * NR_ANON_TRANSPARENT_HUGEPAGES) zone counter is bumped and, unless the
 * page is a KSM page, its rmap fields are set up.  Later mappings are only
 * sanity-checked.  @exclusive is forwarded to __page_set_anon_rmap().
 */
void do_page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	/* _mapcount starts at -1, so reaching 0 means first mapping. */
	int first = atomic_inc_and_test(&page->_mapcount);
	if (first) {
		if (!PageTransHuge(page))
			__inc_zone_page_state(page, NR_ANON_PAGES);
		else
			__inc_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
	}
	/* KSM pages get no rmap setup or checks here. */
	if (unlikely(PageKsm(page)))
		return;

	VM_BUG_ON(!PageLocked(page));

	if (first)
		__page_set_anon_rmap(page, vma, address, exclusive);
	else
		__page_check_anon_rmap(page, vma, address);
}
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
/*
 * Add the first pte mapping of a newly allocated anonymous @page.
 *
 * The mapcount is set rather than incremented, the page is marked
 * swap-backed, the rmap is set up as exclusive to @vma, and the page is
 * put on the active anon LRU, or on the unevictable list when
 * mlocked_vma_newpage() says so.
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	SetPageSwapBacked(page);
	/* _mapcount starts at -1, so 0 means "mapped once". */
	atomic_set(&page->_mapcount, 0);
	if (!PageTransHuge(page))
		__inc_zone_page_state(page, NR_ANON_PAGES);
	else
		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
	__page_set_anon_rmap(page, vma, address, 1);
	if (!mlocked_vma_newpage(vma, page))
		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
	else
		add_page_to_unevictable_list(page);
}
1089
1090
1091
1092
1093
1094
1095
/*
 * Account a new pte mapping of file-backed @page.
 *
 * Increments the mapcount; on the first mapping the NR_FILE_MAPPED zone
 * counter and the memcg MEMCG_NR_FILE_MAPPED statistic are bumped.  The
 * update is bracketed by mem_cgroup_{begin,end}_update_page_stat().
 */
void page_add_file_rmap(struct page *page)
{
	bool locked;
	unsigned long flags;

	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
	if (atomic_inc_and_test(&page->_mapcount)) {
		__inc_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
	}
	mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
1108
1109
1110
1111
1112
1113
1114
/*
 * Account the removal of one pte mapping of @page.
 *
 * Decrements the mapcount.  When the last mapping goes away the dirty
 * state tracked by pfn is transferred to the page where applicable, the
 * zone and memcg statistics are updated, and a leftover mlock flag is
 * cleared.
 */
void page_remove_rmap(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	bool anon = PageAnon(page);
	bool locked;
	unsigned long flags;

	/* Only file pages have memcg page statistics updated below. */
	if (!anon)
		mem_cgroup_begin_update_page_stat(page, &locked, &flags);

	/* Still mapped elsewhere?  Then there is nothing more to do. */
	if (!atomic_add_negative(-1, &page->_mapcount))
		goto out;

	/*
	 * Last mapping gone.  For pages with a mapping that does not do
	 * dirty accounting, move the pfn-tracked dirty state to the page.
	 */
	if (mapping && !mapping_cap_account_dirty(mapping) &&
	    page_test_and_clear_dirty(page_to_pfn(page), 1))
		set_page_dirty(page);

	/* hugetlb pages skip all of the accounting below. */
	if (unlikely(PageHuge(page)))
		goto out;
	if (anon) {
		mem_cgroup_uncharge_page(page);
		if (!PageTransHuge(page))
			__dec_zone_page_state(page, NR_ANON_PAGES);
		else
			__dec_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
	} else {
		__dec_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
		/* Pairs with the begin call at the top for file pages. */
		mem_cgroup_end_update_page_stat(page, &locked, &flags);
	}
	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);

	return;
out:
	if (!anon)
		mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
1189
1190
1191
1192
1193
/*
 * Try to remove the pte mapping @page at @address in @vma.
 *
 * @flags selects the action (TTU_ACTION) and modifiers such as
 * TTU_IGNORE_MLOCK, TTU_IGNORE_ACCESS and TTU_IGNORE_HWPOISON.
 *
 * Returns:
 *   SWAP_AGAIN - the page is not mapped here, the mapping was removed, or
 *                this was a TTU_MUNLOCK probe of a non-mlocked vma;
 *   SWAP_FAIL  - the pte was recently referenced, or swap_duplicate()
 *                failed and the pte was restored;
 *   SWAP_MLOCK - the vma is VM_LOCKED and the page has been mlocked.
 */
int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, enum ttu_flags flags)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	int ret = SWAP_AGAIN;

	pte = page_check_address(page, mm, address, &ptl, 0);
	if (!pte)
		goto out;

	/*
	 * Unless told to ignore mlock: an mlocked vma diverts to out_mlock,
	 * and a TTU_MUNLOCK caller only wanted that check, so stop here.
	 */
	if (!(flags & TTU_IGNORE_MLOCK)) {
		if (vma->vm_flags & VM_LOCKED)
			goto out_mlock;

		if (TTU_ACTION(flags) == TTU_MUNLOCK)
			goto out_unmap;
	}
	/* A recently referenced pte is left in place. */
	if (!(flags & TTU_IGNORE_ACCESS)) {
		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			ret = SWAP_FAIL;
			goto out_unmap;
		}
	}

	/* Remove the page table entry. */
	flush_cache_page(vma, address, page_to_pfn(page));
	pteval = ptep_clear_flush(vma, address, pte);

	/* Carry the pte's dirty bit over to the page. */
	if (pte_dirty(pteval))
		set_page_dirty(page);

	/* Record the rss high watermark before the counters drop. */
	update_hiwater_rss(mm);

	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
		/* Replace the mapping with a hwpoison marker entry. */
		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
		set_pte_at(mm, address, pte,
				swp_entry_to_pte(make_hwpoison_entry(page)));
	} else if (PageAnon(page)) {
		swp_entry_t entry = { .val = page_private(page) };

		if (PageSwapCache(page)) {
			/*
			 * The pte will now refer to the swap entry, so take
			 * another reference on it.  On failure restore the
			 * original pte and give up.
			 */
			if (swap_duplicate(entry) < 0) {
				set_pte_at(mm, address, pte, pteval);
				ret = SWAP_FAIL;
				goto out_unmap;
			}
			/* Put this mm on the global mmlist if not yet there. */
			if (list_empty(&mm->mmlist)) {
				spin_lock(&mmlist_lock);
				if (list_empty(&mm->mmlist))
					list_add(&mm->mmlist, &init_mm.mmlist);
				spin_unlock(&mmlist_lock);
			}
			dec_mm_counter(mm, MM_ANONPAGES);
			inc_mm_counter(mm, MM_SWAPENTS);
		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
			/*
			 * An anonymous page outside the swap cache is only
			 * expected here for migration: install a migration
			 * entry in place of the pte.
			 */
			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
			entry = make_migration_entry(page, pte_write(pteval));
		}
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
		BUG_ON(pte_file(*pte));
	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
		/* File page under migration: install a migration entry. */
		swp_entry_t entry;
		entry = make_migration_entry(page, pte_write(pteval));
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
	} else
		dec_mm_counter(mm, MM_FILEPAGES);

	/* Drop the rmap accounting and the reference held by the mapping. */
	page_remove_rmap(page);
	page_cache_release(page);

out_unmap:
	pte_unmap_unlock(pte, ptl);
	if (ret != SWAP_FAIL)
		mmu_notifier_invalidate_page(mm, address);
out:
	return ret;

out_mlock:
	pte_unmap_unlock(pte, ptl);

	/*
	 * Only trylock mmap_sem, then re-check VM_LOCKED under it before
	 * mlocking the page.  If the trylock fails, SWAP_AGAIN is returned
	 * and the page is left as is.
	 * NOTE(review): presumably a blocking down_read() here could
	 * deadlock against locks the caller holds — confirm.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		if (vma->vm_flags & VM_LOCKED) {
			mlock_vma_page(page);
			ret = SWAP_MLOCK;
		}
		up_read(&vma->vm_mm->mmap_sem);
	}
	return ret;
}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
/*
 * Size and alignment mask of the address window scanned by one
 * try_to_unmap_cluster() call: 32 pages, capped at PMD_SIZE.
 */
#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
1343
/*
 * Scan one CLUSTER_SIZE-aligned window of a nonlinear @vma and unmap the
 * present, unreferenced file pages found in it.
 *
 * @cursor:     byte offset into the vma selecting the window.
 * @mapcount:   decremented once per page unmapped.
 * @check_page: the page the caller is really interested in.
 *
 * Returns SWAP_MLOCK if @check_page was found in a VM_LOCKED vma (pages
 * are then mlocked rather than unmapped), otherwise SWAP_AGAIN.
 */
static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
		struct vm_area_struct *vma, struct page *check_page)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	struct page *page;
	unsigned long address;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */
	unsigned long end;
	int ret = SWAP_AGAIN;
	int locked_vma = 0;

	/* Align the window, then clamp it to the vma. */
	address = (vma->vm_start + cursor) & CLUSTER_MASK;
	end = address + CLUSTER_SIZE;
	if (address < vma->vm_start)
		address = vma->vm_start;
	if (end > vma->vm_end)
		end = vma->vm_end;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return ret;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return ret;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return ret;

	mmun_start = address;
	mmun_end   = end;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	/*
	 * Trust VM_LOCKED only if mmap_sem can be taken for reading.  Keep
	 * holding it for the scan when the vma is locked, else drop it now.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		locked_vma = (vma->vm_flags & VM_LOCKED);
		if (!locked_vma)
			up_read(&vma->vm_mm->mmap_sem);
	}

	pte = pte_offset_map_lock(mm, pmd, address, &ptl);

	/* Record the rss high watermark before the counters drop. */
	update_hiwater_rss(mm);

	for (; address < end; pte++, address += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, address, *pte);
		BUG_ON(!page || PageAnon(page));

		if (locked_vma) {
			/*
			 * NOTE(review): only check_page is known to be
			 * locked by the caller; confirm mlock_vma_page() is
			 * safe on the other, unlocked pages in the window.
			 */
			mlock_vma_page(page);
			if (page == check_page)
				ret = SWAP_MLOCK;
			continue;	/* mlocked pages are not unmapped */
		}

		/* Recently referenced: leave it mapped. */
		if (ptep_clear_flush_young_notify(vma, address, pte))
			continue;

		/* Remove the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pte));
		pteval = ptep_clear_flush(vma, address, pte);

		/* If the page is not at its linear offset, store it in the pte. */
		if (page->index != linear_page_index(vma, address))
			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));

		/* Carry the pte's dirty bit over to the page. */
		if (pte_dirty(pteval))
			set_page_dirty(page);

		page_remove_rmap(page);
		page_cache_release(page);
		dec_mm_counter(mm, MM_FILEPAGES);
		(*mapcount)--;
	}
	/* pte was advanced one past the last entry scanned. */
	pte_unmap_unlock(pte - 1, ptl);
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	if (locked_vma)
		up_read(&vma->vm_mm->mmap_sem);
	return ret;
}
1439
1440bool is_vma_temporary_stack(struct vm_area_struct *vma)
1441{
1442 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1443
1444 if (!maybe_stack)
1445 return false;
1446
1447 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1448 VM_STACK_INCOMPLETE_SETUP)
1449 return true;
1450
1451 return false;
1452}
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
/*
 * Apply try_to_unmap_one() to every vma in the anon_vma interval tree
 * that covers anonymous @page's offset.
 *
 * Returns the result of the last try_to_unmap_one() call, or SWAP_AGAIN
 * if the anon_vma could not be locked or no vma was visited.
 */
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff;
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return ret;

	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address;

		/*
		 * During migration, skip vmas that are temporary stacks.
		 * NOTE(review): presumably because such a vma may be moved
		 * while its setup is incomplete, so a migration entry could
		 * not be found again — confirm.
		 */
		if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
				is_vma_temporary_stack(vma))
			continue;

		address = vma_address(page, vma);
		ret = try_to_unmap_one(page, vma, address, flags);
		/* Stop on a definite result or once fully unmapped. */
		if (ret != SWAP_AGAIN || !page_mapped(page))
			break;
	}

	page_unlock_anon_vma(anon_vma);
	return ret;
}
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
/*
 * Try to unmap a file-backed @page from every vma that maps it.
 *
 * Linear vmas are handled exactly through the i_mmap interval tree.  If
 * the page is still mapped and the mapping has nonlinear vmas, those are
 * swept cluster by cluster, using vma->vm_private_data as a per-vma
 * cursor, until the page's mapcount is exhausted or every vma has been
 * scanned.
 *
 * Returns SWAP_AGAIN, SWAP_FAIL or SWAP_MLOCK.
 */
static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	int ret = SWAP_AGAIN;
	unsigned long cursor;
	unsigned long max_nl_cursor = 0;
	unsigned long max_nl_size = 0;
	unsigned int mapcount;

	mutex_lock(&mapping->i_mmap_mutex);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		ret = try_to_unmap_one(page, vma, address, flags);
		if (ret != SWAP_AGAIN || !page_mapped(page))
			goto out;
	}

	if (list_empty(&mapping->i_mmap_nonlinear))
		goto out;

	/* A TTU_MUNLOCK caller does not scan nonlinear vmas. */
	if (TTU_ACTION(flags) == TTU_MUNLOCK)
		goto out;

	/* Find the furthest cursor and the largest nonlinear vma. */
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
							shared.nonlinear) {
		cursor = (unsigned long) vma->vm_private_data;
		if (cursor > max_nl_cursor)
			max_nl_cursor = cursor;
		cursor = vma->vm_end - vma->vm_start;
		if (cursor > max_nl_size)
			max_nl_size = cursor;
	}

	if (max_nl_size == 0) {	/* nothing to scan */
		ret = SWAP_FAIL;
		goto out;
	}

	/*
	 * The exact location of the page in the nonlinear vmas is unknown,
	 * so pages are unmapped from them until this page's mapcount is
	 * used up.
	 */
	mapcount = page_mapcount(page);
	if (!mapcount)
		goto out;
	cond_resched();

	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
	if (max_nl_cursor == 0)
		max_nl_cursor = CLUSTER_SIZE;

	do {
		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
							shared.nonlinear) {
			cursor = (unsigned long) vma->vm_private_data;
			/* Advance this vma up to the common cursor. */
			while ( cursor < max_nl_cursor &&
				cursor < vma->vm_end - vma->vm_start) {
				if (try_to_unmap_cluster(cursor, &mapcount,
						vma, page) == SWAP_MLOCK)
					ret = SWAP_MLOCK;
				cursor += CLUSTER_SIZE;
				vma->vm_private_data = (void *) cursor;
				/* mapcount is unsigned; catch wrap-around. */
				if ((int)mapcount <= 0)
					goto out;
			}
			vma->vm_private_data = (void *) max_nl_cursor;
		}
		cond_resched();
		max_nl_cursor += CLUSTER_SIZE;
	} while (max_nl_cursor <= max_nl_size);

	/*
	 * Every nonlinear vma was scanned to the end without exhausting the
	 * mapcount: reset the cursors for the next call.
	 */
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
		vma->vm_private_data = NULL;
out:
	mutex_unlock(&mapping->i_mmap_mutex);
	return ret;
}
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
/*
 * Try to remove all page table mappings of a locked @page.
 *
 * Dispatches to the KSM, anonymous or file variant according to the page
 * type.  Returns SWAP_SUCCESS if the page ends up unmapped (and the
 * helper did not report SWAP_MLOCK), otherwise the helper's result
 * (SWAP_AGAIN, SWAP_FAIL or SWAP_MLOCK).
 */
int try_to_unmap(struct page *page, enum ttu_flags flags)
{
	int ret;

	BUG_ON(!PageLocked(page));
	/* Transparent huge pages are not accepted here. */
	VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));

	if (unlikely(PageKsm(page)))
		ret = try_to_unmap_ksm(page, flags);
	else if (PageAnon(page))
		ret = try_to_unmap_anon(page, flags);
	else
		ret = try_to_unmap_file(page, flags);
	if (ret != SWAP_MLOCK && !page_mapped(page))
		ret = SWAP_SUCCESS;
	return ret;
}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663int try_to_munlock(struct page *page)
1664{
1665 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1666
1667 if (unlikely(PageKsm(page)))
1668 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1669 else if (PageAnon(page))
1670 return try_to_unmap_anon(page, TTU_MUNLOCK);
1671 else
1672 return try_to_unmap_file(page, TTU_MUNLOCK);
1673}
1674
1675void __put_anon_vma(struct anon_vma *anon_vma)
1676{
1677 struct anon_vma *root = anon_vma->root;
1678
1679 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1680 anon_vma_free(root);
1681
1682 anon_vma_free(anon_vma);
1683}
1684
1685#ifdef CONFIG_MIGRATION
1686
1687
1688
1689
/*
 * Call @rmap_one for every vma in the anon_vma interval tree that covers
 * anonymous @page's offset, holding the anon_vma lock for the walk.
 *
 * The walk stops as soon as @rmap_one returns something other than
 * SWAP_AGAIN.  Returns the last @rmap_one result, or SWAP_AGAIN when the
 * page has no anon_vma or no vma was visited.
 */
static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	/*
	 * The anon_vma is looked up with page_anon_vma(), without taking a
	 * reference and without the mapped-page checks that
	 * page_lock_anon_vma() performs.
	 * NOTE(review): presumably callers guarantee the anon_vma stays
	 * alive for the duration of the walk — confirm.
	 */
	anon_vma = page_anon_vma(page);
	if (!anon_vma)
		return ret;
	anon_vma_lock(anon_vma);
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);
		ret = rmap_one(page, vma, address, arg);
		if (ret != SWAP_AGAIN)
			break;
	}
	anon_vma_unlock(anon_vma);
	return ret;
}
1718
/*
 * Call @rmap_one for every vma in the mapping's i_mmap interval tree that
 * covers file-backed @page's offset, holding i_mmap_mutex for the walk.
 *
 * The walk stops as soon as @rmap_one returns something other than
 * SWAP_AGAIN.  Returns the last @rmap_one result, or SWAP_AGAIN when the
 * page has no mapping or no vma was visited.
 */
static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	int ret = SWAP_AGAIN;

	if (!mapping)
		return ret;
	mutex_lock(&mapping->i_mmap_mutex);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		ret = rmap_one(page, vma, address, arg);
		if (ret != SWAP_AGAIN)
			break;
	}
	/* Only the interval tree is walked; i_mmap_nonlinear is not. */
	mutex_unlock(&mapping->i_mmap_mutex);
	return ret;
}
1744
/*
 * Invoke @rmap_one(page, vma, address, @arg) on every vma mapping the
 * locked @page, choosing the KSM, anonymous or file walker by page type.
 * Returns whatever the chosen walker returns.
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);

	if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);

	return rmap_walk_file(page, rmap_one, arg);
}
1757#endif
1758
1759#ifdef CONFIG_HUGETLB_PAGE
1760
1761
1762
1763
1764
/*
 * hugetlb counterpart of __page_set_anon_rmap(): set page->mapping to the
 * tagged anon_vma pointer and page->index to the page's linear index in
 * @vma.
 *
 * @exclusive: non-zero to use @vma's own anon_vma, zero to use the root.
 *
 * Does nothing if the page is already marked anonymous.
 */
static void __hugepage_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;
	if (!exclusive)
		anon_vma = anon_vma->root;

	/* Encode the anon_vma pointer with the PAGE_MAPPING_ANON tag. */
	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}
1781
1782void hugepage_add_anon_rmap(struct page *page,
1783 struct vm_area_struct *vma, unsigned long address)
1784{
1785 struct anon_vma *anon_vma = vma->anon_vma;
1786 int first;
1787
1788 BUG_ON(!PageLocked(page));
1789 BUG_ON(!anon_vma);
1790
1791 first = atomic_inc_and_test(&page->_mapcount);
1792 if (first)
1793 __hugepage_set_anon_rmap(page, vma, address, 0);
1794}
1795
/*
 * Add the first mapping of a newly allocated anonymous hugetlb @page:
 * set the mapcount to "mapped once" and set up the rmap fields as
 * exclusive to @vma.  BUGs if @address lies outside @vma.
 */
void hugepage_add_new_anon_rmap(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	/* _mapcount starts at -1, so 0 means "mapped once". */
	atomic_set(&page->_mapcount, 0);
	__hugepage_set_anon_rmap(page, vma, address, 1);
}
1803#endif
1804