1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <linux/mm.h>
46#include <linux/pagemap.h>
47#include <linux/swap.h>
48#include <linux/swapops.h>
49#include <linux/slab.h>
50#include <linux/init.h>
51#include <linux/ksm.h>
52#include <linux/rmap.h>
53#include <linux/rcupdate.h>
54#include <linux/export.h>
55#include <linux/memcontrol.h>
56#include <linux/mmu_notifier.h>
57#include <linux/migrate.h>
58#include <linux/hugetlb.h>
59
60#include <asm/tlbflush.h>
61
62#include "internal.h"
63
64static struct kmem_cache *anon_vma_cachep;
65static struct kmem_cache *anon_vma_chain_cachep;
66
67static inline struct anon_vma *anon_vma_alloc(void)
68{
69 struct anon_vma *anon_vma;
70
71 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
72 if (anon_vma) {
73 atomic_set(&anon_vma->refcount, 1);
74
75
76
77
78 anon_vma->root = anon_vma;
79 }
80
81 return anon_vma;
82}
83
84static inline void anon_vma_free(struct anon_vma *anon_vma)
85{
86 VM_BUG_ON(atomic_read(&anon_vma->refcount));
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 if (mutex_is_locked(&anon_vma->root->mutex)) {
106 anon_vma_lock(anon_vma);
107 anon_vma_unlock(anon_vma);
108 }
109
110 kmem_cache_free(anon_vma_cachep, anon_vma);
111}
112
113static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
114{
115 return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
116}
117
118static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
119{
120 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
121}
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150int anon_vma_prepare(struct vm_area_struct *vma)
151{
152 struct anon_vma *anon_vma = vma->anon_vma;
153 struct anon_vma_chain *avc;
154
155 might_sleep();
156 if (unlikely(!anon_vma)) {
157 struct mm_struct *mm = vma->vm_mm;
158 struct anon_vma *allocated;
159
160 avc = anon_vma_chain_alloc(GFP_KERNEL);
161 if (!avc)
162 goto out_enomem;
163
164 anon_vma = find_mergeable_anon_vma(vma);
165 allocated = NULL;
166 if (!anon_vma) {
167 anon_vma = anon_vma_alloc();
168 if (unlikely(!anon_vma))
169 goto out_enomem_free_avc;
170 allocated = anon_vma;
171 }
172
173 anon_vma_lock(anon_vma);
174
175 spin_lock(&mm->page_table_lock);
176 if (likely(!vma->anon_vma)) {
177 vma->anon_vma = anon_vma;
178 avc->anon_vma = anon_vma;
179 avc->vma = vma;
180 list_add(&avc->same_vma, &vma->anon_vma_chain);
181 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
182 allocated = NULL;
183 avc = NULL;
184 }
185 spin_unlock(&mm->page_table_lock);
186 anon_vma_unlock(anon_vma);
187
188 if (unlikely(allocated))
189 put_anon_vma(allocated);
190 if (unlikely(avc))
191 anon_vma_chain_free(avc);
192 }
193 return 0;
194
195 out_enomem_free_avc:
196 anon_vma_chain_free(avc);
197 out_enomem:
198 return -ENOMEM;
199}
200
201
202
203
204
205
206
207
208
209static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
210{
211 struct anon_vma *new_root = anon_vma->root;
212 if (new_root != root) {
213 if (WARN_ON_ONCE(root))
214 mutex_unlock(&root->mutex);
215 root = new_root;
216 mutex_lock(&root->mutex);
217 }
218 return root;
219}
220
221static inline void unlock_anon_vma_root(struct anon_vma *root)
222{
223 if (root)
224 mutex_unlock(&root->mutex);
225}
226
227static void anon_vma_chain_link(struct vm_area_struct *vma,
228 struct anon_vma_chain *avc,
229 struct anon_vma *anon_vma)
230{
231 avc->vma = vma;
232 avc->anon_vma = anon_vma;
233 list_add(&avc->same_vma, &vma->anon_vma_chain);
234
235
236
237
238
239 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
240}
241
242
243
244
245
246int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
247{
248 struct anon_vma_chain *avc, *pavc;
249 struct anon_vma *root = NULL;
250
251 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
252 struct anon_vma *anon_vma;
253
254 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
255 if (unlikely(!avc)) {
256 unlock_anon_vma_root(root);
257 root = NULL;
258 avc = anon_vma_chain_alloc(GFP_KERNEL);
259 if (!avc)
260 goto enomem_failure;
261 }
262 anon_vma = pavc->anon_vma;
263 root = lock_anon_vma_root(root, anon_vma);
264 anon_vma_chain_link(dst, avc, anon_vma);
265 }
266 unlock_anon_vma_root(root);
267 return 0;
268
269 enomem_failure:
270 unlink_anon_vmas(dst);
271 return -ENOMEM;
272}
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304void anon_vma_moveto_tail(struct vm_area_struct *dst)
305{
306 struct anon_vma_chain *pavc;
307 struct anon_vma *root = NULL;
308
309 list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
310 struct anon_vma *anon_vma = pavc->anon_vma;
311 VM_BUG_ON(pavc->vma != dst);
312 root = lock_anon_vma_root(root, anon_vma);
313 list_del(&pavc->same_anon_vma);
314 list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
315 }
316 unlock_anon_vma_root(root);
317}
318
319
320
321
322
323
324int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
325{
326 struct anon_vma_chain *avc;
327 struct anon_vma *anon_vma;
328
329
330 if (!pvma->anon_vma)
331 return 0;
332
333
334
335
336
337 if (anon_vma_clone(vma, pvma))
338 return -ENOMEM;
339
340
341 anon_vma = anon_vma_alloc();
342 if (!anon_vma)
343 goto out_error;
344 avc = anon_vma_chain_alloc(GFP_KERNEL);
345 if (!avc)
346 goto out_error_free_anon_vma;
347
348
349
350
351
352 anon_vma->root = pvma->anon_vma->root;
353
354
355
356
357
358 get_anon_vma(anon_vma->root);
359
360 vma->anon_vma = anon_vma;
361 anon_vma_lock(anon_vma);
362 anon_vma_chain_link(vma, avc, anon_vma);
363 anon_vma_unlock(anon_vma);
364
365 return 0;
366
367 out_error_free_anon_vma:
368 put_anon_vma(anon_vma);
369 out_error:
370 unlink_anon_vmas(vma);
371 return -ENOMEM;
372}
373
374void unlink_anon_vmas(struct vm_area_struct *vma)
375{
376 struct anon_vma_chain *avc, *next;
377 struct anon_vma *root = NULL;
378
379
380
381
382
383 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
384 struct anon_vma *anon_vma = avc->anon_vma;
385
386 root = lock_anon_vma_root(root, anon_vma);
387 list_del(&avc->same_anon_vma);
388
389
390
391
392
393 if (list_empty(&anon_vma->head))
394 continue;
395
396 list_del(&avc->same_vma);
397 anon_vma_chain_free(avc);
398 }
399 unlock_anon_vma_root(root);
400
401
402
403
404
405
406 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
407 struct anon_vma *anon_vma = avc->anon_vma;
408
409 put_anon_vma(anon_vma);
410
411 list_del(&avc->same_vma);
412 anon_vma_chain_free(avc);
413 }
414}
415
416static void anon_vma_ctor(void *data)
417{
418 struct anon_vma *anon_vma = data;
419
420 mutex_init(&anon_vma->mutex);
421 atomic_set(&anon_vma->refcount, 0);
422 INIT_LIST_HEAD(&anon_vma->head);
423}
424
425void __init anon_vma_init(void)
426{
427 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
428 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
429 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
430}
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455struct anon_vma *page_get_anon_vma(struct page *page)
456{
457 struct anon_vma *anon_vma = NULL;
458 unsigned long anon_mapping;
459
460 rcu_read_lock();
461 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
462 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
463 goto out;
464 if (!page_mapped(page))
465 goto out;
466
467 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
468 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
469 anon_vma = NULL;
470 goto out;
471 }
472
473
474
475
476
477
478
479
480 if (!page_mapped(page)) {
481 put_anon_vma(anon_vma);
482 anon_vma = NULL;
483 }
484out:
485 rcu_read_unlock();
486
487 return anon_vma;
488}
489
490
491
492
493
494
495
496
497struct anon_vma *page_lock_anon_vma(struct page *page)
498{
499 struct anon_vma *anon_vma = NULL;
500 struct anon_vma *root_anon_vma;
501 unsigned long anon_mapping;
502
503 rcu_read_lock();
504 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
505 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
506 goto out;
507 if (!page_mapped(page))
508 goto out;
509
510 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
511 root_anon_vma = ACCESS_ONCE(anon_vma->root);
512 if (mutex_trylock(&root_anon_vma->mutex)) {
513
514
515
516
517
518 if (!page_mapped(page)) {
519 mutex_unlock(&root_anon_vma->mutex);
520 anon_vma = NULL;
521 }
522 goto out;
523 }
524
525
526 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
527 anon_vma = NULL;
528 goto out;
529 }
530
531 if (!page_mapped(page)) {
532 put_anon_vma(anon_vma);
533 anon_vma = NULL;
534 goto out;
535 }
536
537
538 rcu_read_unlock();
539 anon_vma_lock(anon_vma);
540
541 if (atomic_dec_and_test(&anon_vma->refcount)) {
542
543
544
545
546
547 anon_vma_unlock(anon_vma);
548 __put_anon_vma(anon_vma);
549 anon_vma = NULL;
550 }
551
552 return anon_vma;
553
554out:
555 rcu_read_unlock();
556 return anon_vma;
557}
558
/* Counterpart of page_lock_anon_vma(): drop the anon_vma lock again. */
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	struct anon_vma *locked = anon_vma;

	anon_vma_unlock(locked);
}
563
564
565
566
567
568
569inline unsigned long
570vma_address(struct page *page, struct vm_area_struct *vma)
571{
572 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
573 unsigned long address;
574
575 if (unlikely(is_vm_hugetlb_page(vma)))
576 pgoff = page->index << huge_page_order(page_hstate(page));
577 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
578 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
579
580 return -EFAULT;
581 }
582 return address;
583}
584
585
586
587
588
589unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
590{
591 if (PageAnon(page)) {
592 struct anon_vma *page__anon_vma = page_anon_vma(page);
593
594
595
596
597 if (!vma->anon_vma || !page__anon_vma ||
598 vma->anon_vma->root != page__anon_vma->root)
599 return -EFAULT;
600 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
601 if (!vma->vm_file ||
602 vma->vm_file->f_mapping != page->mapping)
603 return -EFAULT;
604 } else
605 return -EFAULT;
606 return vma_address(page, vma);
607}
608
609
610
611
612
613
614
615
616
617
618pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
619 unsigned long address, spinlock_t **ptlp, int sync)
620{
621 pgd_t *pgd;
622 pud_t *pud;
623 pmd_t *pmd;
624 pte_t *pte;
625 spinlock_t *ptl;
626
627 if (unlikely(PageHuge(page))) {
628 pte = huge_pte_offset(mm, address);
629 ptl = &mm->page_table_lock;
630 goto check;
631 }
632
633 pgd = pgd_offset(mm, address);
634 if (!pgd_present(*pgd))
635 return NULL;
636
637 pud = pud_offset(pgd, address);
638 if (!pud_present(*pud))
639 return NULL;
640
641 pmd = pmd_offset(pud, address);
642 if (!pmd_present(*pmd))
643 return NULL;
644 if (pmd_trans_huge(*pmd))
645 return NULL;
646
647 pte = pte_offset_map(pmd, address);
648
649 if (!sync && !pte_present(*pte)) {
650 pte_unmap(pte);
651 return NULL;
652 }
653
654 ptl = pte_lockptr(mm, pmd);
655check:
656 spin_lock(ptl);
657 if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
658 *ptlp = ptl;
659 return pte;
660 }
661 pte_unmap_unlock(pte, ptl);
662 return NULL;
663}
664
665
666
667
668
669
670
671
672
673
674int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
675{
676 unsigned long address;
677 pte_t *pte;
678 spinlock_t *ptl;
679
680 address = vma_address(page, vma);
681 if (address == -EFAULT)
682 return 0;
683 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
684 if (!pte)
685 return 0;
686 pte_unmap_unlock(pte, ptl);
687
688 return 1;
689}
690
691
692
693
694
695int page_referenced_one(struct page *page, struct vm_area_struct *vma,
696 unsigned long address, unsigned int *mapcount,
697 unsigned long *vm_flags)
698{
699 struct mm_struct *mm = vma->vm_mm;
700 int referenced = 0;
701
702 if (unlikely(PageTransHuge(page))) {
703 pmd_t *pmd;
704
705 spin_lock(&mm->page_table_lock);
706
707
708
709
710 pmd = page_check_address_pmd(page, mm, address,
711 PAGE_CHECK_ADDRESS_PMD_FLAG);
712 if (!pmd) {
713 spin_unlock(&mm->page_table_lock);
714 goto out;
715 }
716
717 if (vma->vm_flags & VM_LOCKED) {
718 spin_unlock(&mm->page_table_lock);
719 *mapcount = 0;
720 *vm_flags |= VM_LOCKED;
721 goto out;
722 }
723
724
725 if (pmdp_clear_flush_young_notify(vma, address, pmd))
726 referenced++;
727 spin_unlock(&mm->page_table_lock);
728 } else {
729 pte_t *pte;
730 spinlock_t *ptl;
731
732
733
734
735
736 pte = page_check_address(page, mm, address, &ptl, 0);
737 if (!pte)
738 goto out;
739
740 if (vma->vm_flags & VM_LOCKED) {
741 pte_unmap_unlock(pte, ptl);
742 *mapcount = 0;
743 *vm_flags |= VM_LOCKED;
744 goto out;
745 }
746
747 if (ptep_clear_flush_young_notify(vma, address, pte)) {
748
749
750
751
752
753
754
755 if (likely(!VM_SequentialReadHint(vma)))
756 referenced++;
757 }
758 pte_unmap_unlock(pte, ptl);
759 }
760
761
762
763 if (mm != current->mm && has_swap_token(mm) &&
764 rwsem_is_locked(&mm->mmap_sem))
765 referenced++;
766
767 (*mapcount)--;
768
769 if (referenced)
770 *vm_flags |= vma->vm_flags;
771out:
772 return referenced;
773}
774
775static int page_referenced_anon(struct page *page,
776 struct mem_cgroup *memcg,
777 unsigned long *vm_flags)
778{
779 unsigned int mapcount;
780 struct anon_vma *anon_vma;
781 struct anon_vma_chain *avc;
782 int referenced = 0;
783
784 anon_vma = page_lock_anon_vma(page);
785 if (!anon_vma)
786 return referenced;
787
788 mapcount = page_mapcount(page);
789 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
790 struct vm_area_struct *vma = avc->vma;
791 unsigned long address = vma_address(page, vma);
792 if (address == -EFAULT)
793 continue;
794
795
796
797
798
799 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
800 continue;
801 referenced += page_referenced_one(page, vma, address,
802 &mapcount, vm_flags);
803 if (!mapcount)
804 break;
805 }
806
807 page_unlock_anon_vma(anon_vma);
808 return referenced;
809}
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824static int page_referenced_file(struct page *page,
825 struct mem_cgroup *memcg,
826 unsigned long *vm_flags)
827{
828 unsigned int mapcount;
829 struct address_space *mapping = page->mapping;
830 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
831 struct vm_area_struct *vma;
832 struct prio_tree_iter iter;
833 int referenced = 0;
834
835
836
837
838
839
840 BUG_ON(PageAnon(page));
841
842
843
844
845
846
847
848 BUG_ON(!PageLocked(page));
849
850 mutex_lock(&mapping->i_mmap_mutex);
851
852
853
854
855
856 mapcount = page_mapcount(page);
857
858 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
859 unsigned long address = vma_address(page, vma);
860 if (address == -EFAULT)
861 continue;
862
863
864
865
866
867 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
868 continue;
869 referenced += page_referenced_one(page, vma, address,
870 &mapcount, vm_flags);
871 if (!mapcount)
872 break;
873 }
874
875 mutex_unlock(&mapping->i_mmap_mutex);
876 return referenced;
877}
878
879
880
881
882
883
884
885
886
887
888
889int page_referenced(struct page *page,
890 int is_locked,
891 struct mem_cgroup *memcg,
892 unsigned long *vm_flags)
893{
894 int referenced = 0;
895 int we_locked = 0;
896
897 *vm_flags = 0;
898 if (page_mapped(page) && page_rmapping(page)) {
899 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
900 we_locked = trylock_page(page);
901 if (!we_locked) {
902 referenced++;
903 goto out;
904 }
905 }
906 if (unlikely(PageKsm(page)))
907 referenced += page_referenced_ksm(page, memcg,
908 vm_flags);
909 else if (PageAnon(page))
910 referenced += page_referenced_anon(page, memcg,
911 vm_flags);
912 else if (page->mapping)
913 referenced += page_referenced_file(page, memcg,
914 vm_flags);
915 if (we_locked)
916 unlock_page(page);
917
918 if (page_test_and_clear_young(page_to_pfn(page)))
919 referenced++;
920 }
921out:
922 return referenced;
923}
924
925static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
926 unsigned long address)
927{
928 struct mm_struct *mm = vma->vm_mm;
929 pte_t *pte;
930 spinlock_t *ptl;
931 int ret = 0;
932
933 pte = page_check_address(page, mm, address, &ptl, 1);
934 if (!pte)
935 goto out;
936
937 if (pte_dirty(*pte) || pte_write(*pte)) {
938 pte_t entry;
939
940 flush_cache_page(vma, address, pte_pfn(*pte));
941 entry = ptep_clear_flush_notify(vma, address, pte);
942 entry = pte_wrprotect(entry);
943 entry = pte_mkclean(entry);
944 set_pte_at(mm, address, pte, entry);
945 ret = 1;
946 }
947
948 pte_unmap_unlock(pte, ptl);
949out:
950 return ret;
951}
952
953static int page_mkclean_file(struct address_space *mapping, struct page *page)
954{
955 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
956 struct vm_area_struct *vma;
957 struct prio_tree_iter iter;
958 int ret = 0;
959
960 BUG_ON(PageAnon(page));
961
962 mutex_lock(&mapping->i_mmap_mutex);
963 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
964 if (vma->vm_flags & VM_SHARED) {
965 unsigned long address = vma_address(page, vma);
966 if (address == -EFAULT)
967 continue;
968 ret += page_mkclean_one(page, vma, address);
969 }
970 }
971 mutex_unlock(&mapping->i_mmap_mutex);
972 return ret;
973}
974
/*
 * Clean and write-protect all shared mappings of a locked @page.
 *
 * Returns 0 if the page is not mapped or has no mapping.  Otherwise
 * returns the number of ptes cleaned, or 1 if the per-pfn dirty state
 * was set (and has now been cleared).
 */
int page_mkclean(struct page *page)
{
	struct address_space *mapping;
	int ret;

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	ret = page_mkclean_file(mapping, page);
	if (page_test_and_clear_dirty(page_to_pfn(page), 1))
		ret = 1;

	return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005void page_move_anon_rmap(struct page *page,
1006 struct vm_area_struct *vma, unsigned long address)
1007{
1008 struct anon_vma *anon_vma = vma->anon_vma;
1009
1010 VM_BUG_ON(!PageLocked(page));
1011 VM_BUG_ON(!anon_vma);
1012 VM_BUG_ON(page->index != linear_page_index(vma, address));
1013
1014 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1015 page->mapping = (struct address_space *) anon_vma;
1016}
1017
1018
1019
1020
1021
1022
1023
1024
1025static void __page_set_anon_rmap(struct page *page,
1026 struct vm_area_struct *vma, unsigned long address, int exclusive)
1027{
1028 struct anon_vma *anon_vma = vma->anon_vma;
1029
1030 BUG_ON(!anon_vma);
1031
1032 if (PageAnon(page))
1033 return;
1034
1035
1036
1037
1038
1039
1040 if (!exclusive)
1041 anon_vma = anon_vma->root;
1042
1043 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1044 page->mapping = (struct address_space *) anon_vma;
1045 page->index = linear_page_index(vma, address);
1046}
1047
1048
1049
1050
1051
1052
1053
/*
 * Debug-only sanity check for an already anonymous @page that gets
 * mapped into @vma: page and vma must share the same anon_vma root, and
 * the page index must match @address.  Compiles to nothing unless
 * CONFIG_DEBUG_VM is set.
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	struct anon_vma *page_root = page_anon_vma(page)->root;

	BUG_ON(page_root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
/*
 * Add a mapping of anonymous @page at @address in @vma.  Shorthand for
 * do_page_add_anon_rmap() with the exclusive flag cleared.
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	const int exclusive = 0;

	do_page_add_anon_rmap(page, vma, address, exclusive);
}
1091
1092
1093
1094
1095
1096
1097void do_page_add_anon_rmap(struct page *page,
1098 struct vm_area_struct *vma, unsigned long address, int exclusive)
1099{
1100 int first = atomic_inc_and_test(&page->_mapcount);
1101 if (first) {
1102 if (!PageTransHuge(page))
1103 __inc_zone_page_state(page, NR_ANON_PAGES);
1104 else
1105 __inc_zone_page_state(page,
1106 NR_ANON_TRANSPARENT_HUGEPAGES);
1107 }
1108 if (unlikely(PageKsm(page)))
1109 return;
1110
1111 VM_BUG_ON(!PageLocked(page));
1112
1113 if (first)
1114 __page_set_anon_rmap(page, vma, address, exclusive);
1115 else
1116 __page_check_anon_rmap(page, vma, address);
1117}
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129void page_add_new_anon_rmap(struct page *page,
1130 struct vm_area_struct *vma, unsigned long address)
1131{
1132 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1133 SetPageSwapBacked(page);
1134 atomic_set(&page->_mapcount, 0);
1135 if (!PageTransHuge(page))
1136 __inc_zone_page_state(page, NR_ANON_PAGES);
1137 else
1138 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1139 __page_set_anon_rmap(page, vma, address, 1);
1140 if (page_evictable(page, vma))
1141 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
1142 else
1143 add_page_to_unevictable_list(page);
1144}
1145
1146
1147
1148
1149
1150
1151
1152void page_add_file_rmap(struct page *page)
1153{
1154 if (atomic_inc_and_test(&page->_mapcount)) {
1155 __inc_zone_page_state(page, NR_FILE_MAPPED);
1156 mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
1157 }
1158}
1159
1160
1161
1162
1163
1164
1165
1166void page_remove_rmap(struct page *page)
1167{
1168
1169 if (!atomic_add_negative(-1, &page->_mapcount))
1170 return;
1171
1172
1173
1174
1175
1176
1177
1178
1179 if ((!PageAnon(page) || PageSwapCache(page)) &&
1180 page_test_and_clear_dirty(page_to_pfn(page), 1))
1181 set_page_dirty(page);
1182
1183
1184
1185
1186 if (unlikely(PageHuge(page)))
1187 return;
1188 if (PageAnon(page)) {
1189 mem_cgroup_uncharge_page(page);
1190 if (!PageTransHuge(page))
1191 __dec_zone_page_state(page, NR_ANON_PAGES);
1192 else
1193 __dec_zone_page_state(page,
1194 NR_ANON_TRANSPARENT_HUGEPAGES);
1195 } else {
1196 __dec_zone_page_state(page, NR_FILE_MAPPED);
1197 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1198 }
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208}
1209
1210
1211
1212
1213
1214int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1215 unsigned long address, enum ttu_flags flags)
1216{
1217 struct mm_struct *mm = vma->vm_mm;
1218 pte_t *pte;
1219 pte_t pteval;
1220 spinlock_t *ptl;
1221 int ret = SWAP_AGAIN;
1222
1223 pte = page_check_address(page, mm, address, &ptl, 0);
1224 if (!pte)
1225 goto out;
1226
1227
1228
1229
1230
1231
1232 if (!(flags & TTU_IGNORE_MLOCK)) {
1233 if (vma->vm_flags & VM_LOCKED)
1234 goto out_mlock;
1235
1236 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1237 goto out_unmap;
1238 }
1239 if (!(flags & TTU_IGNORE_ACCESS)) {
1240 if (ptep_clear_flush_young_notify(vma, address, pte)) {
1241 ret = SWAP_FAIL;
1242 goto out_unmap;
1243 }
1244 }
1245
1246
1247 flush_cache_page(vma, address, page_to_pfn(page));
1248 pteval = ptep_clear_flush_notify(vma, address, pte);
1249
1250
1251 if (pte_dirty(pteval))
1252 set_page_dirty(page);
1253
1254
1255 update_hiwater_rss(mm);
1256
1257 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1258 if (PageAnon(page))
1259 dec_mm_counter(mm, MM_ANONPAGES);
1260 else
1261 dec_mm_counter(mm, MM_FILEPAGES);
1262 set_pte_at(mm, address, pte,
1263 swp_entry_to_pte(make_hwpoison_entry(page)));
1264 } else if (PageAnon(page)) {
1265 swp_entry_t entry = { .val = page_private(page) };
1266
1267 if (PageSwapCache(page)) {
1268
1269
1270
1271
1272 if (swap_duplicate(entry) < 0) {
1273 set_pte_at(mm, address, pte, pteval);
1274 ret = SWAP_FAIL;
1275 goto out_unmap;
1276 }
1277 if (list_empty(&mm->mmlist)) {
1278 spin_lock(&mmlist_lock);
1279 if (list_empty(&mm->mmlist))
1280 list_add(&mm->mmlist, &init_mm.mmlist);
1281 spin_unlock(&mmlist_lock);
1282 }
1283 dec_mm_counter(mm, MM_ANONPAGES);
1284 inc_mm_counter(mm, MM_SWAPENTS);
1285 } else if (PAGE_MIGRATION) {
1286
1287
1288
1289
1290
1291 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1292 entry = make_migration_entry(page, pte_write(pteval));
1293 }
1294 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1295 BUG_ON(pte_file(*pte));
1296 } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
1297
1298 swp_entry_t entry;
1299 entry = make_migration_entry(page, pte_write(pteval));
1300 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1301 } else
1302 dec_mm_counter(mm, MM_FILEPAGES);
1303
1304 page_remove_rmap(page);
1305 page_cache_release(page);
1306
1307out_unmap:
1308 pte_unmap_unlock(pte, ptl);
1309out:
1310 return ret;
1311
1312out_mlock:
1313 pte_unmap_unlock(pte, ptl);
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1325 if (vma->vm_flags & VM_LOCKED) {
1326 mlock_vma_page(page);
1327 ret = SWAP_MLOCK;
1328 }
1329 up_read(&vma->vm_mm->mmap_sem);
1330 }
1331 return ret;
1332}
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
1359#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
1360
1361static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1362 struct vm_area_struct *vma, struct page *check_page)
1363{
1364 struct mm_struct *mm = vma->vm_mm;
1365 pgd_t *pgd;
1366 pud_t *pud;
1367 pmd_t *pmd;
1368 pte_t *pte;
1369 pte_t pteval;
1370 spinlock_t *ptl;
1371 struct page *page;
1372 unsigned long address;
1373 unsigned long end;
1374 int ret = SWAP_AGAIN;
1375 int locked_vma = 0;
1376
1377 address = (vma->vm_start + cursor) & CLUSTER_MASK;
1378 end = address + CLUSTER_SIZE;
1379 if (address < vma->vm_start)
1380 address = vma->vm_start;
1381 if (end > vma->vm_end)
1382 end = vma->vm_end;
1383
1384 pgd = pgd_offset(mm, address);
1385 if (!pgd_present(*pgd))
1386 return ret;
1387
1388 pud = pud_offset(pgd, address);
1389 if (!pud_present(*pud))
1390 return ret;
1391
1392 pmd = pmd_offset(pud, address);
1393 if (!pmd_present(*pmd))
1394 return ret;
1395
1396
1397
1398
1399
1400 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1401 locked_vma = (vma->vm_flags & VM_LOCKED);
1402 if (!locked_vma)
1403 up_read(&vma->vm_mm->mmap_sem);
1404 }
1405
1406 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1407
1408
1409 update_hiwater_rss(mm);
1410
1411 for (; address < end; pte++, address += PAGE_SIZE) {
1412 if (!pte_present(*pte))
1413 continue;
1414 page = vm_normal_page(vma, address, *pte);
1415 BUG_ON(!page || PageAnon(page));
1416
1417 if (locked_vma) {
1418 mlock_vma_page(page);
1419 if (page == check_page)
1420 ret = SWAP_MLOCK;
1421 continue;
1422 }
1423
1424 if (ptep_clear_flush_young_notify(vma, address, pte))
1425 continue;
1426
1427
1428 flush_cache_page(vma, address, pte_pfn(*pte));
1429 pteval = ptep_clear_flush_notify(vma, address, pte);
1430
1431
1432 if (page->index != linear_page_index(vma, address))
1433 set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
1434
1435
1436 if (pte_dirty(pteval))
1437 set_page_dirty(page);
1438
1439 page_remove_rmap(page);
1440 page_cache_release(page);
1441 dec_mm_counter(mm, MM_FILEPAGES);
1442 (*mapcount)--;
1443 }
1444 pte_unmap_unlock(pte - 1, ptl);
1445 if (locked_vma)
1446 up_read(&vma->vm_mm->mmap_sem);
1447 return ret;
1448}
1449
1450bool is_vma_temporary_stack(struct vm_area_struct *vma)
1451{
1452 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1453
1454 if (!maybe_stack)
1455 return false;
1456
1457 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1458 VM_STACK_INCOMPLETE_SETUP)
1459 return true;
1460
1461 return false;
1462}
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1481{
1482 struct anon_vma *anon_vma;
1483 struct anon_vma_chain *avc;
1484 int ret = SWAP_AGAIN;
1485
1486 anon_vma = page_lock_anon_vma(page);
1487 if (!anon_vma)
1488 return ret;
1489
1490 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1491 struct vm_area_struct *vma = avc->vma;
1492 unsigned long address;
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502 if (PAGE_MIGRATION && (flags & TTU_MIGRATION) &&
1503 is_vma_temporary_stack(vma))
1504 continue;
1505
1506 address = vma_address(page, vma);
1507 if (address == -EFAULT)
1508 continue;
1509 ret = try_to_unmap_one(page, vma, address, flags);
1510 if (ret != SWAP_AGAIN || !page_mapped(page))
1511 break;
1512 }
1513
1514 page_unlock_anon_vma(anon_vma);
1515 return ret;
1516}
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1534{
1535 struct address_space *mapping = page->mapping;
1536 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1537 struct vm_area_struct *vma;
1538 struct prio_tree_iter iter;
1539 int ret = SWAP_AGAIN;
1540 unsigned long cursor;
1541 unsigned long max_nl_cursor = 0;
1542 unsigned long max_nl_size = 0;
1543 unsigned int mapcount;
1544
1545 mutex_lock(&mapping->i_mmap_mutex);
1546 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1547 unsigned long address = vma_address(page, vma);
1548 if (address == -EFAULT)
1549 continue;
1550 ret = try_to_unmap_one(page, vma, address, flags);
1551 if (ret != SWAP_AGAIN || !page_mapped(page))
1552 goto out;
1553 }
1554
1555 if (list_empty(&mapping->i_mmap_nonlinear))
1556 goto out;
1557
1558
1559
1560
1561
1562
1563 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1564 goto out;
1565
1566 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1567 shared.vm_set.list) {
1568 cursor = (unsigned long) vma->vm_private_data;
1569 if (cursor > max_nl_cursor)
1570 max_nl_cursor = cursor;
1571 cursor = vma->vm_end - vma->vm_start;
1572 if (cursor > max_nl_size)
1573 max_nl_size = cursor;
1574 }
1575
1576 if (max_nl_size == 0) {
1577 ret = SWAP_FAIL;
1578 goto out;
1579 }
1580
1581
1582
1583
1584
1585
1586
1587
1588 mapcount = page_mapcount(page);
1589 if (!mapcount)
1590 goto out;
1591 cond_resched();
1592
1593 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1594 if (max_nl_cursor == 0)
1595 max_nl_cursor = CLUSTER_SIZE;
1596
1597 do {
1598 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1599 shared.vm_set.list) {
1600 cursor = (unsigned long) vma->vm_private_data;
1601 while ( cursor < max_nl_cursor &&
1602 cursor < vma->vm_end - vma->vm_start) {
1603 if (try_to_unmap_cluster(cursor, &mapcount,
1604 vma, page) == SWAP_MLOCK)
1605 ret = SWAP_MLOCK;
1606 cursor += CLUSTER_SIZE;
1607 vma->vm_private_data = (void *) cursor;
1608 if ((int)mapcount <= 0)
1609 goto out;
1610 }
1611 vma->vm_private_data = (void *) max_nl_cursor;
1612 }
1613 cond_resched();
1614 max_nl_cursor += CLUSTER_SIZE;
1615 } while (max_nl_cursor <= max_nl_size);
1616
1617
1618
1619
1620
1621
1622 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
1623 vma->vm_private_data = NULL;
1624out:
1625 mutex_unlock(&mapping->i_mmap_mutex);
1626 return ret;
1627}
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643int try_to_unmap(struct page *page, enum ttu_flags flags)
1644{
1645 int ret;
1646
1647 BUG_ON(!PageLocked(page));
1648 VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
1649
1650 if (unlikely(PageKsm(page)))
1651 ret = try_to_unmap_ksm(page, flags);
1652 else if (PageAnon(page))
1653 ret = try_to_unmap_anon(page, flags);
1654 else
1655 ret = try_to_unmap_file(page, flags);
1656 if (ret != SWAP_MLOCK && !page_mapped(page))
1657 ret = SWAP_SUCCESS;
1658 return ret;
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676int try_to_munlock(struct page *page)
1677{
1678 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1679
1680 if (unlikely(PageKsm(page)))
1681 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1682 else if (PageAnon(page))
1683 return try_to_unmap_anon(page, TTU_MUNLOCK);
1684 else
1685 return try_to_unmap_file(page, TTU_MUNLOCK);
1686}
1687
1688void __put_anon_vma(struct anon_vma *anon_vma)
1689{
1690 struct anon_vma *root = anon_vma->root;
1691
1692 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1693 anon_vma_free(root);
1694
1695 anon_vma_free(anon_vma);
1696}
1697
1698#ifdef CONFIG_MIGRATION
1699
1700
1701
1702
1703static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1704 struct vm_area_struct *, unsigned long, void *), void *arg)
1705{
1706 struct anon_vma *anon_vma;
1707 struct anon_vma_chain *avc;
1708 int ret = SWAP_AGAIN;
1709
1710
1711
1712
1713
1714
1715
1716 anon_vma = page_anon_vma(page);
1717 if (!anon_vma)
1718 return ret;
1719 anon_vma_lock(anon_vma);
1720 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1721 struct vm_area_struct *vma = avc->vma;
1722 unsigned long address = vma_address(page, vma);
1723 if (address == -EFAULT)
1724 continue;
1725 ret = rmap_one(page, vma, address, arg);
1726 if (ret != SWAP_AGAIN)
1727 break;
1728 }
1729 anon_vma_unlock(anon_vma);
1730 return ret;
1731}
1732
1733static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1734 struct vm_area_struct *, unsigned long, void *), void *arg)
1735{
1736 struct address_space *mapping = page->mapping;
1737 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1738 struct vm_area_struct *vma;
1739 struct prio_tree_iter iter;
1740 int ret = SWAP_AGAIN;
1741
1742 if (!mapping)
1743 return ret;
1744 mutex_lock(&mapping->i_mmap_mutex);
1745 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1746 unsigned long address = vma_address(page, vma);
1747 if (address == -EFAULT)
1748 continue;
1749 ret = rmap_one(page, vma, address, arg);
1750 if (ret != SWAP_AGAIN)
1751 break;
1752 }
1753
1754
1755
1756
1757
1758 mutex_unlock(&mapping->i_mmap_mutex);
1759 return ret;
1760}
1761
/*
 * Invoke @rmap_one, with @arg, on every mapping of a locked @page by
 * dispatching to the KSM, anonymous or file walker.  Returns the
 * walker's result.
 */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);

	if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);

	return rmap_walk_file(page, rmap_one, arg);
}
1774#endif
1775
1776#ifdef CONFIG_HUGETLB_PAGE
1777
1778
1779
1780
1781
1782static void __hugepage_set_anon_rmap(struct page *page,
1783 struct vm_area_struct *vma, unsigned long address, int exclusive)
1784{
1785 struct anon_vma *anon_vma = vma->anon_vma;
1786
1787 BUG_ON(!anon_vma);
1788
1789 if (PageAnon(page))
1790 return;
1791 if (!exclusive)
1792 anon_vma = anon_vma->root;
1793
1794 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1795 page->mapping = (struct address_space *) anon_vma;
1796 page->index = linear_page_index(vma, address);
1797}
1798
1799void hugepage_add_anon_rmap(struct page *page,
1800 struct vm_area_struct *vma, unsigned long address)
1801{
1802 struct anon_vma *anon_vma = vma->anon_vma;
1803 int first;
1804
1805 BUG_ON(!PageLocked(page));
1806 BUG_ON(!anon_vma);
1807
1808 first = atomic_inc_and_test(&page->_mapcount);
1809 if (first)
1810 __hugepage_set_anon_rmap(page, vma, address, 0);
1811}
1812
1813void hugepage_add_new_anon_rmap(struct page *page,
1814 struct vm_area_struct *vma, unsigned long address)
1815{
1816 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1817 atomic_set(&page->_mapcount, 0);
1818 __hugepage_set_anon_rmap(page, vma, address, 1);
1819}
1820#endif
1821