1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/errno.h>
18#include <linux/mm.h>
19#include <linux/fs.h>
20#include <linux/mman.h>
21#include <linux/sched.h>
22#include <linux/rwsem.h>
23#include <linux/pagemap.h>
24#include <linux/rmap.h>
25#include <linux/spinlock.h>
26#include <linux/jhash.h>
27#include <linux/delay.h>
28#include <linux/kthread.h>
29#include <linux/wait.h>
30#include <linux/slab.h>
31#include <linux/rbtree.h>
32#include <linux/memory.h>
33#include <linux/mmu_notifier.h>
34#include <linux/swap.h>
35#include <linux/ksm.h>
36#include <linux/hash.h>
37#include <linux/freezer.h>
38#include <linux/oom.h>
39
40#include <asm/tlbflush.h>
41#include "internal.h"
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/**
 * struct mm_slot - per-mm bookkeeping for an mm registered with this file
 * @link: node in one bucket of mm_slots_hash (see insert_to_mm_slots_hash())
 * @mm_list: node in the list of slots headed by ksm_mm_head.mm_list
 * @rmap_list: head of this mm's singly linked list of rmap_items
 * @mm: the mm this slot describes; used as the hash key by get_mm_slot()
 */
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};
96
97
98
99
100
101
102
103
104
105
/**
 * struct ksm_scan - cursor recording how far the scanner has got
 * @mm_slot: slot currently being scanned; equals &ksm_mm_head between passes
 * @address: next virtual address to examine inside @mm_slot's mm
 * @rmap_list: link through which the next rmap_item of the current mm is
 *             found or inserted (see get_next_rmap_item())
 * @seqnr: incremented each time a pass over the whole mm list completes;
 *         its low byte is stored in unstable-tree rmap_items
 *
 * There is a single instance of this struct, also named ksm_scan.
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};
112
113
114
115
116
117
118
/**
 * struct stable_node - node of root_stable_tree, one per merged (ksm) page
 * @node: rb-tree linkage into root_stable_tree
 * @hlist: list of rmap_items attached by stable_tree_append()
 * @kpfn: page frame number of the ksm page, set from page_to_pfn() in
 *        stable_tree_insert() and turned back into a page by get_ksm_page()
 */
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};
124
125
126
127
128
129
130
131
132
133
134
135
/**
 * struct rmap_item - tracks one scanned virtual address of one mm
 * @rmap_list: next rmap_item on the owning mm_slot's list
 * @anon_vma: anon_vma on which a reference was taken when the page was
 *            merged (see try_to_merge_with_ksm_page()); dropped with
 *            put_anon_vma() when the item leaves the stable tree
 * @mm: the mm that @address belongs to
 * @address: page-aligned virtual address; the bits below PAGE_MASK carry
 *           STABLE_FLAG, or UNSTABLE_FLAG plus the low byte of the seqnr
 * @oldchecksum: checksum of the page content seen on the previous scan
 * @node: rb-tree linkage while the item is in root_unstable_tree
 * @head: owning stable_node while the item is in the stable tree
 * @hlist: linkage into @head->hlist while in the stable tree
 *
 * @node shares storage with @head/@hlist: an item is in at most one tree.
 */
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;	/* when stable */
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};
150
/* Flag bits kept in the sub-page bits of rmap_item->address. */
#define SEQNR_MASK	0x0ff	/* low byte of ksm_scan.seqnr at unstable insert */
#define UNSTABLE_FLAG	0x100	/* item is accounted in the unstable tree */
#define STABLE_FLAG	0x200	/* item is listed from a stable_node */
154
155
/* Roots of the stable tree (stable_nodes) and the unstable tree (rmap_items) */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

/* Hash table mapping an mm pointer to its mm_slot; 1 << 10 buckets */
#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

/* Dummy slot whose mm_list is the head of the list of all registered mms */
static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
/* The scan cursor; pointing at ksm_mm_head means "no pass in progress" */
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

/* Slab caches, created by ksm_slab_init() */
static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;
173
174
/* Count of stable_nodes with at least one rmap_item (first item attached) */
static unsigned long ksm_pages_shared;

/* Count of rmap_items attached to a stable_node beyond its first one */
static unsigned long ksm_pages_sharing;

/* Count of rmap_items currently accounted as being in the unstable tree */
static unsigned long ksm_pages_unshared;

/* Count of rmap_items allocated and not yet freed */
static unsigned long ksm_rmap_items;

/* Number of pages the scan thread asks ksm_do_scan() to handle per batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds the scan thread sleeps between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

/* Values of ksm_run; only the KSM_RUN_MERGE bit makes ksmd_should_run() true */
#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

/* Scan thread sleeps here when it has nothing to do */
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
/* Held by the scan thread around each ksm_do_scan() batch */
static DEFINE_MUTEX(ksm_thread_mutex);
/* Protects the mm_slot list and hash table and updates of ksm_scan.mm_slot */
static DEFINE_SPINLOCK(ksm_mmlist_lock);
200
/*
 * Create a slab cache named "ksm_<struct>" sized and aligned for
 * struct <struct>, with the given cache flags and no constructor.
 */
#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)
204
205static int __init ksm_slab_init(void)
206{
207 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
208 if (!rmap_item_cache)
209 goto out;
210
211 stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
212 if (!stable_node_cache)
213 goto out_free1;
214
215 mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
216 if (!mm_slot_cache)
217 goto out_free2;
218
219 return 0;
220
221out_free2:
222 kmem_cache_destroy(stable_node_cache);
223out_free1:
224 kmem_cache_destroy(rmap_item_cache);
225out:
226 return -ENOMEM;
227}
228
229static void __init ksm_slab_free(void)
230{
231 kmem_cache_destroy(mm_slot_cache);
232 kmem_cache_destroy(stable_node_cache);
233 kmem_cache_destroy(rmap_item_cache);
234 mm_slot_cache = NULL;
235}
236
237static inline struct rmap_item *alloc_rmap_item(void)
238{
239 struct rmap_item *rmap_item;
240
241 rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
242 if (rmap_item)
243 ksm_rmap_items++;
244 return rmap_item;
245}
246
247static inline void free_rmap_item(struct rmap_item *rmap_item)
248{
249 ksm_rmap_items--;
250 rmap_item->mm = NULL;
251 kmem_cache_free(rmap_item_cache, rmap_item);
252}
253
254static inline struct stable_node *alloc_stable_node(void)
255{
256 return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
257}
258
259static inline void free_stable_node(struct stable_node *stable_node)
260{
261 kmem_cache_free(stable_node_cache, stable_node);
262}
263
264static inline struct mm_slot *alloc_mm_slot(void)
265{
266 if (!mm_slot_cache)
267 return NULL;
268 return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
269}
270
271static inline void free_mm_slot(struct mm_slot *mm_slot)
272{
273 kmem_cache_free(mm_slot_cache, mm_slot);
274}
275
276static struct mm_slot *get_mm_slot(struct mm_struct *mm)
277{
278 struct mm_slot *mm_slot;
279 struct hlist_head *bucket;
280 struct hlist_node *node;
281
282 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
283 hlist_for_each_entry(mm_slot, node, bucket, link) {
284 if (mm == mm_slot->mm)
285 return mm_slot;
286 }
287 return NULL;
288}
289
290static void insert_to_mm_slots_hash(struct mm_struct *mm,
291 struct mm_slot *mm_slot)
292{
293 struct hlist_head *bucket;
294
295 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
296 mm_slot->mm = mm;
297 hlist_add_head(&mm_slot->link, bucket);
298}
299
300static inline int in_stable_tree(struct rmap_item *rmap_item)
301{
302 return rmap_item->address & STABLE_FLAG;
303}
304
305
306
307
308
309
310
311
312
313static inline bool ksm_test_exit(struct mm_struct *mm)
314{
315 return atomic_read(&mm->mm_users) == 0;
316}
317
318
319
320
321
322
323
324
325
326
327
328
329static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
330{
331 struct page *page;
332 int ret = 0;
333
334 do {
335 cond_resched();
336 page = follow_page(vma, addr, FOLL_GET);
337 if (IS_ERR_OR_NULL(page))
338 break;
339 if (PageKsm(page))
340 ret = handle_mm_fault(vma->vm_mm, vma, addr,
341 FAULT_FLAG_WRITE);
342 else
343 ret = VM_FAULT_WRITE;
344 put_page(page);
345 } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374 return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
375}
376
377static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
378 unsigned long addr)
379{
380 struct vm_area_struct *vma;
381 if (ksm_test_exit(mm))
382 return NULL;
383 vma = find_vma(mm, addr);
384 if (!vma || vma->vm_start > addr)
385 return NULL;
386 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
387 return NULL;
388 return vma;
389}
390
391static void break_cow(struct rmap_item *rmap_item)
392{
393 struct mm_struct *mm = rmap_item->mm;
394 unsigned long addr = rmap_item->address;
395 struct vm_area_struct *vma;
396
397
398
399
400
401 put_anon_vma(rmap_item->anon_vma);
402
403 down_read(&mm->mmap_sem);
404 vma = find_mergeable_vma(mm, addr);
405 if (vma)
406 break_ksm(vma, addr);
407 up_read(&mm->mmap_sem);
408}
409
410static struct page *page_trans_compound_anon(struct page *page)
411{
412 if (PageTransCompound(page)) {
413 struct page *head = compound_trans_head(page);
414
415
416
417
418 if (PageAnon(head))
419 return head;
420 }
421 return NULL;
422}
423
424static struct page *get_mergeable_page(struct rmap_item *rmap_item)
425{
426 struct mm_struct *mm = rmap_item->mm;
427 unsigned long addr = rmap_item->address;
428 struct vm_area_struct *vma;
429 struct page *page;
430
431 down_read(&mm->mmap_sem);
432 vma = find_mergeable_vma(mm, addr);
433 if (!vma)
434 goto out;
435
436 page = follow_page(vma, addr, FOLL_GET);
437 if (IS_ERR_OR_NULL(page))
438 goto out;
439 if (PageAnon(page) || page_trans_compound_anon(page)) {
440 flush_anon_page(vma, page, addr);
441 flush_dcache_page(page);
442 } else {
443 put_page(page);
444out: page = NULL;
445 }
446 up_read(&mm->mmap_sem);
447 return page;
448}
449
450static void remove_node_from_stable_tree(struct stable_node *stable_node)
451{
452 struct rmap_item *rmap_item;
453 struct hlist_node *hlist;
454
455 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
456 if (rmap_item->hlist.next)
457 ksm_pages_sharing--;
458 else
459 ksm_pages_shared--;
460 put_anon_vma(rmap_item->anon_vma);
461 rmap_item->address &= PAGE_MASK;
462 cond_resched();
463 }
464
465 rb_erase(&stable_node->node, &root_stable_tree);
466 free_stable_node(stable_node);
467}
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498static struct page *get_ksm_page(struct stable_node *stable_node)
499{
500 struct page *page;
501 void *expected_mapping;
502
503 page = pfn_to_page(stable_node->kpfn);
504 expected_mapping = (void *)stable_node +
505 (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
506 rcu_read_lock();
507 if (page->mapping != expected_mapping)
508 goto stale;
509 if (!get_page_unless_zero(page))
510 goto stale;
511 if (page->mapping != expected_mapping) {
512 put_page(page);
513 goto stale;
514 }
515 rcu_read_unlock();
516 return page;
517stale:
518 rcu_read_unlock();
519 remove_node_from_stable_tree(stable_node);
520 return NULL;
521}
522
523
524
525
526
527static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
528{
529 if (rmap_item->address & STABLE_FLAG) {
530 struct stable_node *stable_node;
531 struct page *page;
532
533 stable_node = rmap_item->head;
534 page = get_ksm_page(stable_node);
535 if (!page)
536 goto out;
537
538 lock_page(page);
539 hlist_del(&rmap_item->hlist);
540 unlock_page(page);
541 put_page(page);
542
543 if (stable_node->hlist.first)
544 ksm_pages_sharing--;
545 else
546 ksm_pages_shared--;
547
548 put_anon_vma(rmap_item->anon_vma);
549 rmap_item->address &= PAGE_MASK;
550
551 } else if (rmap_item->address & UNSTABLE_FLAG) {
552 unsigned char age;
553
554
555
556
557
558
559
560 age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
561 BUG_ON(age > 1);
562 if (!age)
563 rb_erase(&rmap_item->node, &root_unstable_tree);
564
565 ksm_pages_unshared--;
566 rmap_item->address &= PAGE_MASK;
567 }
568out:
569 cond_resched();
570}
571
572static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
573 struct rmap_item **rmap_list)
574{
575 while (*rmap_list) {
576 struct rmap_item *rmap_item = *rmap_list;
577 *rmap_list = rmap_item->rmap_list;
578 remove_rmap_item_from_tree(rmap_item);
579 free_rmap_item(rmap_item);
580 }
581}
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596static int unmerge_ksm_pages(struct vm_area_struct *vma,
597 unsigned long start, unsigned long end)
598{
599 unsigned long addr;
600 int err = 0;
601
602 for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
603 if (ksm_test_exit(vma->vm_mm))
604 break;
605 if (signal_pending(current))
606 err = -ERESTARTSYS;
607 else
608 err = break_ksm(vma, addr);
609 }
610 return err;
611}
612
613#ifdef CONFIG_SYSFS
614
615
616
/*
 * Unmerge every ksm page in every registered mm and free all rmap_items.
 *
 * Walks the mm_slot list from its first entry, using ksm_scan.mm_slot as
 * the cursor.  For each mm, under mmap_sem held for read, all VM_MERGEABLE
 * vmas that have an anon_vma are unmerged and the mm's rmap_items are
 * freed.  A slot whose mm has exited is unlinked and freed, and the mm
 * reference it held is dropped.
 *
 * Returns 0 on success, with ksm_scan.seqnr reset to 0.  On failure returns
 * the error from unmerge_ksm_pages() and parks the cursor on ksm_mm_head.
 * NOTE(review): callers presumably serialise against the scan thread
 * (ksm_thread_mutex) - not visible in this function, confirm at call site.
 */
static int unmerge_and_remove_all_rmap_items(void)
{
	struct mm_slot *mm_slot;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int err = 0;

	/* Start the walk at the first real slot after the list head */
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
						struct mm_slot, mm_list);
	spin_unlock(&ksm_mmlist_lock);

	/* The cursor is advanced inside the body, under ksm_mmlist_lock */
	for (mm_slot = ksm_scan.mm_slot;
			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
		mm = mm_slot->mm;
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (ksm_test_exit(mm))
				break;
			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
				continue;
			err = unmerge_ksm_pages(vma,
						vma->vm_start, vma->vm_end);
			if (err)
				goto error;
		}

		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

		spin_lock(&ksm_mmlist_lock);
		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
						struct mm_slot, mm_list);
		if (ksm_test_exit(mm)) {
			/* mm is gone: retire its slot and drop our mm_count */
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			spin_unlock(&ksm_mmlist_lock);

			free_mm_slot(mm_slot);
			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
			/* Release mmap_sem before mmdrop(): it may free mm */
			up_read(&mm->mmap_sem);
			mmdrop(mm);
		} else {
			spin_unlock(&ksm_mmlist_lock);
			up_read(&mm->mmap_sem);
		}
	}

	ksm_scan.seqnr = 0;
	return 0;

error:
	/* mmap_sem of the mm being processed is still held for read here */
	up_read(&mm->mmap_sem);
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = &ksm_mm_head;
	spin_unlock(&ksm_mmlist_lock);
	return err;
}
674#endif
675
676static u32 calc_checksum(struct page *page)
677{
678 u32 checksum;
679 void *addr = kmap_atomic(page);
680 checksum = jhash2(addr, PAGE_SIZE / 4, 17);
681 kunmap_atomic(addr);
682 return checksum;
683}
684
685static int memcmp_pages(struct page *page1, struct page *page2)
686{
687 char *addr1, *addr2;
688 int ret;
689
690 addr1 = kmap_atomic(page1);
691 addr2 = kmap_atomic(page2);
692 ret = memcmp(addr1, addr2, PAGE_SIZE);
693 kunmap_atomic(addr2);
694 kunmap_atomic(addr1);
695 return ret;
696}
697
/* Returns 1 if the two pages have byte-identical contents, else 0. */
static inline int pages_identical(struct page *page1, struct page *page2)
{
	return memcmp_pages(page1, page2) == 0;
}
702
/*
 * Make the pte that maps @page in @vma read-only and clean.
 *
 * @vma:      vma in which @page is expected to be mapped
 * @page:     the page to protect; must not be part of a transparent
 *            compound page (BUG_ON otherwise)
 * @orig_pte: on success, receives the pte value as it stands after
 *            protection, for the later pte_same() check in replace_page()
 *
 * Returns 0 on success.  Returns -EFAULT if the page is not mapped at the
 * expected address in @vma, or if the page has references beyond those
 * accounted for, in which case the original pte is put back untouched.
 */
static int write_protect_page(struct vm_area_struct *vma, struct page *page,
			      pte_t *orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));

	/* Notifier range covers just this one page */
	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	/* On success this returns with the page table lock (ptl) held */
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out_mn;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;

		swapped = PageSwapCache(page);
		flush_cache_page(vma, addr, page_to_pfn(page));
		/*
		 * Clear the pte and flush the TLB before inspecting the
		 * reference count, so that no new access through this
		 * mapping can slip in while the counts are compared.
		 */
		entry = ptep_clear_flush(vma, addr, ptep);
		/*
		 * Expected references: one per mapping, one more when in
		 * swap cache, plus one extra (presumably the caller's own
		 * reference - confirm).  Anything beyond that means someone
		 * else is using the page (presumably in-flight I/O such as
		 * O_DIRECT), so restore the pte and give up.
		 */
		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
			set_pte_at(mm, addr, ptep, entry);
			goto out_unlock;
		}
		/* Carry the pte dirty bit over to the page before cleaning */
		if (pte_dirty(entry))
			set_page_dirty(page);
		entry = pte_mkclean(pte_wrprotect(entry));
		set_pte_at_notify(mm, addr, ptep, entry);
	}
	*orig_pte = *ptep;
	err = 0;

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}
767
768
769
770
771
772
773
774
775
776
/*
 * Replace @page with @kpage in the page table of @vma.
 *
 * @vma:      vma that maps @page
 * @page:     the page currently mapped, to be replaced
 * @kpage:    the ksm page to map in its place
 * @orig_pte: the pte value recorded by write_protect_page(); the swap is
 *            only done if the pte is still exactly this value
 *
 * On success @kpage gains a reference and an anon rmap, while @page loses
 * its rmap and one reference.  Returns 0 on success, -EFAULT if the page
 * tables are not present or the pte changed since @orig_pte was recorded.
 */
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	/* Walk the page tables by hand down to the pmd */
	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, addr);
	/* Huge pmds must have been split before getting here */
	BUG_ON(pmd_trans_huge(*pmd));
	if (!pmd_present(*pmd))
		goto out;

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	/* Someone touched the pte since it was write-protected: abort */
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out_mn;
	}

	/* Account the new mapping on kpage before it becomes visible */
	get_page(kpage);
	page_add_anon_rmap(kpage, vma, addr);

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	/* The old page lost this mapping and the reference that went with it */
	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}
837
/*
 * If @page belongs to an anonymous transparent compound page, try to split
 * that compound page into ordinary pages.
 *
 * Returns 0 if @page is not part of an anonymous transparent compound page,
 * or the result of split_huge_page() if a split was attempted.  Returns 1
 * when no split could be attempted: either no reference could be taken on
 * the head page, or the head stopped being anonymous once the reference was
 * held.
 */
static int page_trans_compound_anon_split(struct page *page)
{
	struct page *head = page_trans_compound_anon(page);
	int ret;

	if (!head)
		return 0;

	/* Pin the head; failure means it is already being freed */
	if (!get_page_unless_zero(head))
		return 1;

	/* Recheck now that the head cannot go away under us */
	if (PageAnon(head))
		ret = split_huge_page(head);
	else
		ret = 1;

	put_page(head);
	return ret;
}
864
865
866
867
868
869
870
871
872
873
/*
 * Try to merge @page into @kpage within @vma.
 *
 * @vma:   the vma that holds the pte pointing to @page
 * @page:  the candidate page; the caller holds a reference on it
 * @kpage: the ksm page to map instead of @page, or NULL to merely
 *         write-protect @page and mark it as a ksm page itself
 *
 * Returns 0 if the pages were merged (or are the same page already),
 * -EFAULT otherwise.
 */
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;

	if (!(vma->vm_flags & VM_MERGEABLE))
		goto out;
	/* A compound page must be split first; give up if that fails */
	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
		goto out;
	BUG_ON(PageTransCompound(page));
	if (!PageAnon(page))
		goto out;

	/*
	 * Only trylock: the page lock is needed for the operations below,
	 * but presumably this is not worth sleeping for - a page someone
	 * else holds locked is simply skipped this time round.
	 */
	if (!trylock_page(page))
		goto out;
	/*
	 * Write-protect first.  If that succeeds the content can no longer
	 * change through this mapping, so the comparison with kpage below
	 * is meaningful.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			/*
			 * No ksm page yet: turn this page into one, with a
			 * NULL stable_node for now; stable_tree_insert()
			 * fills in the real node later.
			 */
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	/* In an mlocked vma, move the mlock from the old page to kpage */
	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			/* Swap page locks: mlock_vma_page() needs kpage locked */
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

	unlock_page(page);
out:
	return err;
}
934
935
936
937
938
939
940
941static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
942 struct page *page, struct page *kpage)
943{
944 struct mm_struct *mm = rmap_item->mm;
945 struct vm_area_struct *vma;
946 int err = -EFAULT;
947
948 down_read(&mm->mmap_sem);
949 if (ksm_test_exit(mm))
950 goto out;
951 vma = find_vma(mm, rmap_item->address);
952 if (!vma || vma->vm_start > rmap_item->address)
953 goto out;
954
955 err = try_to_merge_one_page(vma, page, kpage);
956 if (err)
957 goto out;
958
959
960 rmap_item->anon_vma = vma->anon_vma;
961 get_anon_vma(vma->anon_vma);
962out:
963 up_read(&mm->mmap_sem);
964 return err;
965}
966
967
968
969
970
971
972
973
974
975
976
977static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
978 struct page *page,
979 struct rmap_item *tree_rmap_item,
980 struct page *tree_page)
981{
982 int err;
983
984 err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
985 if (!err) {
986 err = try_to_merge_with_ksm_page(tree_rmap_item,
987 tree_page, page);
988
989
990
991
992 if (err)
993 break_cow(rmap_item);
994 }
995 return err ? NULL : page;
996}
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007static struct page *stable_tree_search(struct page *page)
1008{
1009 struct rb_node *node = root_stable_tree.rb_node;
1010 struct stable_node *stable_node;
1011
1012 stable_node = page_stable_node(page);
1013 if (stable_node) {
1014 get_page(page);
1015 return page;
1016 }
1017
1018 while (node) {
1019 struct page *tree_page;
1020 int ret;
1021
1022 cond_resched();
1023 stable_node = rb_entry(node, struct stable_node, node);
1024 tree_page = get_ksm_page(stable_node);
1025 if (!tree_page)
1026 return NULL;
1027
1028 ret = memcmp_pages(page, tree_page);
1029
1030 if (ret < 0) {
1031 put_page(tree_page);
1032 node = node->rb_left;
1033 } else if (ret > 0) {
1034 put_page(tree_page);
1035 node = node->rb_right;
1036 } else
1037 return tree_page;
1038 }
1039
1040 return NULL;
1041}
1042
1043
1044
1045
1046
1047
1048
1049
1050static struct stable_node *stable_tree_insert(struct page *kpage)
1051{
1052 struct rb_node **new = &root_stable_tree.rb_node;
1053 struct rb_node *parent = NULL;
1054 struct stable_node *stable_node;
1055
1056 while (*new) {
1057 struct page *tree_page;
1058 int ret;
1059
1060 cond_resched();
1061 stable_node = rb_entry(*new, struct stable_node, node);
1062 tree_page = get_ksm_page(stable_node);
1063 if (!tree_page)
1064 return NULL;
1065
1066 ret = memcmp_pages(kpage, tree_page);
1067 put_page(tree_page);
1068
1069 parent = *new;
1070 if (ret < 0)
1071 new = &parent->rb_left;
1072 else if (ret > 0)
1073 new = &parent->rb_right;
1074 else {
1075
1076
1077
1078
1079
1080 return NULL;
1081 }
1082 }
1083
1084 stable_node = alloc_stable_node();
1085 if (!stable_node)
1086 return NULL;
1087
1088 rb_link_node(&stable_node->node, parent, new);
1089 rb_insert_color(&stable_node->node, &root_stable_tree);
1090
1091 INIT_HLIST_HEAD(&stable_node->hlist);
1092
1093 stable_node->kpfn = page_to_pfn(kpage);
1094 set_page_stable_node(kpage, stable_node);
1095
1096 return stable_node;
1097}
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113static
1114struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
1115 struct page *page,
1116 struct page **tree_pagep)
1117
1118{
1119 struct rb_node **new = &root_unstable_tree.rb_node;
1120 struct rb_node *parent = NULL;
1121
1122 while (*new) {
1123 struct rmap_item *tree_rmap_item;
1124 struct page *tree_page;
1125 int ret;
1126
1127 cond_resched();
1128 tree_rmap_item = rb_entry(*new, struct rmap_item, node);
1129 tree_page = get_mergeable_page(tree_rmap_item);
1130 if (IS_ERR_OR_NULL(tree_page))
1131 return NULL;
1132
1133
1134
1135
1136 if (page == tree_page) {
1137 put_page(tree_page);
1138 return NULL;
1139 }
1140
1141 ret = memcmp_pages(page, tree_page);
1142
1143 parent = *new;
1144 if (ret < 0) {
1145 put_page(tree_page);
1146 new = &parent->rb_left;
1147 } else if (ret > 0) {
1148 put_page(tree_page);
1149 new = &parent->rb_right;
1150 } else {
1151 *tree_pagep = tree_page;
1152 return tree_rmap_item;
1153 }
1154 }
1155
1156 rmap_item->address |= UNSTABLE_FLAG;
1157 rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
1158 rb_link_node(&rmap_item->node, parent, new);
1159 rb_insert_color(&rmap_item->node, &root_unstable_tree);
1160
1161 ksm_pages_unshared++;
1162 return NULL;
1163}
1164
1165
1166
1167
1168
1169
1170static void stable_tree_append(struct rmap_item *rmap_item,
1171 struct stable_node *stable_node)
1172{
1173 rmap_item->head = stable_node;
1174 rmap_item->address |= STABLE_FLAG;
1175 hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
1176
1177 if (rmap_item->hlist.next)
1178 ksm_pages_sharing++;
1179 else
1180 ksm_pages_shared++;
1181}
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1193{
1194 struct rmap_item *tree_rmap_item;
1195 struct page *tree_page = NULL;
1196 struct stable_node *stable_node;
1197 struct page *kpage;
1198 unsigned int checksum;
1199 int err;
1200
1201 remove_rmap_item_from_tree(rmap_item);
1202
1203
1204 kpage = stable_tree_search(page);
1205 if (kpage) {
1206 err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
1207 if (!err) {
1208
1209
1210
1211
1212 lock_page(kpage);
1213 stable_tree_append(rmap_item, page_stable_node(kpage));
1214 unlock_page(kpage);
1215 }
1216 put_page(kpage);
1217 return;
1218 }
1219
1220
1221
1222
1223
1224
1225
1226 checksum = calc_checksum(page);
1227 if (rmap_item->oldchecksum != checksum) {
1228 rmap_item->oldchecksum = checksum;
1229 return;
1230 }
1231
1232 tree_rmap_item =
1233 unstable_tree_search_insert(rmap_item, page, &tree_page);
1234 if (tree_rmap_item) {
1235 kpage = try_to_merge_two_pages(rmap_item, page,
1236 tree_rmap_item, tree_page);
1237 put_page(tree_page);
1238
1239
1240
1241
1242
1243 if (kpage) {
1244 remove_rmap_item_from_tree(tree_rmap_item);
1245
1246 lock_page(kpage);
1247 stable_node = stable_tree_insert(kpage);
1248 if (stable_node) {
1249 stable_tree_append(tree_rmap_item, stable_node);
1250 stable_tree_append(rmap_item, stable_node);
1251 }
1252 unlock_page(kpage);
1253
1254
1255
1256
1257
1258
1259
1260 if (!stable_node) {
1261 break_cow(tree_rmap_item);
1262 break_cow(rmap_item);
1263 }
1264 }
1265 }
1266}
1267
1268static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
1269 struct rmap_item **rmap_list,
1270 unsigned long addr)
1271{
1272 struct rmap_item *rmap_item;
1273
1274 while (*rmap_list) {
1275 rmap_item = *rmap_list;
1276 if ((rmap_item->address & PAGE_MASK) == addr)
1277 return rmap_item;
1278 if (rmap_item->address > addr)
1279 break;
1280 *rmap_list = rmap_item->rmap_list;
1281 remove_rmap_item_from_tree(rmap_item);
1282 free_rmap_item(rmap_item);
1283 }
1284
1285 rmap_item = alloc_rmap_item();
1286 if (rmap_item) {
1287
1288 rmap_item->mm = mm_slot->mm;
1289 rmap_item->address = addr;
1290 rmap_item->rmap_list = *rmap_list;
1291 *rmap_list = rmap_item;
1292 }
1293 return rmap_item;
1294}
1295
/*
 * Advance the scan cursor to the next anonymous page in a mergeable area.
 *
 * @page: receives the page found, with a reference held, when an rmap_item
 *        is returned
 *
 * Returns the rmap_item tracking that page.  Returns NULL when there is
 * nothing to scan, when an rmap_item could not be allocated (the cursor is
 * then left on the same address), or when a full pass over all registered
 * mms has just been completed (ksm_scan.seqnr is then incremented).
 */
static struct rmap_item *scan_get_next_rmap_item(struct page **page)
{
	struct mm_struct *mm;
	struct mm_slot *slot;
	struct vm_area_struct *vma;
	struct rmap_item *rmap_item;

	if (list_empty(&ksm_mm_head.mm_list))
		return NULL;

	slot = ksm_scan.mm_slot;
	if (slot == &ksm_mm_head) {
		/*
		 * Starting a new pass.  Drain the per-cpu LRU caches first:
		 * presumably the extra page references they hold would
		 * otherwise make write_protect_page()'s reference count
		 * check fail - confirm.
		 */
		lru_add_drain_all();

		/* The unstable tree is rebuilt from scratch on every pass */
		root_unstable_tree = RB_ROOT;

		spin_lock(&ksm_mmlist_lock);
		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
		ksm_scan.mm_slot = slot;
		spin_unlock(&ksm_mmlist_lock);
		/*
		 * The list may have become empty since the unlocked
		 * list_empty() check above.
		 */
		if (slot == &ksm_mm_head)
			return NULL;
next_mm:
		/* Also entered by goto from the end of the function */
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}

	mm = slot->mm;
	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		vma = NULL;
	else
		vma = find_vma(mm, ksm_scan.address);

	for (; vma; vma = vma->vm_next) {
		if (!(vma->vm_flags & VM_MERGEABLE))
			continue;
		if (ksm_scan.address < vma->vm_start)
			ksm_scan.address = vma->vm_start;
		/* No anon_vma means no anonymous pages: skip the whole vma */
		if (!vma->anon_vma)
			ksm_scan.address = vma->vm_end;

		while (ksm_scan.address < vma->vm_end) {
			if (ksm_test_exit(mm))
				break;
			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
			if (IS_ERR_OR_NULL(*page)) {
				ksm_scan.address += PAGE_SIZE;
				cond_resched();
				continue;
			}
			if (PageAnon(*page) ||
			    page_trans_compound_anon(*page)) {
				flush_anon_page(vma, *page, ksm_scan.address);
				flush_dcache_page(*page);
				rmap_item = get_next_rmap_item(slot,
					ksm_scan.rmap_list, ksm_scan.address);
				if (rmap_item) {
					ksm_scan.rmap_list =
							&rmap_item->rmap_list;
					ksm_scan.address += PAGE_SIZE;
				} else
					/* Allocation failed: drop the page ref */
					put_page(*page);
				up_read(&mm->mmap_sem);
				return rmap_item;
			}
			/* Not anonymous: release it and move on */
			put_page(*page);
			ksm_scan.address += PAGE_SIZE;
			cond_resched();
		}
	}

	/* An exiting mm gets all of its rmap_items removed, from the start */
	if (ksm_test_exit(mm)) {
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}
	/*
	 * Nuke all the rmap_items that are above this current rmap:
	 * because there were no VM_MERGEABLE vmas with such addresses.
	 */
	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
						struct mm_slot, mm_list);
	if (ksm_scan.address == 0) {
		/*
		 * address is still 0 only if no mergeable vma was walked
		 * at all, or the mm is exiting: retire the slot and drop
		 * the mm reference taken in __ksm_enter().
		 * NOTE(review): relies on __ksm_exit() leaving the slot
		 * that is being scanned for us to free - see
		 * __ksm_exit().
		 */
		hlist_del(&slot->link);
		list_del(&slot->mm_list);
		spin_unlock(&ksm_mmlist_lock);

		free_mm_slot(slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		/* Release mmap_sem before mmdrop(): it may free the mm */
		up_read(&mm->mmap_sem);
		mmdrop(mm);
	} else {
		spin_unlock(&ksm_mmlist_lock);
		up_read(&mm->mmap_sem);
	}

	/* Repeat until we've completed scanning the whole list */
	slot = ksm_scan.mm_slot;
	if (slot != &ksm_mm_head)
		goto next_mm;

	ksm_scan.seqnr++;
	return NULL;
}
1426
1427
1428
1429
1430
1431static void ksm_do_scan(unsigned int scan_npages)
1432{
1433 struct rmap_item *rmap_item;
1434 struct page *uninitialized_var(page);
1435
1436 while (scan_npages-- && likely(!freezing(current))) {
1437 cond_resched();
1438 rmap_item = scan_get_next_rmap_item(&page);
1439 if (!rmap_item)
1440 return;
1441 if (!PageKsm(page) || !in_stable_tree(rmap_item))
1442 cmp_and_merge_page(page, rmap_item);
1443 put_page(page);
1444 }
1445}
1446
1447static int ksmd_should_run(void)
1448{
1449 return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
1450}
1451
1452static int ksm_scan_thread(void *nothing)
1453{
1454 set_freezable();
1455 set_user_nice(current, 5);
1456
1457 while (!kthread_should_stop()) {
1458 mutex_lock(&ksm_thread_mutex);
1459 if (ksmd_should_run())
1460 ksm_do_scan(ksm_thread_pages_to_scan);
1461 mutex_unlock(&ksm_thread_mutex);
1462
1463 try_to_freeze();
1464
1465 if (ksmd_should_run()) {
1466 schedule_timeout_interruptible(
1467 msecs_to_jiffies(ksm_thread_sleep_millisecs));
1468 } else {
1469 wait_event_freezable(ksm_thread_wait,
1470 ksmd_should_run() || kthread_should_stop());
1471 }
1472 }
1473 return 0;
1474}
1475
1476int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1477 unsigned long end, int advice, unsigned long *vm_flags)
1478{
1479 struct mm_struct *mm = vma->vm_mm;
1480 int err;
1481
1482 switch (advice) {
1483 case MADV_MERGEABLE:
1484
1485
1486
1487 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1488 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1489 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
1490 return 0;
1491
1492#ifdef VM_SAO
1493 if (*vm_flags & VM_SAO)
1494 return 0;
1495#endif
1496
1497 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
1498 err = __ksm_enter(mm);
1499 if (err)
1500 return err;
1501 }
1502
1503 *vm_flags |= VM_MERGEABLE;
1504 break;
1505
1506 case MADV_UNMERGEABLE:
1507 if (!(*vm_flags & VM_MERGEABLE))
1508 return 0;
1509
1510 if (vma->anon_vma) {
1511 err = unmerge_ksm_pages(vma, start, end);
1512 if (err)
1513 return err;
1514 }
1515
1516 *vm_flags &= ~VM_MERGEABLE;
1517 break;
1518 }
1519
1520 return 0;
1521}
1522
1523int __ksm_enter(struct mm_struct *mm)
1524{
1525 struct mm_slot *mm_slot;
1526 int needs_wakeup;
1527
1528 mm_slot = alloc_mm_slot();
1529 if (!mm_slot)
1530 return -ENOMEM;
1531
1532
1533 needs_wakeup = list_empty(&ksm_mm_head.mm_list);
1534
1535 spin_lock(&ksm_mmlist_lock);
1536 insert_to_mm_slots_hash(mm, mm_slot);
1537
1538
1539
1540
1541
1542 list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
1543 spin_unlock(&ksm_mmlist_lock);
1544
1545 set_bit(MMF_VM_MERGEABLE, &mm->flags);
1546 atomic_inc(&mm->mm_count);
1547
1548 if (needs_wakeup)
1549 wake_up_interruptible(&ksm_thread_wait);
1550
1551 return 0;
1552}
1553
1554void __ksm_exit(struct mm_struct *mm)
1555{
1556 struct mm_slot *mm_slot;
1557 int easy_to_free = 0;
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568 spin_lock(&ksm_mmlist_lock);
1569 mm_slot = get_mm_slot(mm);
1570 if (mm_slot && ksm_scan.mm_slot != mm_slot) {
1571 if (!mm_slot->rmap_list) {
1572 hlist_del(&mm_slot->link);
1573 list_del(&mm_slot->mm_list);
1574 easy_to_free = 1;
1575 } else {
1576 list_move(&mm_slot->mm_list,
1577 &ksm_scan.mm_slot->mm_list);
1578 }
1579 }
1580 spin_unlock(&ksm_mmlist_lock);
1581
1582 if (easy_to_free) {
1583 free_mm_slot(mm_slot);
1584 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1585 mmdrop(mm);
1586 } else if (mm_slot) {
1587 down_write(&mm->mmap_sem);
1588 up_write(&mm->mmap_sem);
1589 }
1590}
1591
1592struct page *ksm_does_need_to_copy(struct page *page,
1593 struct vm_area_struct *vma, unsigned long address)
1594{
1595 struct page *new_page;
1596
1597 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1598 if (new_page) {
1599 copy_user_highpage(new_page, page, address, vma);
1600
1601 SetPageDirty(new_page);
1602 __SetPageUptodate(new_page);
1603 SetPageSwapBacked(new_page);
1604 __set_page_locked(new_page);
1605
1606 if (!mlocked_vma_newpage(vma, new_page))
1607 lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
1608 else
1609 add_page_to_unevictable_list(new_page);
1610 }
1611
1612 return new_page;
1613}
1614
/*
 * Count recent references to a ksm page across all of its mappings.
 *
 * @page:     the ksm page; must be locked (VM_BUG_ON otherwise)
 * @memcg:    if non-NULL, only mms belonging to this memory cgroup count
 * @vm_flags: passed through to page_referenced_one(), which presumably
 *            collects the flags of referencing vmas - confirm
 *
 * Every rmap_item on the page's stable_node is visited.  On the first pass
 * only the vma in the rmap_item's own mm is checked; if mappings remain
 * unaccounted after that, a second pass checks the vmas of other mms on the
 * same anon_vma (forked children).
 *
 * Returns the sum of the values returned by page_referenced_one(), or 0 if
 * the page has no stable_node.
 */
int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
			unsigned long *vm_flags)
{
	struct stable_node *stable_node;
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;
	unsigned int mapcount = page_mapcount(page);
	int referenced = 0;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return 0;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
					       0, ULONG_MAX) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Pass 0 (search_new_forks == 0) looks only at the
			 * vma of the rmap_item's own mm; pass 1 looks only
			 * at vmas of other mms, i.e. forks made since the
			 * item was created.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				continue;

			referenced += page_referenced_one(page, vma,
				rmap_item->address, &mapcount, vm_flags);
			/* Pass 0 has at most one matching vma per item */
			if (!search_new_forks || !mapcount)
				break;
		}
		anon_vma_unlock(anon_vma);
		/* All mappings accounted for: no need to look further */
		if (!mapcount)
			goto out;
	}
	/* Run the second pass exactly once */
	if (!search_new_forks++)
		goto again;
out:
	return referenced;
}
1670
1671int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
1672{
1673 struct stable_node *stable_node;
1674 struct hlist_node *hlist;
1675 struct rmap_item *rmap_item;
1676 int ret = SWAP_AGAIN;
1677 int search_new_forks = 0;
1678
1679 VM_BUG_ON(!PageKsm(page));
1680 VM_BUG_ON(!PageLocked(page));
1681
1682 stable_node = page_stable_node(page);
1683 if (!stable_node)
1684 return SWAP_FAIL;
1685again:
1686 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1687 struct anon_vma *anon_vma = rmap_item->anon_vma;
1688 struct anon_vma_chain *vmac;
1689 struct vm_area_struct *vma;
1690
1691 anon_vma_lock(anon_vma);
1692 anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
1693 0, ULONG_MAX) {
1694 vma = vmac->vma;
1695 if (rmap_item->address < vma->vm_start ||
1696 rmap_item->address >= vma->vm_end)
1697 continue;
1698
1699
1700
1701
1702
1703
1704 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1705 continue;
1706
1707 ret = try_to_unmap_one(page, vma,
1708 rmap_item->address, flags);
1709 if (ret != SWAP_AGAIN || !page_mapped(page)) {
1710 anon_vma_unlock(anon_vma);
1711 goto out;
1712 }
1713 }
1714 anon_vma_unlock(anon_vma);
1715 }
1716 if (!search_new_forks++)
1717 goto again;
1718out:
1719 return ret;
1720}
1721
1722#ifdef CONFIG_MIGRATION
1723int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
1724 struct vm_area_struct *, unsigned long, void *), void *arg)
1725{
1726 struct stable_node *stable_node;
1727 struct hlist_node *hlist;
1728 struct rmap_item *rmap_item;
1729 int ret = SWAP_AGAIN;
1730 int search_new_forks = 0;
1731
1732 VM_BUG_ON(!PageKsm(page));
1733 VM_BUG_ON(!PageLocked(page));
1734
1735 stable_node = page_stable_node(page);
1736 if (!stable_node)
1737 return ret;
1738again:
1739 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1740 struct anon_vma *anon_vma = rmap_item->anon_vma;
1741 struct anon_vma_chain *vmac;
1742 struct vm_area_struct *vma;
1743
1744 anon_vma_lock(anon_vma);
1745 anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
1746 0, ULONG_MAX) {
1747 vma = vmac->vma;
1748 if (rmap_item->address < vma->vm_start ||
1749 rmap_item->address >= vma->vm_end)
1750 continue;
1751
1752
1753
1754
1755
1756
1757 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1758 continue;
1759
1760 ret = rmap_one(page, vma, rmap_item->address, arg);
1761 if (ret != SWAP_AGAIN) {
1762 anon_vma_unlock(anon_vma);
1763 goto out;
1764 }
1765 }
1766 anon_vma_unlock(anon_vma);
1767 }
1768 if (!search_new_forks++)
1769 goto again;
1770out:
1771 return ret;
1772}
1773
1774void ksm_migrate_page(struct page *newpage, struct page *oldpage)
1775{
1776 struct stable_node *stable_node;
1777
1778 VM_BUG_ON(!PageLocked(oldpage));
1779 VM_BUG_ON(!PageLocked(newpage));
1780 VM_BUG_ON(newpage->mapping != oldpage->mapping);
1781
1782 stable_node = page_stable_node(newpage);
1783 if (stable_node) {
1784 VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
1785 stable_node->kpfn = page_to_pfn(newpage);
1786 }
1787}
1788#endif
1789
1790#ifdef CONFIG_MEMORY_HOTREMOVE
1791static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
1792 unsigned long end_pfn)
1793{
1794 struct rb_node *node;
1795
1796 for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
1797 struct stable_node *stable_node;
1798
1799 stable_node = rb_entry(node, struct stable_node, node);
1800 if (stable_node->kpfn >= start_pfn &&
1801 stable_node->kpfn < end_pfn)
1802 return stable_node;
1803 }
1804 return NULL;
1805}
1806
1807static int ksm_memory_callback(struct notifier_block *self,
1808 unsigned long action, void *arg)
1809{
1810 struct memory_notify *mn = arg;
1811 struct stable_node *stable_node;
1812
1813 switch (action) {
1814 case MEM_GOING_OFFLINE:
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824 mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
1825 break;
1826
1827 case MEM_OFFLINE:
1828
1829
1830
1831
1832
1833 while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
1834 mn->start_pfn + mn->nr_pages)) != NULL)
1835 remove_node_from_stable_tree(stable_node);
1836
1837
1838 case MEM_CANCEL_OFFLINE:
1839 mutex_unlock(&ksm_thread_mutex);
1840 break;
1841 }
1842 return NOTIFY_OK;
1843}
1844#endif
1845
1846#ifdef CONFIG_SYSFS
1847
1848
1849
1850
/*
 * Helpers that define a static kobj_attribute named <_name>_attr.
 * KSM_ATTR_RO() builds it with __ATTR_RO(_name); KSM_ATTR() builds it with
 * __ATTR(), mode 0644, wired to <_name>_show and <_name>_store.
 */
#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)
1856
1857static ssize_t sleep_millisecs_show(struct kobject *kobj,
1858 struct kobj_attribute *attr, char *buf)
1859{
1860 return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
1861}
1862
1863static ssize_t sleep_millisecs_store(struct kobject *kobj,
1864 struct kobj_attribute *attr,
1865 const char *buf, size_t count)
1866{
1867 unsigned long msecs;
1868 int err;
1869
1870 err = strict_strtoul(buf, 10, &msecs);
1871 if (err || msecs > UINT_MAX)
1872 return -EINVAL;
1873
1874 ksm_thread_sleep_millisecs = msecs;
1875
1876 return count;
1877}
1878KSM_ATTR(sleep_millisecs);
1879
1880static ssize_t pages_to_scan_show(struct kobject *kobj,
1881 struct kobj_attribute *attr, char *buf)
1882{
1883 return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
1884}
1885
1886static ssize_t pages_to_scan_store(struct kobject *kobj,
1887 struct kobj_attribute *attr,
1888 const char *buf, size_t count)
1889{
1890 int err;
1891 unsigned long nr_pages;
1892
1893 err = strict_strtoul(buf, 10, &nr_pages);
1894 if (err || nr_pages > UINT_MAX)
1895 return -EINVAL;
1896
1897 ksm_thread_pages_to_scan = nr_pages;
1898
1899 return count;
1900}
1901KSM_ATTR(pages_to_scan);
1902
1903static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
1904 char *buf)
1905{
1906 return sprintf(buf, "%u\n", ksm_run);
1907}
1908
1909static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1910 const char *buf, size_t count)
1911{
1912 int err;
1913 unsigned long flags;
1914
1915 err = strict_strtoul(buf, 10, &flags);
1916 if (err || flags > UINT_MAX)
1917 return -EINVAL;
1918 if (flags > KSM_RUN_UNMERGE)
1919 return -EINVAL;
1920
1921
1922
1923
1924
1925
1926
1927
1928 mutex_lock(&ksm_thread_mutex);
1929 if (ksm_run != flags) {
1930 ksm_run = flags;
1931 if (flags & KSM_RUN_UNMERGE) {
1932 int oom_score_adj;
1933
1934 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1935 err = unmerge_and_remove_all_rmap_items();
1936 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX,
1937 oom_score_adj);
1938 if (err) {
1939 ksm_run = KSM_RUN_STOP;
1940 count = err;
1941 }
1942 }
1943 }
1944 mutex_unlock(&ksm_thread_mutex);
1945
1946 if (flags & KSM_RUN_MERGE)
1947 wake_up_interruptible(&ksm_thread_wait);
1948
1949 return count;
1950}
1951KSM_ATTR(run);
1952
1953static ssize_t pages_shared_show(struct kobject *kobj,
1954 struct kobj_attribute *attr, char *buf)
1955{
1956 return sprintf(buf, "%lu\n", ksm_pages_shared);
1957}
1958KSM_ATTR_RO(pages_shared);
1959
1960static ssize_t pages_sharing_show(struct kobject *kobj,
1961 struct kobj_attribute *attr, char *buf)
1962{
1963 return sprintf(buf, "%lu\n", ksm_pages_sharing);
1964}
1965KSM_ATTR_RO(pages_sharing);
1966
1967static ssize_t pages_unshared_show(struct kobject *kobj,
1968 struct kobj_attribute *attr, char *buf)
1969{
1970 return sprintf(buf, "%lu\n", ksm_pages_unshared);
1971}
1972KSM_ATTR_RO(pages_unshared);
1973
1974static ssize_t pages_volatile_show(struct kobject *kobj,
1975 struct kobj_attribute *attr, char *buf)
1976{
1977 long ksm_pages_volatile;
1978
1979 ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
1980 - ksm_pages_sharing - ksm_pages_unshared;
1981
1982
1983
1984
1985 if (ksm_pages_volatile < 0)
1986 ksm_pages_volatile = 0;
1987 return sprintf(buf, "%ld\n", ksm_pages_volatile);
1988}
1989KSM_ATTR_RO(pages_volatile);
1990
1991static ssize_t full_scans_show(struct kobject *kobj,
1992 struct kobj_attribute *attr, char *buf)
1993{
1994 return sprintf(buf, "%lu\n", ksm_scan.seqnr);
1995}
1996KSM_ATTR_RO(full_scans);
1997
/*
 * NULL-terminated list of the attributes defined above with KSM_ATTR() and
 * KSM_ATTR_RO(); referenced by ksm_attr_group.
 */
static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
	NULL,
};
2009
/* Attribute group named "ksm" that bundles everything in ksm_attrs. */
static struct attribute_group ksm_attr_group = {
	.attrs = ksm_attrs,
	.name = "ksm",
};
2014#endif
2015
2016static int __init ksm_init(void)
2017{
2018 struct task_struct *ksm_thread;
2019 int err;
2020
2021 err = ksm_slab_init();
2022 if (err)
2023 goto out;
2024
2025 ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
2026 if (IS_ERR(ksm_thread)) {
2027 printk(KERN_ERR "ksm: creating kthread failed\n");
2028 err = PTR_ERR(ksm_thread);
2029 goto out_free;
2030 }
2031
2032#ifdef CONFIG_SYSFS
2033 err = sysfs_create_group(mm_kobj, &ksm_attr_group);
2034 if (err) {
2035 printk(KERN_ERR "ksm: register sysfs failed\n");
2036 kthread_stop(ksm_thread);
2037 goto out_free;
2038 }
2039#else
2040 ksm_run = KSM_RUN_MERGE;
2041
2042#endif
2043
2044#ifdef CONFIG_MEMORY_HOTREMOVE
2045
2046
2047
2048
2049 hotplug_memory_notifier(ksm_memory_callback, 100);
2050#endif
2051 return 0;
2052
2053out_free:
2054 ksm_slab_free();
2055out:
2056 return err;
2057}
2058module_init(ksm_init)
2059