1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/errno.h>
18#include <linux/mm.h>
19#include <linux/fs.h>
20#include <linux/mman.h>
21#include <linux/sched.h>
22#include <linux/rwsem.h>
23#include <linux/pagemap.h>
24#include <linux/rmap.h>
25#include <linux/spinlock.h>
26#include <linux/jhash.h>
27#include <linux/delay.h>
28#include <linux/kthread.h>
29#include <linux/wait.h>
30#include <linux/slab.h>
31#include <linux/rbtree.h>
32#include <linux/memory.h>
33#include <linux/mmu_notifier.h>
34#include <linux/swap.h>
35#include <linux/ksm.h>
36#include <linux/hash.h>
37#include <linux/freezer.h>
38#include <linux/oom.h>
39
40#include <asm/tlbflush.h>
41#include "internal.h"
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/*
 * struct mm_slot - KSM's bookkeeping record for one mm
 * @link: node in a bucket of the mm_slots_hash table
 * @mm_list: node in the list of slots anchored at ksm_mm_head
 * @rmap_list: head of this mm's singly linked chain of rmap_items
 * @mm: the mm_struct this slot describes
 */
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};
96
97
98
99
100
101
102
103
104
105
/*
 * struct ksm_scan - cursor describing where the scanner currently is
 * @mm_slot: slot of the mm being scanned (&ksm_mm_head between passes)
 * @address: next virtual address to examine in that mm
 * @rmap_list: link in the mm's rmap_item chain where the next item is
 *             looked up or inserted
 * @seqnr: incremented each time a scan pass over the whole list completes;
 *         its low bits are stamped into unstable rmap_items
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};
112
113
114
115
116
117
118
/*
 * struct stable_node - node of the stable tree
 * @node: rb-tree linkage into root_stable_tree
 * @hlist: head of the rmap_items attached to this node
 * @kpfn: page frame number of the KSM page this node stands for
 */
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * struct rmap_item - tracks one scanned virtual page of one mm
 * @rmap_list: next item in the owning mm_slot's chain
 * @anon_vma: anon_vma on which a reference is taken once the page is merged
 * @mm: mm the virtual address belongs to
 * @address: page-aligned virtual address; the bits below PAGE_MASK carry
 *           SEQNR_MASK / UNSTABLE_FLAG / STABLE_FLAG state
 * @oldchecksum: checksum of the page contents recorded on the previous scan
 * @node: rb-tree linkage while the item is in the unstable tree
 * @head: owning stable_node while the item is in the stable tree
 * @hlist: linkage into that stable_node's hlist
 *
 * @node shares storage with @head/@hlist, so an item can only be in one
 * of the two trees at a time.
 */
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;
	struct mm_struct *mm;
	unsigned long address;
	unsigned int oldchecksum;
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};
150
/*
 * State kept in the sub-page bits of rmap_item->address.  The low byte
 * holds the scan sequence number at which the item entered the unstable
 * tree; the two flags say which tree (if any) the item is in.
 */
#define SEQNR_MASK 0x0ff
#define UNSTABLE_FLAG 0x100
#define STABLE_FLAG 0x200

/* Roots of the stable tree and of the unstable tree */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

/* Hash table, indexed by hash_ptr(mm), used to find an mm's mm_slot */
#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

/* Anchor of the list of all registered mm_slots */
static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
/* The scan cursor; it starts out parked on the list anchor */
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

/* Slab caches for the three KSM object types */
static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* Bumped when the first rmap_item is appended to a stable node */
static unsigned long ksm_pages_shared;

/* Bumped for every further rmap_item appended to a populated stable node */
static unsigned long ksm_pages_sharing;

/* Number of rmap_items currently accounted as being in the unstable tree */
static unsigned long ksm_pages_unshared;

/* Number of rmap_items currently allocated */
static unsigned long ksm_rmap_items;

/* Number of pages the scan thread hands to ksm_do_scan() per batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds the scan thread sleeps between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

/*
 * Values of ksm_run.  Only KSM_RUN_MERGE is tested by the code in this
 * part of the file (see ksmd_should_run()).
 */
#define KSM_RUN_STOP 0
#define KSM_RUN_MERGE 1
#define KSM_RUN_UNMERGE 2
static unsigned int ksm_run = KSM_RUN_STOP;

/* Wait queue the scan thread sleeps on while it has nothing to do */
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
/* Held by the scan thread around each ksm_do_scan() batch */
static DEFINE_MUTEX(ksm_thread_mutex);
/* Taken around updates of the mm_slot list and hash, and of ksm_scan.mm_slot */
static DEFINE_SPINLOCK(ksm_mmlist_lock);

/*
 * Create a slab cache named "ksm_<struct>" whose objects have the size and
 * alignment of struct <struct>; no constructor is installed.
 */
#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)
204
205static int __init ksm_slab_init(void)
206{
207 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
208 if (!rmap_item_cache)
209 goto out;
210
211 stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
212 if (!stable_node_cache)
213 goto out_free1;
214
215 mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
216 if (!mm_slot_cache)
217 goto out_free2;
218
219 return 0;
220
221out_free2:
222 kmem_cache_destroy(stable_node_cache);
223out_free1:
224 kmem_cache_destroy(rmap_item_cache);
225out:
226 return -ENOMEM;
227}
228
/*
 * Destroy the three KSM slab caches, in reverse order of their creation.
 */
static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
	/* alloc_mm_slot() checks this pointer and fails cleanly once it is NULL */
	mm_slot_cache = NULL;
}
236
237static inline struct rmap_item *alloc_rmap_item(void)
238{
239 struct rmap_item *rmap_item;
240
241 rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
242 if (rmap_item)
243 ksm_rmap_items++;
244 return rmap_item;
245}
246
/*
 * Release an rmap_item back to its slab cache and drop it from the
 * ksm_rmap_items count.
 */
static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	ksm_rmap_items--;
	/* Clear the mm pointer before the object goes back to the cache */
	rmap_item->mm = NULL;
	kmem_cache_free(rmap_item_cache, rmap_item);
}
253
/*
 * Allocate a stable_node from its slab cache.  The memory is NOT zeroed
 * (kmem_cache_alloc, not kmem_cache_zalloc), so the caller must initialise
 * every field.  Returns NULL on allocation failure.
 */
static inline struct stable_node *alloc_stable_node(void)
{
	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}
258
/* Return a stable_node to its slab cache. */
static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}
263
264static inline struct mm_slot *alloc_mm_slot(void)
265{
266 if (!mm_slot_cache)
267 return NULL;
268 return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
269}
270
/* Return an mm_slot to its slab cache. */
static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}
275
276static struct mm_slot *get_mm_slot(struct mm_struct *mm)
277{
278 struct mm_slot *mm_slot;
279 struct hlist_head *bucket;
280 struct hlist_node *node;
281
282 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
283 hlist_for_each_entry(mm_slot, node, bucket, link) {
284 if (mm == mm_slot->mm)
285 return mm_slot;
286 }
287 return NULL;
288}
289
290static void insert_to_mm_slots_hash(struct mm_struct *mm,
291 struct mm_slot *mm_slot)
292{
293 struct hlist_head *bucket;
294
295 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
296 mm_slot->mm = mm;
297 hlist_add_head(&mm_slot->link, bucket);
298}
299
300static inline int in_stable_tree(struct rmap_item *rmap_item)
301{
302 return rmap_item->address & STABLE_FLAG;
303}
304
305
306
307
308
309
310
311
312
313static inline bool ksm_test_exit(struct mm_struct *mm)
314{
315 return atomic_read(&mm->mm_users) == 0;
316}
317
318
319
320
321
322
323
324
325
326
327
328
/*
 * Make sure no KSM page is left mapped at @addr in @vma.
 *
 * While the page found at @addr is a KSM page, a write fault is forced on
 * it with handle_mm_fault(); the loop ends as soon as the address holds no
 * page, holds a non-KSM page, or a fault reports WRITE, SIGBUS or OOM.
 *
 * Returns -ENOMEM if the last fault reported VM_FAULT_OOM, otherwise 0.
 */
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	do {
		cond_resched();
		page = follow_page(vma, addr, FOLL_GET);
		if (IS_ERR_OR_NULL(page))
			break;
		if (PageKsm(page))
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
							FAULT_FLAG_WRITE);
		else
			/* Not a KSM page: nothing to break, terminate the loop */
			ret = VM_FAULT_WRITE;
		put_page(page);
	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));

	/*
	 * VM_FAULT_SIGBUS stops the loop but is deliberately not turned into
	 * an error here: only VM_FAULT_OOM is reported to the caller.
	 */
	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}
376
377static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
378 unsigned long addr)
379{
380 struct vm_area_struct *vma;
381 if (ksm_test_exit(mm))
382 return NULL;
383 vma = find_vma(mm, addr);
384 if (!vma || vma->vm_start > addr)
385 return NULL;
386 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
387 return NULL;
388 return vma;
389}
390
/*
 * Undo a merge attempt for @rmap_item: drop the anon_vma reference the
 * item holds, then break any KSM mapping at the item's address.
 */
static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;

	/*
	 * Every caller that needs to break COW also holds an anon_vma
	 * reference taken by try_to_merge_with_ksm_page(); release it here.
	 */
	put_anon_vma(rmap_item->anon_vma);

	/* The vma lookup and the fault both need mmap_sem held for read */
	down_read(&mm->mmap_sem);
	vma = find_mergeable_vma(mm, addr);
	if (vma)
		break_ksm(vma, addr);
	up_read(&mm->mmap_sem);
}
409
410static struct page *page_trans_compound_anon(struct page *page)
411{
412 if (PageTransCompound(page)) {
413 struct page *head = compound_trans_head(page);
414
415
416
417
418 if (PageAnon(head))
419 return head;
420 }
421 return NULL;
422}
423
424static struct page *get_mergeable_page(struct rmap_item *rmap_item)
425{
426 struct mm_struct *mm = rmap_item->mm;
427 unsigned long addr = rmap_item->address;
428 struct vm_area_struct *vma;
429 struct page *page;
430
431 down_read(&mm->mmap_sem);
432 vma = find_mergeable_vma(mm, addr);
433 if (!vma)
434 goto out;
435
436 page = follow_page(vma, addr, FOLL_GET);
437 if (IS_ERR_OR_NULL(page))
438 goto out;
439 if (PageAnon(page) || page_trans_compound_anon(page)) {
440 flush_anon_page(vma, page, addr);
441 flush_dcache_page(page);
442 } else {
443 put_page(page);
444out: page = NULL;
445 }
446 up_read(&mm->mmap_sem);
447 return page;
448}
449
/*
 * Tear down @stable_node: detach the accounting of every rmap_item hanging
 * off it, erase the node from the stable tree and free it.
 */
static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;

	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		/*
		 * Mirror stable_tree_append(): the last entry of the hlist
		 * accounts for "shared", every other entry for "sharing".
		 */
		if (rmap_item->hlist.next)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;
		put_anon_vma(rmap_item->anon_vma);
		/* Clears STABLE_FLAG, so the item no longer refers to this node */
		rmap_item->address &= PAGE_MASK;
		cond_resched();
	}

	rb_erase(&stable_node->node, &root_stable_tree);
	free_stable_node(stable_node);
}
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
/*
 * Get a reference on the KSM page that @stable_node stands for.
 *
 * The page is recognised as still belonging to the node by comparing
 * page->mapping with the node pointer tagged with
 * PAGE_MAPPING_ANON | PAGE_MAPPING_KSM.  The comparison is made both before
 * and after the reference is taken, so a page that stopped being this
 * node's KSM page in between is detected.
 *
 * Returns the page with an extra reference, or NULL if the node was stale;
 * in that case the node has been removed from the stable tree and freed,
 * so the caller must not touch @stable_node again.
 */
static struct page *get_ksm_page(struct stable_node *stable_node)
{
	struct page *page;
	void *expected_mapping;

	page = pfn_to_page(stable_node->kpfn);
	expected_mapping = (void *)stable_node +
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
	rcu_read_lock();
	if (page->mapping != expected_mapping)
		goto stale;
	/* Fails if the page's refcount has already dropped to zero */
	if (!get_page_unless_zero(page))
		goto stale;
	/* Re-check now that the reference pins the page */
	if (page->mapping != expected_mapping) {
		put_page(page);
		goto stale;
	}
	rcu_read_unlock();
	return page;
stale:
	rcu_read_unlock();
	remove_node_from_stable_tree(stable_node);
	return NULL;
}
522
523
524
525
526
/*
 * Remove @rmap_item from whichever tree it is in (stable or unstable),
 * fix up the page counters and clear the flag bits in its address.
 * An item in neither tree is left untouched.
 */
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{
	if (rmap_item->address & STABLE_FLAG) {
		struct stable_node *stable_node;
		struct page *page;

		stable_node = rmap_item->head;
		page = get_ksm_page(stable_node);
		/*
		 * NULL means the node was stale and has been torn down by
		 * remove_node_from_stable_tree(), which already adjusted the
		 * counters and cleared this item's flags.
		 */
		if (!page)
			goto out;

		/* The stable node's hlist is modified under the page lock */
		lock_page(page);
		hlist_del(&rmap_item->hlist);
		unlock_page(page);
		put_page(page);

		/* Other items remain: we were one of the sharers */
		if (stable_node->hlist.first)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;

		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;

	} else if (rmap_item->address & UNSTABLE_FLAG) {
		unsigned char age;

		/*
		 * The low byte of address holds the seqnr at insertion time.
		 * age == 0: inserted during the current pass, so the item is
		 * really in the tree and must be erased.
		 * age == 1: inserted during the previous pass; the unstable
		 * tree root is reset at the start of each pass, so there is
		 * nothing to erase.
		 */
		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
		BUG_ON(age > 1);
		if (!age)
			rb_erase(&rmap_item->node, &root_unstable_tree);

		ksm_pages_unshared--;
		rmap_item->address &= PAGE_MASK;
	}
out:
	cond_resched();		/* we're called from many long loops */
}
571
572static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
573 struct rmap_item **rmap_list)
574{
575 while (*rmap_list) {
576 struct rmap_item *rmap_item = *rmap_list;
577 *rmap_list = rmap_item->rmap_list;
578 remove_rmap_item_from_tree(rmap_item);
579 free_rmap_item(rmap_item);
580 }
581}
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596static int unmerge_ksm_pages(struct vm_area_struct *vma,
597 unsigned long start, unsigned long end)
598{
599 unsigned long addr;
600 int err = 0;
601
602 for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
603 if (ksm_test_exit(vma->vm_mm))
604 break;
605 if (signal_pending(current))
606 err = -ERESTARTSYS;
607 else
608 err = break_ksm(vma, addr);
609 }
610 return err;
611}
612
613#ifdef CONFIG_SYSFS
614
615
616
/*
 * Walk every registered mm, break KSM in all of its mergeable vmas and
 * free all of its rmap_items.  Slots of mms found to be exiting are freed
 * on the way.
 *
 * Returns 0 on success (and resets ksm_scan.seqnr), or the first error from
 * unmerge_ksm_pages(), in which case the scan cursor is parked back on
 * ksm_mm_head.
 */
static int unmerge_and_remove_all_rmap_items(void)
{
	struct mm_slot *mm_slot;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int err = 0;

	/* Start the cursor at the first slot on the list */
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
						struct mm_slot, mm_list);
	spin_unlock(&ksm_mmlist_lock);

	for (mm_slot = ksm_scan.mm_slot;
			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
		mm = mm_slot->mm;
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (ksm_test_exit(mm))
				break;
			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
				continue;
			err = unmerge_ksm_pages(vma,
						vma->vm_start, vma->vm_end);
			if (err)
				goto error;
		}

		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

		/* Advance the cursor before this slot may be freed below */
		spin_lock(&ksm_mmlist_lock);
		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
						struct mm_slot, mm_list);
		if (ksm_test_exit(mm)) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			spin_unlock(&ksm_mmlist_lock);

			free_mm_slot(mm_slot);
			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
			up_read(&mm->mmap_sem);
			/* Drops the mm_count reference taken in __ksm_enter() */
			mmdrop(mm);
		} else {
			spin_unlock(&ksm_mmlist_lock);
			up_read(&mm->mmap_sem);
		}
	}

	ksm_scan.seqnr = 0;
	return 0;

error:
	/* mmap_sem of the mm being processed is still held for read here */
	up_read(&mm->mmap_sem);
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = &ksm_mm_head;
	spin_unlock(&ksm_mmlist_lock);
	return err;
}
674#endif
675
676static u32 calc_checksum(struct page *page)
677{
678 u32 checksum;
679 void *addr = kmap_atomic(page);
680 checksum = jhash2(addr, PAGE_SIZE / 4, 17);
681 kunmap_atomic(addr);
682 return checksum;
683}
684
685static int memcmp_pages(struct page *page1, struct page *page2)
686{
687 char *addr1, *addr2;
688 int ret;
689
690 addr1 = kmap_atomic(page1);
691 addr2 = kmap_atomic(page2);
692 ret = memcmp(addr1, addr2, PAGE_SIZE);
693 kunmap_atomic(addr2);
694 kunmap_atomic(addr1);
695 return ret;
696}
697
/* Return 1 if the two pages have identical contents, 0 otherwise. */
static inline int pages_identical(struct page *page1, struct page *page2)
{
	return memcmp_pages(page1, page2) == 0;
}
702
/*
 * Write-protect and clean the pte that maps @page in @vma, and hand the
 * resulting pte value back through @orig_pte so the caller can later verify
 * that the mapping has not changed.
 *
 * Returns 0 on success, -EFAULT if the page is not mapped in @vma or if its
 * reference count shows users other than the ones accounted for below.
 */
static int write_protect_page(struct vm_area_struct *vma, struct page *page,
			      pte_t *orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));
	/* On success this returns with the page table lock (ptl) held */
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;

		swapped = PageSwapCache(page);
		flush_cache_page(vma, addr, page_to_pfn(page));
		/*
		 * Clear the pte and flush the TLB before looking at the
		 * reference count, so the count cannot be raised through
		 * this mapping while it is being checked.
		 */
		entry = ptep_clear_flush(vma, addr, ptep);
		/*
		 * Expected references: one per mapping, one held by our
		 * caller, and one for the swap cache if the page is in it.
		 * Anything else means somebody else is using the page
		 * (presumably in-flight I/O such as O_DIRECT - TODO confirm),
		 * so restore the pte and give up.
		 */
		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
			set_pte_at(mm, addr, ptep, entry);
			goto out_unlock;
		}
		/* Carry the pte dirty bit over to the page before cleaning it */
		if (pte_dirty(entry))
			set_page_dirty(page);
		entry = pte_mkclean(pte_wrprotect(entry));
		set_pte_at_notify(mm, addr, ptep, entry);
	}
	*orig_pte = *ptep;
	err = 0;

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return err;
}
758
759
760
761
762
763
764
765
766
767
/*
 * Replace the mapping of @page in @vma with a mapping of @kpage.
 *
 * @orig_pte is the pte value recorded by write_protect_page(); the
 * replacement only happens if the pte is still identical to it.
 *
 * Returns 0 on success, -EFAULT if the page is not mapped at the expected
 * place or the pte has changed.
 */
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	/* Walk the page tables by hand down to the pte */
	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, addr);
	BUG_ON(pmd_trans_huge(*pmd));
	if (!pmd_present(*pmd))
		goto out;

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	/* The mapping changed since it was write-protected: abort */
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out;
	}

	/* New mapping of kpage: take its reference and rmap first */
	get_page(kpage);
	page_add_anon_rmap(kpage, vma, addr);

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	/* Old mapping of page is gone: drop its rmap and its reference */
	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out:
	return err;
}
820
/*
 * If @page belongs to an anonymous transparent compound page, try to split
 * that compound page.
 *
 * Returns 0 if @page is not part of such a compound page, the result of
 * split_huge_page() if the split was attempted, or 1 if the head page
 * could not be pinned or was no longer anonymous once pinned.
 */
static int page_trans_compound_anon_split(struct page *page)
{
	struct page *head = page_trans_compound_anon(page);
	int ret;

	if (!head)
		return 0;

	/* Pin the head; failure means it is already being freed */
	if (!get_page_unless_zero(head))
		return 1;

	/* Re-check under the pin before splitting */
	ret = PageAnon(head) ? split_huge_page(head) : 1;
	put_page(head);
	return ret;
}
847
848
849
850
851
852
853
854
855
856
/*
 * Try to merge @page, mapped in @vma, into @kpage.
 *
 * With @kpage == NULL nothing is replaced: @page itself is write-protected
 * and its stable-node mapping is set to NULL via set_page_stable_node(), in
 * preparation for a later merge into it.  With @kpage == @page there is
 * nothing to do.
 *
 * Returns 0 on success, -EFAULT (or the error from replace_page()) on
 * failure.
 */
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;

	if (!(vma->vm_flags & VM_MERGEABLE))
		goto out;
	/* A non-zero split result means the huge page could not be split */
	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
		goto out;
	BUG_ON(PageTransCompound(page));
	if (!PageAnon(page))
		goto out;

	/*
	 * Only trylock: if the page lock is contended, give up on this page
	 * rather than sleep.
	 */
	if (!trylock_page(page))
		goto out;

	/*
	 * Write-protect first so the contents cannot change between the
	 * comparison and the replacement below.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	/* In an mlocked vma, move the mlock from the old page to kpage */
	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

	unlock_page(page);
out:
	return err;
}
917
918
919
920
921
922
923
924static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
925 struct page *page, struct page *kpage)
926{
927 struct mm_struct *mm = rmap_item->mm;
928 struct vm_area_struct *vma;
929 int err = -EFAULT;
930
931 down_read(&mm->mmap_sem);
932 if (ksm_test_exit(mm))
933 goto out;
934 vma = find_vma(mm, rmap_item->address);
935 if (!vma || vma->vm_start > rmap_item->address)
936 goto out;
937
938 err = try_to_merge_one_page(vma, page, kpage);
939 if (err)
940 goto out;
941
942
943 rmap_item->anon_vma = vma->anon_vma;
944 get_anon_vma(vma->anon_vma);
945out:
946 up_read(&mm->mmap_sem);
947 return err;
948}
949
950
951
952
953
954
955
956
957
958
959
960static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
961 struct page *page,
962 struct rmap_item *tree_rmap_item,
963 struct page *tree_page)
964{
965 int err;
966
967 err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
968 if (!err) {
969 err = try_to_merge_with_ksm_page(tree_rmap_item,
970 tree_page, page);
971
972
973
974
975 if (err)
976 break_cow(rmap_item);
977 }
978 return err ? NULL : page;
979}
980
981
982
983
984
985
986
987
988
989
990static struct page *stable_tree_search(struct page *page)
991{
992 struct rb_node *node = root_stable_tree.rb_node;
993 struct stable_node *stable_node;
994
995 stable_node = page_stable_node(page);
996 if (stable_node) {
997 get_page(page);
998 return page;
999 }
1000
1001 while (node) {
1002 struct page *tree_page;
1003 int ret;
1004
1005 cond_resched();
1006 stable_node = rb_entry(node, struct stable_node, node);
1007 tree_page = get_ksm_page(stable_node);
1008 if (!tree_page)
1009 return NULL;
1010
1011 ret = memcmp_pages(page, tree_page);
1012
1013 if (ret < 0) {
1014 put_page(tree_page);
1015 node = node->rb_left;
1016 } else if (ret > 0) {
1017 put_page(tree_page);
1018 node = node->rb_right;
1019 } else
1020 return tree_page;
1021 }
1022
1023 return NULL;
1024}
1025
1026
1027
1028
1029
1030
1031
1032
/*
 * Insert @kpage into the stable tree, ordered by page contents.
 *
 * Returns the newly allocated stable_node, or NULL if a stale node was met
 * during the descent, if a page with identical contents is already in the
 * tree, or if the node allocation failed.
 */
static struct stable_node *stable_tree_insert(struct page *kpage)
{
	struct rb_node **new = &root_stable_tree.rb_node;
	struct rb_node *parent = NULL;
	struct stable_node *stable_node;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		/* Stale node: it has been erased, so the descent is invalid */
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(kpage, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			/*
			 * An identical page is already in the stable tree;
			 * refuse the insertion rather than add a duplicate.
			 */
			return NULL;
		}
	}

	stable_node = alloc_stable_node();
	if (!stable_node)
		return NULL;

	rb_link_node(&stable_node->node, parent, new);
	rb_insert_color(&stable_node->node, &root_stable_tree);

	INIT_HLIST_HEAD(&stable_node->hlist);

	/* Tie page and node together in both directions */
	stable_node->kpfn = page_to_pfn(kpage);
	set_page_stable_node(kpage, stable_node);

	return stable_node;
}
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
/*
 * Search the unstable tree for a page with the same contents as @page; if
 * none is found, insert @rmap_item into the tree.
 *
 * On a match, returns the matching tree item and stores its page, with a
 * reference held, in *@tree_pagep.  Returns NULL when @rmap_item was
 * inserted instead, and also when the search was abandoned because a tree
 * item's page could not be obtained or turned out to be @page itself; in
 * those two cases nothing is inserted.
 */
static
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
					      struct page *page,
					      struct page **tree_pagep)

{
	struct rb_node **new = &root_unstable_tree.rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct rmap_item *tree_rmap_item;
		struct page *tree_page;
		int ret;

		cond_resched();
		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
		tree_page = get_mergeable_page(tree_rmap_item);
		if (IS_ERR_OR_NULL(tree_page))
			return NULL;

		/*
		 * Don't substitute a page for itself: the same physical page
		 * is mapped at both addresses.
		 */
		if (page == tree_page) {
			put_page(tree_page);
			return NULL;
		}

		ret = memcmp_pages(page, tree_page);

		parent = *new;
		if (ret < 0) {
			put_page(tree_page);
			new = &parent->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			new = &parent->rb_right;
		} else {
			/* Match: the reference on tree_page passes to the caller */
			*tree_pagep = tree_page;
			return tree_rmap_item;
		}
	}

	/* No match: record the flag and current seqnr, then link the item in */
	rmap_item->address |= UNSTABLE_FLAG;
	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
	rb_link_node(&rmap_item->node, parent, new);
	rb_insert_color(&rmap_item->node, &root_unstable_tree);

	ksm_pages_unshared++;
	return NULL;
}
1147
1148
1149
1150
1151
1152
1153static void stable_tree_append(struct rmap_item *rmap_item,
1154 struct stable_node *stable_node)
1155{
1156 rmap_item->head = stable_node;
1157 rmap_item->address |= STABLE_FLAG;
1158 hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
1159
1160 if (rmap_item->hlist.next)
1161 ksm_pages_sharing++;
1162 else
1163 ksm_pages_shared++;
1164}
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
/*
 * Try to merge @page, tracked by @rmap_item, with an identical page.
 *
 * The stable tree is searched first.  If nothing matches there and the
 * page's checksum is unchanged since the previous scan, the unstable tree
 * is searched; a match there is merged and the result is promoted to the
 * stable tree, while a miss leaves @rmap_item inserted in the unstable
 * tree.
 */
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
	struct rmap_item *tree_rmap_item;
	struct page *tree_page = NULL;
	struct stable_node *stable_node;
	struct page *kpage;
	unsigned int checksum;
	int err;

	/* Start from a clean state: drop any previous tree membership */
	remove_rmap_item_from_tree(rmap_item);

	/* We first start with searching the page inside the stable tree */
	kpage = stable_tree_search(page);
	if (kpage) {
		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
		if (!err) {
			/*
			 * The page was successfully merged:
			 * add its rmap_item to the stable tree.
			 * The node's hlist is modified under the page lock.
			 */
			lock_page(kpage);
			stable_tree_append(rmap_item, page_stable_node(kpage));
			unlock_page(kpage);
		}
		/* Drop the reference returned by stable_tree_search() */
		put_page(kpage);
		return;
	}

	/*
	 * A page whose checksum differs from the one recorded last time is
	 * still changing: just remember the new checksum and do not put it
	 * in the unstable tree on this pass.
	 */
	checksum = calc_checksum(page);
	if (rmap_item->oldchecksum != checksum) {
		rmap_item->oldchecksum = checksum;
		return;
	}

	tree_rmap_item =
		unstable_tree_search_insert(rmap_item, page, &tree_page);
	if (tree_rmap_item) {
		kpage = try_to_merge_two_pages(rmap_item, page,
						tree_rmap_item, tree_page);
		put_page(tree_page);
		/*
		 * As soon as we merge this page, we want to remove the
		 * rmap_item of the page we have merged with from the unstable
		 * tree, and insert it instead as new node in the stable tree.
		 */
		if (kpage) {
			remove_rmap_item_from_tree(tree_rmap_item);

			lock_page(kpage);
			stable_node = stable_tree_insert(kpage);
			if (stable_node) {
				stable_tree_append(tree_rmap_item, stable_node);
				stable_tree_append(rmap_item, stable_node);
			}
			unlock_page(kpage);

			/*
			 * If we fail to insert the page into the stable tree,
			 * we will have 2 virtual addresses that are pointing
			 * to a ksm page left outside the stable tree,
			 * in which case we need to break_cow on both.
			 */
			if (!stable_node) {
				break_cow(tree_rmap_item);
				break_cow(rmap_item);
			}
		}
	}
}
1250
1251static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
1252 struct rmap_item **rmap_list,
1253 unsigned long addr)
1254{
1255 struct rmap_item *rmap_item;
1256
1257 while (*rmap_list) {
1258 rmap_item = *rmap_list;
1259 if ((rmap_item->address & PAGE_MASK) == addr)
1260 return rmap_item;
1261 if (rmap_item->address > addr)
1262 break;
1263 *rmap_list = rmap_item->rmap_list;
1264 remove_rmap_item_from_tree(rmap_item);
1265 free_rmap_item(rmap_item);
1266 }
1267
1268 rmap_item = alloc_rmap_item();
1269 if (rmap_item) {
1270
1271 rmap_item->mm = mm_slot->mm;
1272 rmap_item->address = addr;
1273 rmap_item->rmap_list = *rmap_list;
1274 *rmap_list = rmap_item;
1275 }
1276 return rmap_item;
1277}
1278
/*
 * Advance the scan cursor to the next anonymous page in a mergeable vma.
 *
 * On success returns the page's rmap_item and stores the page, with a
 * reference held, in *@page.  Returns NULL when there are no mms to scan,
 * when a full pass over all mms has just completed (ksm_scan.seqnr is then
 * incremented), or when an rmap_item could not be allocated.
 */
static struct rmap_item *scan_get_next_rmap_item(struct page **page)
{
	struct mm_struct *mm;
	struct mm_slot *slot;
	struct vm_area_struct *vma;
	struct rmap_item *rmap_item;

	if (list_empty(&ksm_mm_head.mm_list))
		return NULL;

	slot = ksm_scan.mm_slot;
	if (slot == &ksm_mm_head) {
		/*
		 * The cursor is on the list head, so a new pass starts here.
		 * Drain the per-cpu LRU caches first (presumably so pages
		 * pinned only by pagevecs do not look busy to
		 * write_protect_page() - TODO confirm).
		 */
		lru_add_drain_all();

		/* Each pass starts with an empty unstable tree */
		root_unstable_tree = RB_ROOT;

		spin_lock(&ksm_mmlist_lock);
		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
		ksm_scan.mm_slot = slot;
		spin_unlock(&ksm_mmlist_lock);
		/*
		 * Although we tested list_empty() above, a racing __ksm_exit
		 * of the last mm on the list may have removed it since then.
		 */
		if (slot == &ksm_mm_head)
			return NULL;
next_mm:
		/* Also reached by goto from below when moving to the next mm */
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}

	mm = slot->mm;
	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		vma = NULL;
	else
		vma = find_vma(mm, ksm_scan.address);

	for (; vma; vma = vma->vm_next) {
		if (!(vma->vm_flags & VM_MERGEABLE))
			continue;
		if (ksm_scan.address < vma->vm_start)
			ksm_scan.address = vma->vm_start;
		/* No anon_vma yet: skip the whole vma */
		if (!vma->anon_vma)
			ksm_scan.address = vma->vm_end;

		while (ksm_scan.address < vma->vm_end) {
			if (ksm_test_exit(mm))
				break;
			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
			if (IS_ERR_OR_NULL(*page)) {
				ksm_scan.address += PAGE_SIZE;
				cond_resched();
				continue;
			}
			if (PageAnon(*page) ||
			    page_trans_compound_anon(*page)) {
				flush_anon_page(vma, *page, ksm_scan.address);
				flush_dcache_page(*page);
				rmap_item = get_next_rmap_item(slot,
					ksm_scan.rmap_list, ksm_scan.address);
				if (rmap_item) {
					/* Advance the cursor past this page */
					ksm_scan.rmap_list =
							&rmap_item->rmap_list;
					ksm_scan.address += PAGE_SIZE;
				} else
					/* Allocation failed: drop the page reference */
					put_page(*page);
				up_read(&mm->mmap_sem);
				return rmap_item;
			}
			put_page(*page);
			ksm_scan.address += PAGE_SIZE;
			cond_resched();
		}
	}

	/*
	 * For an exiting mm, rewind the cursor so that all of its rmap_items
	 * are removed below and the slot itself is freed.
	 */
	if (ksm_test_exit(mm)) {
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}
	/*
	 * Nuke all the rmap_items that are above this current rmap:
	 * because there were no VM_MERGEABLE vmas with such addresses.
	 */
	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
						struct mm_slot, mm_list);
	if (ksm_scan.address == 0) {
		/*
		 * address is still 0 only if the mm is exiting (rewound
		 * above) or the walk never reached a mergeable vma, so the
		 * slot has no rmap_items left: unhash it, unlink it and free
		 * it.  mmap_sem is still held for read while the slot and
		 * the MMF_VM_MERGEABLE bit are torn down, and mmdrop() then
		 * releases the mm_count reference taken in __ksm_enter().
		 */
		hlist_del(&slot->link);
		list_del(&slot->mm_list);
		spin_unlock(&ksm_mmlist_lock);

		free_mm_slot(slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		up_read(&mm->mmap_sem);
		mmdrop(mm);
	} else {
		spin_unlock(&ksm_mmlist_lock);
		up_read(&mm->mmap_sem);
	}

	/* Repeat until we've completed scanning the whole list */
	slot = ksm_scan.mm_slot;
	if (slot != &ksm_mm_head)
		goto next_mm;

	ksm_scan.seqnr++;
	return NULL;
}
1409
1410
1411
1412
1413
1414static void ksm_do_scan(unsigned int scan_npages)
1415{
1416 struct rmap_item *rmap_item;
1417 struct page *uninitialized_var(page);
1418
1419 while (scan_npages-- && likely(!freezing(current))) {
1420 cond_resched();
1421 rmap_item = scan_get_next_rmap_item(&page);
1422 if (!rmap_item)
1423 return;
1424 if (!PageKsm(page) || !in_stable_tree(rmap_item))
1425 cmp_and_merge_page(page, rmap_item);
1426 put_page(page);
1427 }
1428}
1429
1430static int ksmd_should_run(void)
1431{
1432 return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
1433}
1434
/*
 * Main loop of the KSM scanning kernel thread.
 *
 * Each iteration scans one batch of ksm_thread_pages_to_scan pages under
 * ksm_thread_mutex, then either sleeps ksm_thread_sleep_millisecs (while
 * there is still work) or blocks on ksm_thread_wait until there is work or
 * the thread is asked to stop.  @nothing is unused.  Always returns 0.
 */
static int ksm_scan_thread(void *nothing)
{
	set_freezable();
	/* Run at a lower priority than default */
	set_user_nice(current, 5);

	while (!kthread_should_stop()) {
		mutex_lock(&ksm_thread_mutex);
		if (ksmd_should_run())
			ksm_do_scan(ksm_thread_pages_to_scan);
		mutex_unlock(&ksm_thread_mutex);

		try_to_freeze();

		if (ksmd_should_run()) {
			schedule_timeout_interruptible(
				msecs_to_jiffies(ksm_thread_sleep_millisecs));
		} else {
			wait_event_freezable(ksm_thread_wait,
				ksmd_should_run() || kthread_should_stop());
		}
	}
	return 0;
}
1458
1459int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1460 unsigned long end, int advice, unsigned long *vm_flags)
1461{
1462 struct mm_struct *mm = vma->vm_mm;
1463 int err;
1464
1465 switch (advice) {
1466 case MADV_MERGEABLE:
1467
1468
1469
1470 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1471 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1472 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1473 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
1474 return 0;
1475
1476 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
1477 err = __ksm_enter(mm);
1478 if (err)
1479 return err;
1480 }
1481
1482 *vm_flags |= VM_MERGEABLE;
1483 break;
1484
1485 case MADV_UNMERGEABLE:
1486 if (!(*vm_flags & VM_MERGEABLE))
1487 return 0;
1488
1489 if (vma->anon_vma) {
1490 err = unmerge_ksm_pages(vma, start, end);
1491 if (err)
1492 return err;
1493 }
1494
1495 *vm_flags &= ~VM_MERGEABLE;
1496 break;
1497 }
1498
1499 return 0;
1500}
1501
/*
 * Register @mm with KSM: allocate an mm_slot for it, hash it, queue it for
 * scanning, set MMF_VM_MERGEABLE and take a reference on mm_count.
 *
 * Returns 0 on success or -ENOMEM if the slot cannot be allocated.
 */
int __ksm_enter(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int needs_wakeup;

	mm_slot = alloc_mm_slot();
	if (!mm_slot)
		return -ENOMEM;

	/*
	 * The list is sampled before the lock is taken: if it was empty the
	 * scan thread may be asleep and needs a wakeup once the mm is queued.
	 */
	needs_wakeup = list_empty(&ksm_mm_head.mm_list);

	spin_lock(&ksm_mmlist_lock);
	insert_to_mm_slots_hash(mm, mm_slot);
	/*
	 * list_add_tail() relative to the scan cursor places the new slot
	 * immediately before the slot currently being scanned, so it is
	 * reached only after the rest of the list has been visited.
	 */
	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
	spin_unlock(&ksm_mmlist_lock);

	set_bit(MMF_VM_MERGEABLE, &mm->flags);
	/* Reference dropped by mmdrop() when the slot is freed */
	atomic_inc(&mm->mm_count);

	if (needs_wakeup)
		wake_up_interruptible(&ksm_thread_wait);

	return 0;
}
1532
/*
 * Called when @mm is going away: detach it from KSM if that can be done
 * immediately, otherwise arrange for the scan thread to do it.
 */
void __ksm_exit(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int easy_to_free = 0;

	/*
	 * Three cases are distinguished under ksm_mmlist_lock:
	 * - the slot is the one the scanner is working on: leave it alone;
	 * - the slot has no rmap_items: unhash and unlink it, free it below;
	 * - the slot still has rmap_items: move it right after the scan
	 *   cursor so the scanner reaches it next and cleans it up.
	 */
	spin_lock(&ksm_mmlist_lock);
	mm_slot = get_mm_slot(mm);
	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
		if (!mm_slot->rmap_list) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			easy_to_free = 1;
		} else {
			list_move(&mm_slot->mm_list,
				  &ksm_scan.mm_slot->mm_list);
		}
	}
	spin_unlock(&ksm_mmlist_lock);

	if (easy_to_free) {
		free_mm_slot(mm_slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		mmdrop(mm);
	} else if (mm_slot) {
		/*
		 * Taking and releasing mmap_sem for write waits for any
		 * current holder (presumably the scan thread working on this
		 * mm under a read lock - TODO confirm) to let go before the
		 * caller continues.
		 */
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}
}
1570
1571struct page *ksm_does_need_to_copy(struct page *page,
1572 struct vm_area_struct *vma, unsigned long address)
1573{
1574 struct page *new_page;
1575
1576 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1577 if (new_page) {
1578 copy_user_highpage(new_page, page, address, vma);
1579
1580 SetPageDirty(new_page);
1581 __SetPageUptodate(new_page);
1582 SetPageSwapBacked(new_page);
1583 __set_page_locked(new_page);
1584
1585 if (page_evictable(new_page, vma))
1586 lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
1587 else
1588 add_page_to_unevictable_list(new_page);
1589 }
1590
1591 return new_page;
1592}
1593
/*
 * Count references to the KSM page @page across every mapping recorded in
 * its stable node.
 *
 * @memcg, if non-NULL, restricts the walk to mms matching that cgroup.
 * @vm_flags is passed through to page_referenced_one().  The page must be
 * a locked KSM page.
 *
 * Returns the summed result of page_referenced_one(), or 0 if the page has
 * no stable node.
 */
int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
			unsigned long *vm_flags)
{
	struct stable_node *stable_node;
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;
	unsigned int mapcount = page_mapcount(page);
	int referenced = 0;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return 0;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Two passes: the first (search_new_forks == 0) only
			 * visits the vma in the mm the rmap_item was created
			 * for; the second only visits vmas of other mms that
			 * share the anon_vma, i.e. forked children.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				continue;

			referenced += page_referenced_one(page, vma,
				rmap_item->address, &mapcount, vm_flags);
			/* First pass: at most one vma per rmap_item */
			if (!search_new_forks || !mapcount)
				break;
		}
		anon_vma_unlock(anon_vma);
		/* Every mapping has been accounted for: stop early */
		if (!mapcount)
			goto out;
	}
	if (!search_new_forks++)
		goto again;
out:
	return referenced;
}
1648
1649int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
1650{
1651 struct stable_node *stable_node;
1652 struct hlist_node *hlist;
1653 struct rmap_item *rmap_item;
1654 int ret = SWAP_AGAIN;
1655 int search_new_forks = 0;
1656
1657 VM_BUG_ON(!PageKsm(page));
1658 VM_BUG_ON(!PageLocked(page));
1659
1660 stable_node = page_stable_node(page);
1661 if (!stable_node)
1662 return SWAP_FAIL;
1663again:
1664 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1665 struct anon_vma *anon_vma = rmap_item->anon_vma;
1666 struct anon_vma_chain *vmac;
1667 struct vm_area_struct *vma;
1668
1669 anon_vma_lock(anon_vma);
1670 list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
1671 vma = vmac->vma;
1672 if (rmap_item->address < vma->vm_start ||
1673 rmap_item->address >= vma->vm_end)
1674 continue;
1675
1676
1677
1678
1679
1680
1681 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1682 continue;
1683
1684 ret = try_to_unmap_one(page, vma,
1685 rmap_item->address, flags);
1686 if (ret != SWAP_AGAIN || !page_mapped(page)) {
1687 anon_vma_unlock(anon_vma);
1688 goto out;
1689 }
1690 }
1691 anon_vma_unlock(anon_vma);
1692 }
1693 if (!search_new_forks++)
1694 goto again;
1695out:
1696 return ret;
1697}
1698
1699#ifdef CONFIG_MIGRATION
1700int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
1701 struct vm_area_struct *, unsigned long, void *), void *arg)
1702{
1703 struct stable_node *stable_node;
1704 struct hlist_node *hlist;
1705 struct rmap_item *rmap_item;
1706 int ret = SWAP_AGAIN;
1707 int search_new_forks = 0;
1708
1709 VM_BUG_ON(!PageKsm(page));
1710 VM_BUG_ON(!PageLocked(page));
1711
1712 stable_node = page_stable_node(page);
1713 if (!stable_node)
1714 return ret;
1715again:
1716 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1717 struct anon_vma *anon_vma = rmap_item->anon_vma;
1718 struct anon_vma_chain *vmac;
1719 struct vm_area_struct *vma;
1720
1721 anon_vma_lock(anon_vma);
1722 list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
1723 vma = vmac->vma;
1724 if (rmap_item->address < vma->vm_start ||
1725 rmap_item->address >= vma->vm_end)
1726 continue;
1727
1728
1729
1730
1731
1732
1733 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1734 continue;
1735
1736 ret = rmap_one(page, vma, rmap_item->address, arg);
1737 if (ret != SWAP_AGAIN) {
1738 anon_vma_unlock(anon_vma);
1739 goto out;
1740 }
1741 }
1742 anon_vma_unlock(anon_vma);
1743 }
1744 if (!search_new_forks++)
1745 goto again;
1746out:
1747 return ret;
1748}
1749
1750void ksm_migrate_page(struct page *newpage, struct page *oldpage)
1751{
1752 struct stable_node *stable_node;
1753
1754 VM_BUG_ON(!PageLocked(oldpage));
1755 VM_BUG_ON(!PageLocked(newpage));
1756 VM_BUG_ON(newpage->mapping != oldpage->mapping);
1757
1758 stable_node = page_stable_node(newpage);
1759 if (stable_node) {
1760 VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
1761 stable_node->kpfn = page_to_pfn(newpage);
1762 }
1763}
1764#endif
1765
1766#ifdef CONFIG_MEMORY_HOTREMOVE
1767static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
1768 unsigned long end_pfn)
1769{
1770 struct rb_node *node;
1771
1772 for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
1773 struct stable_node *stable_node;
1774
1775 stable_node = rb_entry(node, struct stable_node, node);
1776 if (stable_node->kpfn >= start_pfn &&
1777 stable_node->kpfn < end_pfn)
1778 return stable_node;
1779 }
1780 return NULL;
1781}
1782
1783static int ksm_memory_callback(struct notifier_block *self,
1784 unsigned long action, void *arg)
1785{
1786 struct memory_notify *mn = arg;
1787 struct stable_node *stable_node;
1788
1789 switch (action) {
1790 case MEM_GOING_OFFLINE:
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800 mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
1801 break;
1802
1803 case MEM_OFFLINE:
1804
1805
1806
1807
1808
1809 while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
1810 mn->start_pfn + mn->nr_pages)) != NULL)
1811 remove_node_from_stable_tree(stable_node);
1812
1813
1814 case MEM_CANCEL_OFFLINE:
1815 mutex_unlock(&ksm_thread_mutex);
1816 break;
1817 }
1818 return NOTIFY_OK;
1819}
1820#endif
1821
1822#ifdef CONFIG_SYSFS
1823
1824
1825
1826
/*
 * Helpers for declaring the KSM sysfs attributes.
 *
 * KSM_ATTR_RO(name) defines `name_attr` via __ATTR_RO(name).
 * KSM_ATTR(name) defines `name_attr` with mode 0644, wired to the
 * name_show() and name_store() functions, which must already be defined.
 */
#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)
1832
1833static ssize_t sleep_millisecs_show(struct kobject *kobj,
1834 struct kobj_attribute *attr, char *buf)
1835{
1836 return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
1837}
1838
1839static ssize_t sleep_millisecs_store(struct kobject *kobj,
1840 struct kobj_attribute *attr,
1841 const char *buf, size_t count)
1842{
1843 unsigned long msecs;
1844 int err;
1845
1846 err = strict_strtoul(buf, 10, &msecs);
1847 if (err || msecs > UINT_MAX)
1848 return -EINVAL;
1849
1850 ksm_thread_sleep_millisecs = msecs;
1851
1852 return count;
1853}
1854KSM_ATTR(sleep_millisecs);
1855
1856static ssize_t pages_to_scan_show(struct kobject *kobj,
1857 struct kobj_attribute *attr, char *buf)
1858{
1859 return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
1860}
1861
1862static ssize_t pages_to_scan_store(struct kobject *kobj,
1863 struct kobj_attribute *attr,
1864 const char *buf, size_t count)
1865{
1866 int err;
1867 unsigned long nr_pages;
1868
1869 err = strict_strtoul(buf, 10, &nr_pages);
1870 if (err || nr_pages > UINT_MAX)
1871 return -EINVAL;
1872
1873 ksm_thread_pages_to_scan = nr_pages;
1874
1875 return count;
1876}
1877KSM_ATTR(pages_to_scan);
1878
1879static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
1880 char *buf)
1881{
1882 return sprintf(buf, "%u\n", ksm_run);
1883}
1884
1885static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1886 const char *buf, size_t count)
1887{
1888 int err;
1889 unsigned long flags;
1890
1891 err = strict_strtoul(buf, 10, &flags);
1892 if (err || flags > UINT_MAX)
1893 return -EINVAL;
1894 if (flags > KSM_RUN_UNMERGE)
1895 return -EINVAL;
1896
1897
1898
1899
1900
1901
1902
1903
1904 mutex_lock(&ksm_thread_mutex);
1905 if (ksm_run != flags) {
1906 ksm_run = flags;
1907 if (flags & KSM_RUN_UNMERGE) {
1908 int oom_score_adj;
1909
1910 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1911 err = unmerge_and_remove_all_rmap_items();
1912 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX,
1913 oom_score_adj);
1914 if (err) {
1915 ksm_run = KSM_RUN_STOP;
1916 count = err;
1917 }
1918 }
1919 }
1920 mutex_unlock(&ksm_thread_mutex);
1921
1922 if (flags & KSM_RUN_MERGE)
1923 wake_up_interruptible(&ksm_thread_wait);
1924
1925 return count;
1926}
1927KSM_ATTR(run);
1928
1929static ssize_t pages_shared_show(struct kobject *kobj,
1930 struct kobj_attribute *attr, char *buf)
1931{
1932 return sprintf(buf, "%lu\n", ksm_pages_shared);
1933}
1934KSM_ATTR_RO(pages_shared);
1935
1936static ssize_t pages_sharing_show(struct kobject *kobj,
1937 struct kobj_attribute *attr, char *buf)
1938{
1939 return sprintf(buf, "%lu\n", ksm_pages_sharing);
1940}
1941KSM_ATTR_RO(pages_sharing);
1942
1943static ssize_t pages_unshared_show(struct kobject *kobj,
1944 struct kobj_attribute *attr, char *buf)
1945{
1946 return sprintf(buf, "%lu\n", ksm_pages_unshared);
1947}
1948KSM_ATTR_RO(pages_unshared);
1949
1950static ssize_t pages_volatile_show(struct kobject *kobj,
1951 struct kobj_attribute *attr, char *buf)
1952{
1953 long ksm_pages_volatile;
1954
1955 ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
1956 - ksm_pages_sharing - ksm_pages_unshared;
1957
1958
1959
1960
1961 if (ksm_pages_volatile < 0)
1962 ksm_pages_volatile = 0;
1963 return sprintf(buf, "%ld\n", ksm_pages_volatile);
1964}
1965KSM_ATTR_RO(pages_volatile);
1966
1967static ssize_t full_scans_show(struct kobject *kobj,
1968 struct kobj_attribute *attr, char *buf)
1969{
1970 return sprintf(buf, "%lu\n", ksm_scan.seqnr);
1971}
1972KSM_ATTR_RO(full_scans);
1973
/*
 * NULL-terminated list of the KSM sysfs attributes: three read-write
 * tunables (sleep_millisecs, pages_to_scan, run) followed by five read-only
 * statistics.
 */
static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
	NULL,
};
1985
/*
 * Attribute group named "ksm"; ksm_init() registers it under mm_kobj, so
 * the files in ksm_attrs appear in a "ksm" subdirectory there.
 */
static struct attribute_group ksm_attr_group = {
	.attrs = ksm_attrs,
	.name = "ksm",
};
1990#endif
1991
1992static int __init ksm_init(void)
1993{
1994 struct task_struct *ksm_thread;
1995 int err;
1996
1997 err = ksm_slab_init();
1998 if (err)
1999 goto out;
2000
2001 ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
2002 if (IS_ERR(ksm_thread)) {
2003 printk(KERN_ERR "ksm: creating kthread failed\n");
2004 err = PTR_ERR(ksm_thread);
2005 goto out_free;
2006 }
2007
2008#ifdef CONFIG_SYSFS
2009 err = sysfs_create_group(mm_kobj, &ksm_attr_group);
2010 if (err) {
2011 printk(KERN_ERR "ksm: register sysfs failed\n");
2012 kthread_stop(ksm_thread);
2013 goto out_free;
2014 }
2015#else
2016 ksm_run = KSM_RUN_MERGE;
2017
2018#endif
2019
2020#ifdef CONFIG_MEMORY_HOTREMOVE
2021
2022
2023
2024
2025 hotplug_memory_notifier(ksm_memory_callback, 100);
2026#endif
2027 return 0;
2028
2029out_free:
2030 ksm_slab_free();
2031out:
2032 return err;
2033}
2034module_init(ksm_init)
2035