1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
#include <linux/freezer.h>

#include <asm/tlbflush.h>
#include "internal.h"
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88struct mm_slot {
89 struct hlist_node link;
90 struct list_head mm_list;
91 struct rmap_item *rmap_list;
92 struct mm_struct *mm;
93};
94
95
96
97
98
99
100
101
102
103
/*
 * struct ksm_scan - position of the scanner
 * @mm_slot:   slot currently being scanned (&ksm_mm_head between passes)
 * @address:   next address to look at inside that mm
 * @rmap_list: link through which the next rmap_item is found or inserted
 * @seqnr:     bumped each time a full pass over all slots completes
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};
110
111
112
113
114
115
116
117struct stable_node {
118 struct rb_node node;
119 struct hlist_head hlist;
120 unsigned long kpfn;
121};
122
123
124
125
126
127
128
129
130
131
132
133
134struct rmap_item {
135 struct rmap_item *rmap_list;
136 struct anon_vma *anon_vma;
137 struct mm_struct *mm;
138 unsigned long address;
139 unsigned int oldchecksum;
140 union {
141 struct rb_node node;
142 struct {
143 struct stable_node *head;
144 struct hlist_node hlist;
145 };
146 };
147};
148
/* State kept in the low bits of rmap_item->address (stripped with PAGE_MASK) */
#define SEQNR_MASK	0x0ff	/* low byte of ksm_scan.seqnr at insertion time */
#define UNSTABLE_FLAG	0x100	/* item was linked into root_unstable_tree */
#define STABLE_FLAG	0x200	/* item hangs on a stable_node's hlist */
152
153
154static struct rb_root root_stable_tree = RB_ROOT;
155static struct rb_root root_unstable_tree = RB_ROOT;
156
157#define MM_SLOTS_HASH_SHIFT 10
158#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
159static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];
160
161static struct mm_slot ksm_mm_head = {
162 .mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
163};
164static struct ksm_scan ksm_scan = {
165 .mm_slot = &ksm_mm_head,
166};
167
/* Slab caches created by ksm_slab_init(), one per structure type */
static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;
171
172
/* Counted when an rmap_item is the first one put on a stable_node's hlist */
static unsigned long ksm_pages_shared;

/* Counted for every further rmap_item added to an already populated hlist */
static unsigned long ksm_pages_sharing;

/* rmap_items currently accounted as inserted into the unstable tree */
static unsigned long ksm_pages_unshared;

/* rmap_items obtained from alloc_rmap_item() and not yet freed */
static unsigned long ksm_rmap_items;

/* Page budget handed to ksm_do_scan() on each round of ksm_scan_thread() */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Pause, in milliseconds, between rounds of ksm_scan_thread() */
static unsigned int ksm_thread_sleep_millisecs = 20;
189
/* Values of ksm_run; ksmd_should_run() tests the KSM_RUN_MERGE bit */
#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

/* Where ksm_scan_thread() sleeps while there is nothing to do */
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
/* Held by ksm_scan_thread() around each scan batch */
static DEFINE_MUTEX(ksm_thread_mutex);
/* Guards the mm_slot list, the mm_slots hash and ksm_scan.mm_slot updates */
static DEFINE_SPINLOCK(ksm_mmlist_lock);
198
/*
 * Create the slab cache "ksm_<struct>" sized and aligned for struct <struct>,
 * with the given flags and no constructor.
 */
#define KSM_KMEM_CACHE(__struct, __flags)				\
	kmem_cache_create("ksm_"#__struct,				\
			  sizeof(struct __struct),			\
			  __alignof__(struct __struct),			\
			  (__flags), NULL)
202
203static int __init ksm_slab_init(void)
204{
205 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
206 if (!rmap_item_cache)
207 goto out;
208
209 stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
210 if (!stable_node_cache)
211 goto out_free1;
212
213 mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
214 if (!mm_slot_cache)
215 goto out_free2;
216
217 return 0;
218
219out_free2:
220 kmem_cache_destroy(stable_node_cache);
221out_free1:
222 kmem_cache_destroy(rmap_item_cache);
223out:
224 return -ENOMEM;
225}
226
227static void __init ksm_slab_free(void)
228{
229 kmem_cache_destroy(mm_slot_cache);
230 kmem_cache_destroy(stable_node_cache);
231 kmem_cache_destroy(rmap_item_cache);
232 mm_slot_cache = NULL;
233}
234
235static inline struct rmap_item *alloc_rmap_item(void)
236{
237 struct rmap_item *rmap_item;
238
239 rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
240 if (rmap_item)
241 ksm_rmap_items++;
242 return rmap_item;
243}
244
245static inline void free_rmap_item(struct rmap_item *rmap_item)
246{
247 ksm_rmap_items--;
248 rmap_item->mm = NULL;
249 kmem_cache_free(rmap_item_cache, rmap_item);
250}
251
252static inline struct stable_node *alloc_stable_node(void)
253{
254 return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
255}
256
257static inline void free_stable_node(struct stable_node *stable_node)
258{
259 kmem_cache_free(stable_node_cache, stable_node);
260}
261
262static inline struct mm_slot *alloc_mm_slot(void)
263{
264 if (!mm_slot_cache)
265 return NULL;
266 return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
267}
268
269static inline void free_mm_slot(struct mm_slot *mm_slot)
270{
271 kmem_cache_free(mm_slot_cache, mm_slot);
272}
273
274static struct mm_slot *get_mm_slot(struct mm_struct *mm)
275{
276 struct mm_slot *mm_slot;
277 struct hlist_head *bucket;
278 struct hlist_node *node;
279
280 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
281 hlist_for_each_entry(mm_slot, node, bucket, link) {
282 if (mm == mm_slot->mm)
283 return mm_slot;
284 }
285 return NULL;
286}
287
288static void insert_to_mm_slots_hash(struct mm_struct *mm,
289 struct mm_slot *mm_slot)
290{
291 struct hlist_head *bucket;
292
293 bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
294 mm_slot->mm = mm;
295 hlist_add_head(&mm_slot->link, bucket);
296}
297
298static inline int in_stable_tree(struct rmap_item *rmap_item)
299{
300 return rmap_item->address & STABLE_FLAG;
301}
302
303static void hold_anon_vma(struct rmap_item *rmap_item,
304 struct anon_vma *anon_vma)
305{
306 rmap_item->anon_vma = anon_vma;
307 get_anon_vma(anon_vma);
308}
309
310static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
311{
312 struct anon_vma *anon_vma = rmap_item->anon_vma;
313
314 drop_anon_vma(anon_vma);
315}
316
317
318
319
320
321
322
323
324
325static inline bool ksm_test_exit(struct mm_struct *mm)
326{
327 return atomic_read(&mm->mm_users) == 0;
328}
329
330
331
332
333
334
335
336
337
338
339
340
341static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
342{
343 struct page *page;
344 int ret = 0;
345
346 do {
347 cond_resched();
348 page = follow_page(vma, addr, FOLL_GET);
349 if (IS_ERR_OR_NULL(page))
350 break;
351 if (PageKsm(page))
352 ret = handle_mm_fault(vma->vm_mm, vma, addr,
353 FAULT_FLAG_WRITE);
354 else
355 ret = VM_FAULT_WRITE;
356 put_page(page);
357 } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386 return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
387}
388
389static void break_cow(struct rmap_item *rmap_item)
390{
391 struct mm_struct *mm = rmap_item->mm;
392 unsigned long addr = rmap_item->address;
393 struct vm_area_struct *vma;
394
395
396
397
398
399 ksm_drop_anon_vma(rmap_item);
400
401 down_read(&mm->mmap_sem);
402 if (ksm_test_exit(mm))
403 goto out;
404 vma = find_vma(mm, addr);
405 if (!vma || vma->vm_start > addr)
406 goto out;
407 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
408 goto out;
409 break_ksm(vma, addr);
410out:
411 up_read(&mm->mmap_sem);
412}
413
414static struct page *get_mergeable_page(struct rmap_item *rmap_item)
415{
416 struct mm_struct *mm = rmap_item->mm;
417 unsigned long addr = rmap_item->address;
418 struct vm_area_struct *vma;
419 struct page *page;
420
421 down_read(&mm->mmap_sem);
422 if (ksm_test_exit(mm))
423 goto out;
424 vma = find_vma(mm, addr);
425 if (!vma || vma->vm_start > addr)
426 goto out;
427 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
428 goto out;
429
430 page = follow_page(vma, addr, FOLL_GET);
431 if (IS_ERR_OR_NULL(page))
432 goto out;
433 if (PageAnon(page)) {
434 flush_anon_page(vma, page, addr);
435 flush_dcache_page(page);
436 } else {
437 put_page(page);
438out: page = NULL;
439 }
440 up_read(&mm->mmap_sem);
441 return page;
442}
443
444static void remove_node_from_stable_tree(struct stable_node *stable_node)
445{
446 struct rmap_item *rmap_item;
447 struct hlist_node *hlist;
448
449 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
450 if (rmap_item->hlist.next)
451 ksm_pages_sharing--;
452 else
453 ksm_pages_shared--;
454 ksm_drop_anon_vma(rmap_item);
455 rmap_item->address &= PAGE_MASK;
456 cond_resched();
457 }
458
459 rb_erase(&stable_node->node, &root_stable_tree);
460 free_stable_node(stable_node);
461}
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492static struct page *get_ksm_page(struct stable_node *stable_node)
493{
494 struct page *page;
495 void *expected_mapping;
496
497 page = pfn_to_page(stable_node->kpfn);
498 expected_mapping = (void *)stable_node +
499 (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
500 rcu_read_lock();
501 if (page->mapping != expected_mapping)
502 goto stale;
503 if (!get_page_unless_zero(page))
504 goto stale;
505 if (page->mapping != expected_mapping) {
506 put_page(page);
507 goto stale;
508 }
509 rcu_read_unlock();
510 return page;
511stale:
512 rcu_read_unlock();
513 remove_node_from_stable_tree(stable_node);
514 return NULL;
515}
516
517
518
519
520
521static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
522{
523 if (rmap_item->address & STABLE_FLAG) {
524 struct stable_node *stable_node;
525 struct page *page;
526
527 stable_node = rmap_item->head;
528 page = get_ksm_page(stable_node);
529 if (!page)
530 goto out;
531
532 lock_page(page);
533 hlist_del(&rmap_item->hlist);
534 unlock_page(page);
535 put_page(page);
536
537 if (stable_node->hlist.first)
538 ksm_pages_sharing--;
539 else
540 ksm_pages_shared--;
541
542 ksm_drop_anon_vma(rmap_item);
543 rmap_item->address &= PAGE_MASK;
544
545 } else if (rmap_item->address & UNSTABLE_FLAG) {
546 unsigned char age;
547
548
549
550
551
552
553
554 age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
555 BUG_ON(age > 1);
556 if (!age)
557 rb_erase(&rmap_item->node, &root_unstable_tree);
558
559 ksm_pages_unshared--;
560 rmap_item->address &= PAGE_MASK;
561 }
562out:
563 cond_resched();
564}
565
566static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
567 struct rmap_item **rmap_list)
568{
569 while (*rmap_list) {
570 struct rmap_item *rmap_item = *rmap_list;
571 *rmap_list = rmap_item->rmap_list;
572 remove_rmap_item_from_tree(rmap_item);
573 free_rmap_item(rmap_item);
574 }
575}
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590static int unmerge_ksm_pages(struct vm_area_struct *vma,
591 unsigned long start, unsigned long end)
592{
593 unsigned long addr;
594 int err = 0;
595
596 for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
597 if (ksm_test_exit(vma->vm_mm))
598 break;
599 if (signal_pending(current))
600 err = -ERESTARTSYS;
601 else
602 err = break_ksm(vma, addr);
603 }
604 return err;
605}
606
607#ifdef CONFIG_SYSFS
608
609
610
611static int unmerge_and_remove_all_rmap_items(void)
612{
613 struct mm_slot *mm_slot;
614 struct mm_struct *mm;
615 struct vm_area_struct *vma;
616 int err = 0;
617
618 spin_lock(&ksm_mmlist_lock);
619 ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
620 struct mm_slot, mm_list);
621 spin_unlock(&ksm_mmlist_lock);
622
623 for (mm_slot = ksm_scan.mm_slot;
624 mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
625 mm = mm_slot->mm;
626 down_read(&mm->mmap_sem);
627 for (vma = mm->mmap; vma; vma = vma->vm_next) {
628 if (ksm_test_exit(mm))
629 break;
630 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
631 continue;
632 err = unmerge_ksm_pages(vma,
633 vma->vm_start, vma->vm_end);
634 if (err)
635 goto error;
636 }
637
638 remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
639
640 spin_lock(&ksm_mmlist_lock);
641 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
642 struct mm_slot, mm_list);
643 if (ksm_test_exit(mm)) {
644 hlist_del(&mm_slot->link);
645 list_del(&mm_slot->mm_list);
646 spin_unlock(&ksm_mmlist_lock);
647
648 free_mm_slot(mm_slot);
649 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
650 up_read(&mm->mmap_sem);
651 mmdrop(mm);
652 } else {
653 spin_unlock(&ksm_mmlist_lock);
654 up_read(&mm->mmap_sem);
655 }
656 }
657
658 ksm_scan.seqnr = 0;
659 return 0;
660
661error:
662 up_read(&mm->mmap_sem);
663 spin_lock(&ksm_mmlist_lock);
664 ksm_scan.mm_slot = &ksm_mm_head;
665 spin_unlock(&ksm_mmlist_lock);
666 return err;
667}
668#endif
669
670static u32 calc_checksum(struct page *page)
671{
672 u32 checksum;
673 void *addr = kmap_atomic(page, KM_USER0);
674 checksum = jhash2(addr, PAGE_SIZE / 4, 17);
675 kunmap_atomic(addr, KM_USER0);
676 return checksum;
677}
678
679static int memcmp_pages(struct page *page1, struct page *page2)
680{
681 char *addr1, *addr2;
682 int ret;
683
684 addr1 = kmap_atomic(page1, KM_USER0);
685 addr2 = kmap_atomic(page2, KM_USER1);
686 ret = memcmp(addr1, addr2, PAGE_SIZE);
687 kunmap_atomic(addr2, KM_USER1);
688 kunmap_atomic(addr1, KM_USER0);
689 return ret;
690}
691
/* 1 if the two pages have byte-for-byte equal contents, else 0. */
static inline int pages_identical(struct page *page1, struct page *page2)
{
	return memcmp_pages(page1, page2) == 0;
}
696
697static int write_protect_page(struct vm_area_struct *vma, struct page *page,
698 pte_t *orig_pte)
699{
700 struct mm_struct *mm = vma->vm_mm;
701 unsigned long addr;
702 pte_t *ptep;
703 spinlock_t *ptl;
704 int swapped;
705 int err = -EFAULT;
706
707 addr = page_address_in_vma(page, vma);
708 if (addr == -EFAULT)
709 goto out;
710
711 ptep = page_check_address(page, mm, addr, &ptl, 0);
712 if (!ptep)
713 goto out;
714
715 if (pte_write(*ptep) || pte_dirty(*ptep)) {
716 pte_t entry;
717
718 swapped = PageSwapCache(page);
719 flush_cache_page(vma, addr, page_to_pfn(page));
720
721
722
723
724
725
726
727
728
729 entry = ptep_clear_flush(vma, addr, ptep);
730
731
732
733
734 if (page_mapcount(page) + 1 + swapped != page_count(page)) {
735 set_pte_at(mm, addr, ptep, entry);
736 goto out_unlock;
737 }
738 if (pte_dirty(entry))
739 set_page_dirty(page);
740 entry = pte_mkclean(pte_wrprotect(entry));
741 set_pte_at_notify(mm, addr, ptep, entry);
742 }
743 *orig_pte = *ptep;
744 err = 0;
745
746out_unlock:
747 pte_unmap_unlock(ptep, ptl);
748out:
749 return err;
750}
751
752
753
754
755
756
757
758
759
760
761static int replace_page(struct vm_area_struct *vma, struct page *page,
762 struct page *kpage, pte_t orig_pte)
763{
764 struct mm_struct *mm = vma->vm_mm;
765 pgd_t *pgd;
766 pud_t *pud;
767 pmd_t *pmd;
768 pte_t *ptep;
769 spinlock_t *ptl;
770 unsigned long addr;
771 int err = -EFAULT;
772
773 addr = page_address_in_vma(page, vma);
774 if (addr == -EFAULT)
775 goto out;
776
777 pgd = pgd_offset(mm, addr);
778 if (!pgd_present(*pgd))
779 goto out;
780
781 pud = pud_offset(pgd, addr);
782 if (!pud_present(*pud))
783 goto out;
784
785 pmd = pmd_offset(pud, addr);
786 if (!pmd_present(*pmd))
787 goto out;
788
789 ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
790 if (!pte_same(*ptep, orig_pte)) {
791 pte_unmap_unlock(ptep, ptl);
792 goto out;
793 }
794
795 get_page(kpage);
796 page_add_anon_rmap(kpage, vma, addr);
797
798 flush_cache_page(vma, addr, pte_pfn(*ptep));
799 ptep_clear_flush(vma, addr, ptep);
800 set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
801
802 page_remove_rmap(page);
803 put_page(page);
804
805 pte_unmap_unlock(ptep, ptl);
806 err = 0;
807out:
808 return err;
809}
810
811
812
813
814
815
816
817
818
819
820static int try_to_merge_one_page(struct vm_area_struct *vma,
821 struct page *page, struct page *kpage)
822{
823 pte_t orig_pte = __pte(0);
824 int err = -EFAULT;
825
826 if (page == kpage)
827 return 0;
828
829 if (!(vma->vm_flags & VM_MERGEABLE))
830 goto out;
831 if (!PageAnon(page))
832 goto out;
833
834
835
836
837
838
839
840
841 if (!trylock_page(page))
842 goto out;
843
844
845
846
847
848
849 if (write_protect_page(vma, page, &orig_pte) == 0) {
850 if (!kpage) {
851
852
853
854
855
856 set_page_stable_node(page, NULL);
857 mark_page_accessed(page);
858 err = 0;
859 } else if (pages_identical(page, kpage))
860 err = replace_page(vma, page, kpage, orig_pte);
861 }
862
863 if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
864 munlock_vma_page(page);
865 if (!PageMlocked(kpage)) {
866 unlock_page(page);
867 lock_page(kpage);
868 mlock_vma_page(kpage);
869 page = kpage;
870 }
871 }
872
873 unlock_page(page);
874out:
875 return err;
876}
877
878
879
880
881
882
883
884static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
885 struct page *page, struct page *kpage)
886{
887 struct mm_struct *mm = rmap_item->mm;
888 struct vm_area_struct *vma;
889 int err = -EFAULT;
890
891 down_read(&mm->mmap_sem);
892 if (ksm_test_exit(mm))
893 goto out;
894 vma = find_vma(mm, rmap_item->address);
895 if (!vma || vma->vm_start > rmap_item->address)
896 goto out;
897
898 err = try_to_merge_one_page(vma, page, kpage);
899 if (err)
900 goto out;
901
902
903 hold_anon_vma(rmap_item, vma->anon_vma);
904out:
905 up_read(&mm->mmap_sem);
906 return err;
907}
908
909
910
911
912
913
914
915
916
917
918
919static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
920 struct page *page,
921 struct rmap_item *tree_rmap_item,
922 struct page *tree_page)
923{
924 int err;
925
926 err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
927 if (!err) {
928 err = try_to_merge_with_ksm_page(tree_rmap_item,
929 tree_page, page);
930
931
932
933
934 if (err)
935 break_cow(rmap_item);
936 }
937 return err ? NULL : page;
938}
939
940
941
942
943
944
945
946
947
948
949static struct page *stable_tree_search(struct page *page)
950{
951 struct rb_node *node = root_stable_tree.rb_node;
952 struct stable_node *stable_node;
953
954 stable_node = page_stable_node(page);
955 if (stable_node) {
956 get_page(page);
957 return page;
958 }
959
960 while (node) {
961 struct page *tree_page;
962 int ret;
963
964 cond_resched();
965 stable_node = rb_entry(node, struct stable_node, node);
966 tree_page = get_ksm_page(stable_node);
967 if (!tree_page)
968 return NULL;
969
970 ret = memcmp_pages(page, tree_page);
971
972 if (ret < 0) {
973 put_page(tree_page);
974 node = node->rb_left;
975 } else if (ret > 0) {
976 put_page(tree_page);
977 node = node->rb_right;
978 } else
979 return tree_page;
980 }
981
982 return NULL;
983}
984
985
986
987
988
989
990
991
992static struct stable_node *stable_tree_insert(struct page *kpage)
993{
994 struct rb_node **new = &root_stable_tree.rb_node;
995 struct rb_node *parent = NULL;
996 struct stable_node *stable_node;
997
998 while (*new) {
999 struct page *tree_page;
1000 int ret;
1001
1002 cond_resched();
1003 stable_node = rb_entry(*new, struct stable_node, node);
1004 tree_page = get_ksm_page(stable_node);
1005 if (!tree_page)
1006 return NULL;
1007
1008 ret = memcmp_pages(kpage, tree_page);
1009 put_page(tree_page);
1010
1011 parent = *new;
1012 if (ret < 0)
1013 new = &parent->rb_left;
1014 else if (ret > 0)
1015 new = &parent->rb_right;
1016 else {
1017
1018
1019
1020
1021
1022 return NULL;
1023 }
1024 }
1025
1026 stable_node = alloc_stable_node();
1027 if (!stable_node)
1028 return NULL;
1029
1030 rb_link_node(&stable_node->node, parent, new);
1031 rb_insert_color(&stable_node->node, &root_stable_tree);
1032
1033 INIT_HLIST_HEAD(&stable_node->hlist);
1034
1035 stable_node->kpfn = page_to_pfn(kpage);
1036 set_page_stable_node(kpage, stable_node);
1037
1038 return stable_node;
1039}
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055static
1056struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
1057 struct page *page,
1058 struct page **tree_pagep)
1059
1060{
1061 struct rb_node **new = &root_unstable_tree.rb_node;
1062 struct rb_node *parent = NULL;
1063
1064 while (*new) {
1065 struct rmap_item *tree_rmap_item;
1066 struct page *tree_page;
1067 int ret;
1068
1069 cond_resched();
1070 tree_rmap_item = rb_entry(*new, struct rmap_item, node);
1071 tree_page = get_mergeable_page(tree_rmap_item);
1072 if (IS_ERR_OR_NULL(tree_page))
1073 return NULL;
1074
1075
1076
1077
1078 if (page == tree_page) {
1079 put_page(tree_page);
1080 return NULL;
1081 }
1082
1083 ret = memcmp_pages(page, tree_page);
1084
1085 parent = *new;
1086 if (ret < 0) {
1087 put_page(tree_page);
1088 new = &parent->rb_left;
1089 } else if (ret > 0) {
1090 put_page(tree_page);
1091 new = &parent->rb_right;
1092 } else {
1093 *tree_pagep = tree_page;
1094 return tree_rmap_item;
1095 }
1096 }
1097
1098 rmap_item->address |= UNSTABLE_FLAG;
1099 rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
1100 rb_link_node(&rmap_item->node, parent, new);
1101 rb_insert_color(&rmap_item->node, &root_unstable_tree);
1102
1103 ksm_pages_unshared++;
1104 return NULL;
1105}
1106
1107
1108
1109
1110
1111
1112static void stable_tree_append(struct rmap_item *rmap_item,
1113 struct stable_node *stable_node)
1114{
1115 rmap_item->head = stable_node;
1116 rmap_item->address |= STABLE_FLAG;
1117 hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
1118
1119 if (rmap_item->hlist.next)
1120 ksm_pages_sharing++;
1121 else
1122 ksm_pages_shared++;
1123}
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1135{
1136 struct rmap_item *tree_rmap_item;
1137 struct page *tree_page = NULL;
1138 struct stable_node *stable_node;
1139 struct page *kpage;
1140 unsigned int checksum;
1141 int err;
1142
1143 remove_rmap_item_from_tree(rmap_item);
1144
1145
1146 kpage = stable_tree_search(page);
1147 if (kpage) {
1148 err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
1149 if (!err) {
1150
1151
1152
1153
1154 lock_page(kpage);
1155 stable_tree_append(rmap_item, page_stable_node(kpage));
1156 unlock_page(kpage);
1157 }
1158 put_page(kpage);
1159 return;
1160 }
1161
1162
1163
1164
1165
1166
1167
1168 checksum = calc_checksum(page);
1169 if (rmap_item->oldchecksum != checksum) {
1170 rmap_item->oldchecksum = checksum;
1171 return;
1172 }
1173
1174 tree_rmap_item =
1175 unstable_tree_search_insert(rmap_item, page, &tree_page);
1176 if (tree_rmap_item) {
1177 kpage = try_to_merge_two_pages(rmap_item, page,
1178 tree_rmap_item, tree_page);
1179 put_page(tree_page);
1180
1181
1182
1183
1184
1185 if (kpage) {
1186 remove_rmap_item_from_tree(tree_rmap_item);
1187
1188 lock_page(kpage);
1189 stable_node = stable_tree_insert(kpage);
1190 if (stable_node) {
1191 stable_tree_append(tree_rmap_item, stable_node);
1192 stable_tree_append(rmap_item, stable_node);
1193 }
1194 unlock_page(kpage);
1195
1196
1197
1198
1199
1200
1201
1202 if (!stable_node) {
1203 break_cow(tree_rmap_item);
1204 break_cow(rmap_item);
1205 }
1206 }
1207 }
1208}
1209
1210static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
1211 struct rmap_item **rmap_list,
1212 unsigned long addr)
1213{
1214 struct rmap_item *rmap_item;
1215
1216 while (*rmap_list) {
1217 rmap_item = *rmap_list;
1218 if ((rmap_item->address & PAGE_MASK) == addr)
1219 return rmap_item;
1220 if (rmap_item->address > addr)
1221 break;
1222 *rmap_list = rmap_item->rmap_list;
1223 remove_rmap_item_from_tree(rmap_item);
1224 free_rmap_item(rmap_item);
1225 }
1226
1227 rmap_item = alloc_rmap_item();
1228 if (rmap_item) {
1229
1230 rmap_item->mm = mm_slot->mm;
1231 rmap_item->address = addr;
1232 rmap_item->rmap_list = *rmap_list;
1233 *rmap_list = rmap_item;
1234 }
1235 return rmap_item;
1236}
1237
1238static struct rmap_item *scan_get_next_rmap_item(struct page **page)
1239{
1240 struct mm_struct *mm;
1241 struct mm_slot *slot;
1242 struct vm_area_struct *vma;
1243 struct rmap_item *rmap_item;
1244
1245 if (list_empty(&ksm_mm_head.mm_list))
1246 return NULL;
1247
1248 slot = ksm_scan.mm_slot;
1249 if (slot == &ksm_mm_head) {
1250 root_unstable_tree = RB_ROOT;
1251
1252 spin_lock(&ksm_mmlist_lock);
1253 slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
1254 ksm_scan.mm_slot = slot;
1255 spin_unlock(&ksm_mmlist_lock);
1256next_mm:
1257 ksm_scan.address = 0;
1258 ksm_scan.rmap_list = &slot->rmap_list;
1259 }
1260
1261 mm = slot->mm;
1262 down_read(&mm->mmap_sem);
1263 if (ksm_test_exit(mm))
1264 vma = NULL;
1265 else
1266 vma = find_vma(mm, ksm_scan.address);
1267
1268 for (; vma; vma = vma->vm_next) {
1269 if (!(vma->vm_flags & VM_MERGEABLE))
1270 continue;
1271 if (ksm_scan.address < vma->vm_start)
1272 ksm_scan.address = vma->vm_start;
1273 if (!vma->anon_vma)
1274 ksm_scan.address = vma->vm_end;
1275
1276 while (ksm_scan.address < vma->vm_end) {
1277 if (ksm_test_exit(mm))
1278 break;
1279 *page = follow_page(vma, ksm_scan.address, FOLL_GET);
1280 if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
1281 flush_anon_page(vma, *page, ksm_scan.address);
1282 flush_dcache_page(*page);
1283 rmap_item = get_next_rmap_item(slot,
1284 ksm_scan.rmap_list, ksm_scan.address);
1285 if (rmap_item) {
1286 ksm_scan.rmap_list =
1287 &rmap_item->rmap_list;
1288 ksm_scan.address += PAGE_SIZE;
1289 } else
1290 put_page(*page);
1291 up_read(&mm->mmap_sem);
1292 return rmap_item;
1293 }
1294 if (!IS_ERR_OR_NULL(*page))
1295 put_page(*page);
1296 ksm_scan.address += PAGE_SIZE;
1297 cond_resched();
1298 }
1299 }
1300
1301 if (ksm_test_exit(mm)) {
1302 ksm_scan.address = 0;
1303 ksm_scan.rmap_list = &slot->rmap_list;
1304 }
1305
1306
1307
1308
1309 remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
1310
1311 spin_lock(&ksm_mmlist_lock);
1312 ksm_scan.mm_slot = list_entry(slot->mm_list.next,
1313 struct mm_slot, mm_list);
1314 if (ksm_scan.address == 0) {
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324 hlist_del(&slot->link);
1325 list_del(&slot->mm_list);
1326 spin_unlock(&ksm_mmlist_lock);
1327
1328 free_mm_slot(slot);
1329 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1330 up_read(&mm->mmap_sem);
1331 mmdrop(mm);
1332 } else {
1333 spin_unlock(&ksm_mmlist_lock);
1334 up_read(&mm->mmap_sem);
1335 }
1336
1337
1338 slot = ksm_scan.mm_slot;
1339 if (slot != &ksm_mm_head)
1340 goto next_mm;
1341
1342 ksm_scan.seqnr++;
1343 return NULL;
1344}
1345
1346
1347
1348
1349
1350static void ksm_do_scan(unsigned int scan_npages)
1351{
1352 struct rmap_item *rmap_item;
1353 struct page *uninitialized_var(page);
1354
1355 while (scan_npages--) {
1356 cond_resched();
1357 rmap_item = scan_get_next_rmap_item(&page);
1358 if (!rmap_item)
1359 return;
1360 if (!PageKsm(page) || !in_stable_tree(rmap_item))
1361 cmp_and_merge_page(page, rmap_item);
1362 put_page(page);
1363 }
1364}
1365
1366static int ksmd_should_run(void)
1367{
1368 return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
1369}
1370
1371static int ksm_scan_thread(void *nothing)
1372{
1373 set_user_nice(current, 5);
1374
1375 while (!kthread_should_stop()) {
1376 mutex_lock(&ksm_thread_mutex);
1377 if (ksmd_should_run())
1378 ksm_do_scan(ksm_thread_pages_to_scan);
1379 mutex_unlock(&ksm_thread_mutex);
1380
1381 if (ksmd_should_run()) {
1382 schedule_timeout_interruptible(
1383 msecs_to_jiffies(ksm_thread_sleep_millisecs));
1384 } else {
1385 wait_event_interruptible(ksm_thread_wait,
1386 ksmd_should_run() || kthread_should_stop());
1387 }
1388 }
1389 return 0;
1390}
1391
1392int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1393 unsigned long end, int advice, unsigned long *vm_flags)
1394{
1395 struct mm_struct *mm = vma->vm_mm;
1396 int err;
1397
1398 switch (advice) {
1399 case MADV_MERGEABLE:
1400
1401
1402
1403 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1404 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1405 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1406 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
1407 return 0;
1408
1409 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
1410 err = __ksm_enter(mm);
1411 if (err)
1412 return err;
1413 }
1414
1415 *vm_flags |= VM_MERGEABLE;
1416 break;
1417
1418 case MADV_UNMERGEABLE:
1419 if (!(*vm_flags & VM_MERGEABLE))
1420 return 0;
1421
1422 if (vma->anon_vma) {
1423 err = unmerge_ksm_pages(vma, start, end);
1424 if (err)
1425 return err;
1426 }
1427
1428 *vm_flags &= ~VM_MERGEABLE;
1429 break;
1430 }
1431
1432 return 0;
1433}
1434
1435int __ksm_enter(struct mm_struct *mm)
1436{
1437 struct mm_slot *mm_slot;
1438 int needs_wakeup;
1439
1440 mm_slot = alloc_mm_slot();
1441 if (!mm_slot)
1442 return -ENOMEM;
1443
1444
1445 needs_wakeup = list_empty(&ksm_mm_head.mm_list);
1446
1447 spin_lock(&ksm_mmlist_lock);
1448 insert_to_mm_slots_hash(mm, mm_slot);
1449
1450
1451
1452
1453
1454 list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
1455 spin_unlock(&ksm_mmlist_lock);
1456
1457 set_bit(MMF_VM_MERGEABLE, &mm->flags);
1458 atomic_inc(&mm->mm_count);
1459
1460 if (needs_wakeup)
1461 wake_up_interruptible(&ksm_thread_wait);
1462
1463 return 0;
1464}
1465
1466void __ksm_exit(struct mm_struct *mm)
1467{
1468 struct mm_slot *mm_slot;
1469 int easy_to_free = 0;
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480 spin_lock(&ksm_mmlist_lock);
1481 mm_slot = get_mm_slot(mm);
1482 if (mm_slot && ksm_scan.mm_slot != mm_slot) {
1483 if (!mm_slot->rmap_list) {
1484 hlist_del(&mm_slot->link);
1485 list_del(&mm_slot->mm_list);
1486 easy_to_free = 1;
1487 } else {
1488 list_move(&mm_slot->mm_list,
1489 &ksm_scan.mm_slot->mm_list);
1490 }
1491 }
1492 spin_unlock(&ksm_mmlist_lock);
1493
1494 if (easy_to_free) {
1495 free_mm_slot(mm_slot);
1496 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1497 mmdrop(mm);
1498 } else if (mm_slot) {
1499 down_write(&mm->mmap_sem);
1500 up_write(&mm->mmap_sem);
1501 }
1502}
1503
1504struct page *ksm_does_need_to_copy(struct page *page,
1505 struct vm_area_struct *vma, unsigned long address)
1506{
1507 struct page *new_page;
1508
1509 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1510 if (new_page) {
1511 copy_user_highpage(new_page, page, address, vma);
1512
1513 SetPageDirty(new_page);
1514 __SetPageUptodate(new_page);
1515 SetPageSwapBacked(new_page);
1516 __set_page_locked(new_page);
1517
1518 if (page_evictable(new_page, vma))
1519 lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
1520 else
1521 add_page_to_unevictable_list(new_page);
1522 }
1523
1524 return new_page;
1525}
1526
1527int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
1528 unsigned long *vm_flags)
1529{
1530 struct stable_node *stable_node;
1531 struct rmap_item *rmap_item;
1532 struct hlist_node *hlist;
1533 unsigned int mapcount = page_mapcount(page);
1534 int referenced = 0;
1535 int search_new_forks = 0;
1536
1537 VM_BUG_ON(!PageKsm(page));
1538 VM_BUG_ON(!PageLocked(page));
1539
1540 stable_node = page_stable_node(page);
1541 if (!stable_node)
1542 return 0;
1543again:
1544 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1545 struct anon_vma *anon_vma = rmap_item->anon_vma;
1546 struct anon_vma_chain *vmac;
1547 struct vm_area_struct *vma;
1548
1549 anon_vma_lock(anon_vma);
1550 list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
1551 vma = vmac->vma;
1552 if (rmap_item->address < vma->vm_start ||
1553 rmap_item->address >= vma->vm_end)
1554 continue;
1555
1556
1557
1558
1559
1560
1561 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1562 continue;
1563
1564 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
1565 continue;
1566
1567 referenced += page_referenced_one(page, vma,
1568 rmap_item->address, &mapcount, vm_flags);
1569 if (!search_new_forks || !mapcount)
1570 break;
1571 }
1572 anon_vma_unlock(anon_vma);
1573 if (!mapcount)
1574 goto out;
1575 }
1576 if (!search_new_forks++)
1577 goto again;
1578out:
1579 return referenced;
1580}
1581
1582int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
1583{
1584 struct stable_node *stable_node;
1585 struct hlist_node *hlist;
1586 struct rmap_item *rmap_item;
1587 int ret = SWAP_AGAIN;
1588 int search_new_forks = 0;
1589
1590 VM_BUG_ON(!PageKsm(page));
1591 VM_BUG_ON(!PageLocked(page));
1592
1593 stable_node = page_stable_node(page);
1594 if (!stable_node)
1595 return SWAP_FAIL;
1596again:
1597 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1598 struct anon_vma *anon_vma = rmap_item->anon_vma;
1599 struct anon_vma_chain *vmac;
1600 struct vm_area_struct *vma;
1601
1602 anon_vma_lock(anon_vma);
1603 list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
1604 vma = vmac->vma;
1605 if (rmap_item->address < vma->vm_start ||
1606 rmap_item->address >= vma->vm_end)
1607 continue;
1608
1609
1610
1611
1612
1613
1614 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1615 continue;
1616
1617 ret = try_to_unmap_one(page, vma,
1618 rmap_item->address, flags);
1619 if (ret != SWAP_AGAIN || !page_mapped(page)) {
1620 anon_vma_unlock(anon_vma);
1621 goto out;
1622 }
1623 }
1624 anon_vma_unlock(anon_vma);
1625 }
1626 if (!search_new_forks++)
1627 goto again;
1628out:
1629 return ret;
1630}
1631
1632#ifdef CONFIG_MIGRATION
1633int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
1634 struct vm_area_struct *, unsigned long, void *), void *arg)
1635{
1636 struct stable_node *stable_node;
1637 struct hlist_node *hlist;
1638 struct rmap_item *rmap_item;
1639 int ret = SWAP_AGAIN;
1640 int search_new_forks = 0;
1641
1642 VM_BUG_ON(!PageKsm(page));
1643 VM_BUG_ON(!PageLocked(page));
1644
1645 stable_node = page_stable_node(page);
1646 if (!stable_node)
1647 return ret;
1648again:
1649 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1650 struct anon_vma *anon_vma = rmap_item->anon_vma;
1651 struct anon_vma_chain *vmac;
1652 struct vm_area_struct *vma;
1653
1654 anon_vma_lock(anon_vma);
1655 list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
1656 vma = vmac->vma;
1657 if (rmap_item->address < vma->vm_start ||
1658 rmap_item->address >= vma->vm_end)
1659 continue;
1660
1661
1662
1663
1664
1665
1666 if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
1667 continue;
1668
1669 ret = rmap_one(page, vma, rmap_item->address, arg);
1670 if (ret != SWAP_AGAIN) {
1671 anon_vma_unlock(anon_vma);
1672 goto out;
1673 }
1674 }
1675 anon_vma_unlock(anon_vma);
1676 }
1677 if (!search_new_forks++)
1678 goto again;
1679out:
1680 return ret;
1681}
1682
1683void ksm_migrate_page(struct page *newpage, struct page *oldpage)
1684{
1685 struct stable_node *stable_node;
1686
1687 VM_BUG_ON(!PageLocked(oldpage));
1688 VM_BUG_ON(!PageLocked(newpage));
1689 VM_BUG_ON(newpage->mapping != oldpage->mapping);
1690
1691 stable_node = page_stable_node(newpage);
1692 if (stable_node) {
1693 VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
1694 stable_node->kpfn = page_to_pfn(newpage);
1695 }
1696}
1697#endif
1698
1699#ifdef CONFIG_MEMORY_HOTREMOVE
1700static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
1701 unsigned long end_pfn)
1702{
1703 struct rb_node *node;
1704
1705 for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
1706 struct stable_node *stable_node;
1707
1708 stable_node = rb_entry(node, struct stable_node, node);
1709 if (stable_node->kpfn >= start_pfn &&
1710 stable_node->kpfn < end_pfn)
1711 return stable_node;
1712 }
1713 return NULL;
1714}
1715
1716static int ksm_memory_callback(struct notifier_block *self,
1717 unsigned long action, void *arg)
1718{
1719 struct memory_notify *mn = arg;
1720 struct stable_node *stable_node;
1721
1722 switch (action) {
1723 case MEM_GOING_OFFLINE:
1724
1725
1726
1727
1728 mutex_lock(&ksm_thread_mutex);
1729 break;
1730
1731 case MEM_OFFLINE:
1732
1733
1734
1735
1736
1737 while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
1738 mn->start_pfn + mn->nr_pages)) != NULL)
1739 remove_node_from_stable_tree(stable_node);
1740
1741
1742 case MEM_CANCEL_OFFLINE:
1743 mutex_unlock(&ksm_thread_mutex);
1744 break;
1745 }
1746 return NOTIFY_OK;
1747}
1748#endif
1749
1750#ifdef CONFIG_SYSFS
1751
1752
1753
1754
/*
 * Helpers that define a static kobj_attribute called NAME_attr.
 * KSM_ATTR_RO builds it with __ATTR_RO(); KSM_ATTR builds a mode 0644
 * attribute wired to NAME_show and NAME_store.
 */
#define KSM_ATTR_RO(_name)						\
	static struct kobj_attribute _name##_attr =			\
		__ATTR_RO(_name)
#define KSM_ATTR(_name)							\
	static struct kobj_attribute _name##_attr =			\
		__ATTR(_name, 0644, _name##_show, _name##_store)
1760
1761static ssize_t sleep_millisecs_show(struct kobject *kobj,
1762 struct kobj_attribute *attr, char *buf)
1763{
1764 return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
1765}
1766
1767static ssize_t sleep_millisecs_store(struct kobject *kobj,
1768 struct kobj_attribute *attr,
1769 const char *buf, size_t count)
1770{
1771 unsigned long msecs;
1772 int err;
1773
1774 err = strict_strtoul(buf, 10, &msecs);
1775 if (err || msecs > UINT_MAX)
1776 return -EINVAL;
1777
1778 ksm_thread_sleep_millisecs = msecs;
1779
1780 return count;
1781}
1782KSM_ATTR(sleep_millisecs);
1783
1784static ssize_t pages_to_scan_show(struct kobject *kobj,
1785 struct kobj_attribute *attr, char *buf)
1786{
1787 return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
1788}
1789
1790static ssize_t pages_to_scan_store(struct kobject *kobj,
1791 struct kobj_attribute *attr,
1792 const char *buf, size_t count)
1793{
1794 int err;
1795 unsigned long nr_pages;
1796
1797 err = strict_strtoul(buf, 10, &nr_pages);
1798 if (err || nr_pages > UINT_MAX)
1799 return -EINVAL;
1800
1801 ksm_thread_pages_to_scan = nr_pages;
1802
1803 return count;
1804}
1805KSM_ATTR(pages_to_scan);
1806
1807static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
1808 char *buf)
1809{
1810 return sprintf(buf, "%u\n", ksm_run);
1811}
1812
1813static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1814 const char *buf, size_t count)
1815{
1816 int err;
1817 unsigned long flags;
1818
1819 err = strict_strtoul(buf, 10, &flags);
1820 if (err || flags > UINT_MAX)
1821 return -EINVAL;
1822 if (flags > KSM_RUN_UNMERGE)
1823 return -EINVAL;
1824
1825
1826
1827
1828
1829
1830
1831
1832 mutex_lock(&ksm_thread_mutex);
1833 if (ksm_run != flags) {
1834 ksm_run = flags;
1835 if (flags & KSM_RUN_UNMERGE) {
1836 current->flags |= PF_OOM_ORIGIN;
1837 err = unmerge_and_remove_all_rmap_items();
1838 current->flags &= ~PF_OOM_ORIGIN;
1839 if (err) {
1840 ksm_run = KSM_RUN_STOP;
1841 count = err;
1842 }
1843 }
1844 }
1845 mutex_unlock(&ksm_thread_mutex);
1846
1847 if (flags & KSM_RUN_MERGE)
1848 wake_up_interruptible(&ksm_thread_wait);
1849
1850 return count;
1851}
1852KSM_ATTR(run);
1853
1854static ssize_t pages_shared_show(struct kobject *kobj,
1855 struct kobj_attribute *attr, char *buf)
1856{
1857 return sprintf(buf, "%lu\n", ksm_pages_shared);
1858}
1859KSM_ATTR_RO(pages_shared);
1860
1861static ssize_t pages_sharing_show(struct kobject *kobj,
1862 struct kobj_attribute *attr, char *buf)
1863{
1864 return sprintf(buf, "%lu\n", ksm_pages_sharing);
1865}
1866KSM_ATTR_RO(pages_sharing);
1867
1868static ssize_t pages_unshared_show(struct kobject *kobj,
1869 struct kobj_attribute *attr, char *buf)
1870{
1871 return sprintf(buf, "%lu\n", ksm_pages_unshared);
1872}
1873KSM_ATTR_RO(pages_unshared);
1874
1875static ssize_t pages_volatile_show(struct kobject *kobj,
1876 struct kobj_attribute *attr, char *buf)
1877{
1878 long ksm_pages_volatile;
1879
1880 ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
1881 - ksm_pages_sharing - ksm_pages_unshared;
1882
1883
1884
1885
1886 if (ksm_pages_volatile < 0)
1887 ksm_pages_volatile = 0;
1888 return sprintf(buf, "%ld\n", ksm_pages_volatile);
1889}
1890KSM_ATTR_RO(pages_volatile);
1891
1892static ssize_t full_scans_show(struct kobject *kobj,
1893 struct kobj_attribute *attr, char *buf)
1894{
1895 return sprintf(buf, "%lu\n", ksm_scan.seqnr);
1896}
1897KSM_ATTR_RO(full_scans);
1898
1899static struct attribute *ksm_attrs[] = {
1900 &sleep_millisecs_attr.attr,
1901 &pages_to_scan_attr.attr,
1902 &run_attr.attr,
1903 &pages_shared_attr.attr,
1904 &pages_sharing_attr.attr,
1905 &pages_unshared_attr.attr,
1906 &pages_volatile_attr.attr,
1907 &full_scans_attr.attr,
1908 NULL,
1909};
1910
1911static struct attribute_group ksm_attr_group = {
1912 .attrs = ksm_attrs,
1913 .name = "ksm",
1914};
1915#endif
1916
1917static int __init ksm_init(void)
1918{
1919 struct task_struct *ksm_thread;
1920 int err;
1921
1922 err = ksm_slab_init();
1923 if (err)
1924 goto out;
1925
1926 ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
1927 if (IS_ERR(ksm_thread)) {
1928 printk(KERN_ERR "ksm: creating kthread failed\n");
1929 err = PTR_ERR(ksm_thread);
1930 goto out_free;
1931 }
1932
1933#ifdef CONFIG_SYSFS
1934 err = sysfs_create_group(mm_kobj, &ksm_attr_group);
1935 if (err) {
1936 printk(KERN_ERR "ksm: register sysfs failed\n");
1937 kthread_stop(ksm_thread);
1938 goto out_free;
1939 }
1940#else
1941 ksm_run = KSM_RUN_MERGE;
1942
1943#endif
1944
1945#ifdef CONFIG_MEMORY_HOTREMOVE
1946
1947
1948
1949
1950 hotplug_memory_notifier(ksm_memory_callback, 100);
1951#endif
1952 return 0;
1953
1954out_free:
1955 ksm_slab_free();
1956out:
1957 return err;
1958}
1959module_init(ksm_init)
1960