/*
 *  linux/mm/vmscan.c
 *
 *  Page reclaim for the Linux VM: ages the active/inactive LRU lists,
 *  writes back or swaps out pages and calls the registered slab
 *  shrinkers.  Used by kswapd, by direct reclaim and by zone reclaim.
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/vmstat.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>

#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>

#include <linux/swapops.h>

#include "internal.h"

struct scan_control {
	/* Incremented by the number of inactive pages that were scanned */
	unsigned long nr_scanned;

	/* Number of pages freed so far during a call to shrink_zones() */
	unsigned long nr_reclaimed;

	/* How many pages shrink_list() should reclaim */
	unsigned long nr_to_reclaim;

	unsigned long hibernation_mode;

	/* This context's GFP mask */
	gfp_t gfp_mask;

	int may_writepage;

	/* Can mapped pages be reclaimed? */
	int may_unmap;

	/* Can pages be swapped as part of reclaim? */
	int may_swap;

	int swappiness;

	int order;

	/*
	 * Reclaim contiguous ranges of pages for a high-order
	 * allocation (lumpy reclaim).
	 */
	bool lumpy_reclaim_mode;

	/* Which cgroup do we reclaim from */
	struct mem_cgroup *mem_cgroup;

	/*
	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
	 * are scanned.
	 */
	nodemask_t *nodemask;
};

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

/*
 * From 0 .. 100.  Higher means more swappy.
 */
int vm_swappiness = 60;
long vm_total_pages;	/* The total number of pages which the VM controls */

static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
#else
#define scanning_global_lru(sc)	(1)
#endif

static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
						  struct scan_control *sc)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);

	return &zone->reclaim_stat;
}

static unsigned long zone_nr_lru_pages(struct zone *zone,
				struct scan_control *sc, enum lru_list lru)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);

	return zone_page_state(zone, NR_LRU_BASE + lru);
}

/*
 * Add a shrinker callback to be called from the vm
 */
161void register_shrinker(struct shrinker *shrinker)
162{
163 shrinker->nr = 0;
164 down_write(&shrinker_rwsem);
165 list_add_tail(&shrinker->list, &shrinker_list);
166 up_write(&shrinker_rwsem);
167}
168EXPORT_SYMBOL(register_shrinker);

/*
 * Remove one
 */
173void unregister_shrinker(struct shrinker *shrinker)
174{
175 down_write(&shrinker_rwsem);
176 list_del(&shrinker->list);
177 up_write(&shrinker_rwsem);
178}
179EXPORT_SYMBOL(unregister_shrinker);
180
181#define SHRINK_BATCH 128

/*
 * Call the shrink functions to age shrinkable caches
 *
 * Here we assume it costs one seek to replace a lru page and that it also
 * takes a seek to recreate a cache object.  With this in mind we age equal
 * percentages of the lru and ageable caches.  This should balance the seeks
 * generated by these structures.
 *
 * If the vm encountered mapped pages on the LRU it increases the pressure on
 * slab to avoid swapping.
 *
 * We do weird things to avoid the required division by zero checks.
 *
 * `lru_pages' represents the number of on-LRU pages in all the zones which
 * are eligible for the caller's allocation attempt.  It is used for balancing
 * slab reclaim versus page reclaim.
 *
 * Returns the number of slab objects which we shrunk.
 */
201unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
202 unsigned long lru_pages)
203{
204 struct shrinker *shrinker;
205 unsigned long ret = 0;
206
207 if (scanned == 0)
208 scanned = SWAP_CLUSTER_MAX;
209
210 if (!down_read_trylock(&shrinker_rwsem))
211 return 1;
212
213 list_for_each_entry(shrinker, &shrinker_list, list) {
214 unsigned long long delta;
215 unsigned long total_scan;
216 unsigned long max_pass;
217
218 max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
219 delta = (4 * scanned) / shrinker->seeks;
220 delta *= max_pass;
221 do_div(delta, lru_pages + 1);
222 shrinker->nr += delta;
223 if (shrinker->nr < 0) {
224 printk(KERN_ERR "shrink_slab: %pF negative objects to "
225 "delete nr=%ld\n",
226 shrinker->shrink, shrinker->nr);
227 shrinker->nr = max_pass;
228 }

		/*
		 * Avoid risking looping forever due to too large nr value:
		 * never try to free more than twice the estimated number of
		 * freeable entries.
		 */
235 if (shrinker->nr > max_pass * 2)
236 shrinker->nr = max_pass * 2;
237
238 total_scan = shrinker->nr;
239 shrinker->nr = 0;
240
241 while (total_scan >= SHRINK_BATCH) {
242 long this_scan = SHRINK_BATCH;
243 int shrink_ret;
244 int nr_before;
245
246 nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
247 shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
248 gfp_mask);
249 if (shrink_ret == -1)
250 break;
251 if (shrink_ret < nr_before)
252 ret += nr_before - shrink_ret;
253 count_vm_events(SLABS_SCANNED, this_scan);
254 total_scan -= this_scan;
255
256 cond_resched();
257 }
258
259 shrinker->nr += total_scan;
260 }
261 up_read(&shrinker_rwsem);
262 return ret;
263}
264
265static inline int is_page_cache_freeable(struct page *page)
266{
	/*
	 * A freeable page cache page is referenced only by the caller
	 * that isolated the page, the page cache radix tree and
	 * optional buffer heads at page->private.
	 */
272 return page_count(page) - page_has_private(page) == 2;
273}
274
275static int may_write_to_queue(struct backing_dev_info *bdi)
276{
277 if (current->flags & PF_SWAPWRITE)
278 return 1;
279 if (!bdi_write_congested(bdi))
280 return 1;
281 if (bdi == current->backing_dev_info)
282 return 1;
283 return 0;
284}

/*
 * We detected a synchronous write error writing a page out.  Probably
 * -ENOSPC.  We need to propagate that into the address_space for a subsequent
 * fsync(), msync() or close().
 *
 * The tricky part is that after writepage we cannot touch the mapping: nothing
 * prevents it from being freed up.  But we have a ref on the page and once
 * that page is locked, the mapping is pinned.
 *
 * We're allowed to run the sleeping lock_page_nosync() here because we know
 * the caller has __GFP_FS.
 */
298static void handle_write_error(struct address_space *mapping,
299 struct page *page, int error)
300{
301 lock_page_nosync(page);
302 if (page_mapping(page) == mapping)
303 mapping_set_error(mapping, error);
304 unlock_page(page);
305}

/* Request for sync pageout. */
enum pageout_io {
	PAGEOUT_IO_ASYNC,
	PAGEOUT_IO_SYNC,
};

/* possible outcome of pageout() */
typedef enum {
	/* failed to write page out, page is locked */
	PAGE_KEEP,
	/* move page to the active list, page is locked */
	PAGE_ACTIVATE,
	/* page has been sent to the disk successfully, page is unlocked */
	PAGE_SUCCESS,
	/* page is clean and locked */
	PAGE_CLEAN,
} pageout_t;

/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 */
329static pageout_t pageout(struct page *page, struct address_space *mapping,
330 enum pageout_io sync_writeback)
331{
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348 if (!is_page_cache_freeable(page))
349 return PAGE_KEEP;
350 if (!mapping) {
		/*
		 * Some data journaling orphaned pages can have
		 * page->mapping == NULL while being dirty with clean buffers.
		 */
355 if (page_has_private(page)) {
356 if (try_to_free_buffers(page)) {
357 ClearPageDirty(page);
358 printk("%s: orphaned page\n", __func__);
359 return PAGE_CLEAN;
360 }
361 }
362 return PAGE_KEEP;
363 }
364 if (mapping->a_ops->writepage == NULL)
365 return PAGE_ACTIVATE;
366 if (!may_write_to_queue(mapping->backing_dev_info))
367 return PAGE_KEEP;
368
369 if (clear_page_dirty_for_io(page)) {
370 int res;
371 struct writeback_control wbc = {
372 .sync_mode = WB_SYNC_NONE,
373 .nr_to_write = SWAP_CLUSTER_MAX,
374 .range_start = 0,
375 .range_end = LLONG_MAX,
376 .nonblocking = 1,
377 .for_reclaim = 1,
378 };
379
380 SetPageReclaim(page);
381 res = mapping->a_ops->writepage(page, &wbc);
382 if (res < 0)
383 handle_write_error(mapping, page, res);
384 if (res == AOP_WRITEPAGE_ACTIVATE) {
385 ClearPageReclaim(page);
386 return PAGE_ACTIVATE;
387 }

		/*
		 * Wait on writeback if requested to. This happens when
		 * direct reclaiming a large contiguous area and the
		 * first attempt to free a range of pages fails.
		 */
394 if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
395 wait_on_page_writeback(page);
396
397 if (!PageWriteback(page)) {
398
399 ClearPageReclaim(page);
400 }
401 inc_zone_page_state(page, NR_VMSCAN_WRITE);
402 return PAGE_SUCCESS;
403 }
404
405 return PAGE_CLEAN;
406}

/*
 * Same as remove_mapping, but if the page is removed from the mapping, it
 * gets returned with a refcount of 0.
 */
412static int __remove_mapping(struct address_space *mapping, struct page *page)
413{
414 BUG_ON(!PageLocked(page));
415 BUG_ON(mapping != page_mapping(page));
416
417 spin_lock_irq(&mapping->tree_lock);
	/*
	 * The non racy check for a dirty page.
	 *
	 * Must be careful with the order of the tests. When someone has
	 * a ref to the page, it may be possible that they dirty it then
	 * drop the reference. So if PageDirty is tested before page_count
	 * here, then the following race may occur:
	 *
	 * get_user_pages(&page);
	 * [user mapping goes away]
	 * write_to(page);
	 *				!PageDirty(page)    [good]
	 * SetPageDirty(page);
	 * put_page(page);
	 *				!page_count(page)   [good, discard it]
	 *
	 * [oops, our write_to data is lost]
	 *
	 * Reversing the order of the tests ensures such a situation cannot
	 * arise.
	 *
	 * Note that if SetPageDirty is always performed via set_page_dirty,
	 * and thus under tree_lock, then this ordering is not required.
	 */
443 if (!page_freeze_refs(page, 2))
444 goto cannot_free;
445
446 if (unlikely(PageDirty(page))) {
447 page_unfreeze_refs(page, 2);
448 goto cannot_free;
449 }
450
451 if (PageSwapCache(page)) {
452 swp_entry_t swap = { .val = page_private(page) };
453 __delete_from_swap_cache(page);
454 spin_unlock_irq(&mapping->tree_lock);
455 swapcache_free(swap, page);
456 } else {
457 __remove_from_page_cache(page);
458 spin_unlock_irq(&mapping->tree_lock);
459 mem_cgroup_uncharge_cache_page(page);
460 }
461
462 return 1;
463
464cannot_free:
465 spin_unlock_irq(&mapping->tree_lock);
466 return 0;
467}

/*
 * Attempt to detach a locked page from its ->mapping.  If it is dirty or if
 * someone else has a ref on the page, abort and return 0.  If it was
 * successfully detached, return 1.  Assumes the caller has a single ref on
 * this page.
 */
475int remove_mapping(struct address_space *mapping, struct page *page)
476{
477 if (__remove_mapping(mapping, page)) {
		/*
		 * Unfreezing the refcount with 1 rather than 2 effectively
		 * drops the pagecache ref for us without requiring another
		 * atomic operation.
		 */
483 page_unfreeze_refs(page, 1);
484 return 1;
485 }
486 return 0;
487}

/**
 * putback_lru_page - put previously isolated page onto appropriate LRU list
 * @page: page to be put back to appropriate lru list
 *
 * Add previously isolated @page to appropriate LRU list.
 * Page may still be unevictable for other reasons.
 *
 * lru_lock must not be held, interrupts must be enabled.
 */
498void putback_lru_page(struct page *page)
499{
500 int lru;
501 int active = !!TestClearPageActive(page);
502 int was_unevictable = PageUnevictable(page);
503
504 VM_BUG_ON(PageLRU(page));
505
506redo:
507 ClearPageUnevictable(page);
508
509 if (page_evictable(page, NULL)) {
510
511
512
513
514
515
516 lru = active + page_lru_base_type(page);
517 lru_cache_add_lru(page, lru);
518 } else {
519
520
521
522
523 lru = LRU_UNEVICTABLE;
524 add_page_to_unevictable_list(page);
525
526
527
528
529
530
531
532
533
534 smp_mb();
535 }
536
537
538
539
540
541
542 if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
543 if (!isolate_lru_page(page)) {
544 put_page(page);
545 goto redo;
546 }
547
548
549
550
551 }
552
553 if (was_unevictable && lru != LRU_UNEVICTABLE)
554 count_vm_event(UNEVICTABLE_PGRESCUED);
555 else if (!was_unevictable && lru == LRU_UNEVICTABLE)
556 count_vm_event(UNEVICTABLE_PGCULLED);
557
558 put_page(page);
559}
560
561enum page_references {
562 PAGEREF_RECLAIM,
563 PAGEREF_RECLAIM_CLEAN,
564 PAGEREF_KEEP,
565 PAGEREF_ACTIVATE,
566};
567
568static enum page_references page_check_references(struct page *page,
569 struct scan_control *sc)
570{
571 int referenced_ptes, referenced_page;
572 unsigned long vm_flags;
573
574 referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
575 referenced_page = TestClearPageReferenced(page);
576
577
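	/* Lumpy reclaim - ignore references */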
578 if (sc->lumpy_reclaim_mode)
579 return PAGEREF_RECLAIM;
580
581
582
583
584
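	/*
	 * Mlock lost the isolation race with us.  Let try_to_unmap()
	 * move the page to the unevictable list.
	 */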
585 if (vm_flags & VM_LOCKED)
586 return PAGEREF_RECLAIM;
587
588 if (referenced_ptes) {
589 if (PageAnon(page))
590 return PAGEREF_ACTIVATE;
591
592
593
594
595
596
597
598
599
600
601
602
603
604
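		/*
		 * All mapped pages start out with page table
		 * references from the instantiating fault, so we need
		 * to look twice if a mapped file page is used more
		 * than once.
		 *
		 * Mark it and spare it for another trip around the
		 * inactive list.  Another page table reference will
		 * lead to its activation.
		 *
		 * Note: the mark is set for activated pages as well
		 * so that recently deactivated but used pages are
		 * quickly recovered.
		 */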
605 SetPageReferenced(page);
606
607 if (referenced_page)
608 return PAGEREF_ACTIVATE;
609
610 return PAGEREF_KEEP;
611 }
612
613
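	/* Reclaim if clean, defer dirty pages to writeback */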
614 if (referenced_page)
615 return PAGEREF_RECLAIM_CLEAN;
616
617 return PAGEREF_RECLAIM;
618}

/*
 * shrink_page_list() returns the number of reclaimed pages
 */
623static unsigned long shrink_page_list(struct list_head *page_list,
624 struct scan_control *sc,
625 enum pageout_io sync_writeback)
626{
627 LIST_HEAD(ret_pages);
628 struct pagevec freed_pvec;
629 int pgactivate = 0;
630 unsigned long nr_reclaimed = 0;
631
632 cond_resched();
633
634 pagevec_init(&freed_pvec, 1);
635 while (!list_empty(page_list)) {
636 enum page_references references;
637 struct address_space *mapping;
638 struct page *page;
639 int may_enter_fs;
640
641 cond_resched();
642
643 page = lru_to_page(page_list);
644 list_del(&page->lru);
645
646 if (!trylock_page(page))
647 goto keep;
648
649 VM_BUG_ON(PageActive(page));
650
651 sc->nr_scanned++;
652
653 if (unlikely(!page_evictable(page, NULL)))
654 goto cull_mlocked;
655
656 if (!sc->may_unmap && page_mapped(page))
657 goto keep_locked;
658
659
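		/* Double the slab pressure for mapped and swapcache pages */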
660 if (page_mapped(page) || PageSwapCache(page))
661 sc->nr_scanned++;
662
663 may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
664 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
665
666 if (PageWriteback(page)) {
667
668
669
670
671
672
673
674
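			/*
			 * Synchronous reclaim is performed in two passes:
			 * first an asynchronous pass over the list to
			 * start parallel writeback, then a second synchronous
			 * pass to wait for the IO to complete.  Wait here
			 * for any page for which writeback has already
			 * started.
			 */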
675 if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
676 wait_on_page_writeback(page);
677 else
678 goto keep_locked;
679 }
680
681 references = page_check_references(page, sc);
682 switch (references) {
683 case PAGEREF_ACTIVATE:
684 goto activate_locked;
685 case PAGEREF_KEEP:
686 goto keep_locked;
687 case PAGEREF_RECLAIM:
688 case PAGEREF_RECLAIM_CLEAN:
689 ;
690 }
691
692
693
694
695
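		/*
		 * Anonymous process memory has backing store?
		 * Try to allocate it some swap space here.
		 */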
696 if (PageAnon(page) && !PageSwapCache(page)) {
697 if (!(sc->gfp_mask & __GFP_IO))
698 goto keep_locked;
699 if (!add_to_swap(page))
700 goto activate_locked;
701 may_enter_fs = 1;
702 }
703
704 mapping = page_mapping(page);
705
706
707
708
709
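		/*
		 * The page is mapped into the page tables of one or more
		 * processes. Try to unmap it here.
		 */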
710 if (page_mapped(page) && mapping) {
711 switch (try_to_unmap(page, TTU_UNMAP)) {
712 case SWAP_FAIL:
713 goto activate_locked;
714 case SWAP_AGAIN:
715 goto keep_locked;
716 case SWAP_MLOCK:
717 goto cull_mlocked;
718 case SWAP_SUCCESS:
719 ;
720 }
721 }
722
723 if (PageDirty(page)) {
724 if (references == PAGEREF_RECLAIM_CLEAN)
725 goto keep_locked;
726 if (!may_enter_fs)
727 goto keep_locked;
728 if (!sc->may_writepage)
729 goto keep_locked;
730
731
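			/* Page is dirty, try to write it out here */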
732 switch (pageout(page, mapping, sync_writeback)) {
733 case PAGE_KEEP:
734 goto keep_locked;
735 case PAGE_ACTIVATE:
736 goto activate_locked;
737 case PAGE_SUCCESS:
738 if (PageWriteback(page) || PageDirty(page))
739 goto keep;
740
741
742
743
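				/*
				 * A synchronous write - probably a ramdisk.  Go
				 * ahead and try to reclaim the page.
				 */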
744 if (!trylock_page(page))
745 goto keep;
746 if (PageDirty(page) || PageWriteback(page))
747 goto keep_locked;
748 mapping = page_mapping(page);
749 case PAGE_CLEAN:
750 ;
751 }
752 }
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
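		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 *
		 * We do this even if the page is PageDirty().
		 * try_to_release_page() does not perform I/O, but it is
		 * possible for a page to have PageDirty set, but it is actually
		 * clean (all its buffers are clean).  This happens if the
		 * buffers were written out directly, with submit_bh(). ext3
		 * will do this, as well as the blockdev mapping.
		 * try_to_release_page() will discover that cleanness and will
		 * drop the buffers and mark the page clean - it can be freed.
		 *
		 * Rarely, pages can have buffers and no ->mapping.  These are
		 * the pages which were not successfully invalidated in
		 * truncate_complete_page().  We try to drop those buffers here
		 * and if that worked, and the page is no longer mapped into
		 * process address space (page_count == 1) it can be freed.
		 * Otherwise, leave the page on the LRU so it is swappable.
		 */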
775 if (page_has_private(page)) {
776 if (!try_to_release_page(page, sc->gfp_mask))
777 goto activate_locked;
778 if (!mapping && page_count(page) == 1) {
779 unlock_page(page);
780 if (put_page_testzero(page))
781 goto free_it;
782 else {
783
784
785
786
787
788
789
790 nr_reclaimed++;
791 continue;
792 }
793 }
794 }
795
796 if (!mapping || !__remove_mapping(mapping, page))
797 goto keep_locked;
798
799
800
801
802
803
804
805
806 __clear_page_locked(page);
807free_it:
808 nr_reclaimed++;
809 if (!pagevec_add(&freed_pvec, page)) {
810 __pagevec_free(&freed_pvec);
811 pagevec_reinit(&freed_pvec);
812 }
813 continue;
814
815cull_mlocked:
816 if (PageSwapCache(page))
817 try_to_free_swap(page);
818 unlock_page(page);
819 putback_lru_page(page);
820 continue;
821
822activate_locked:
823
824 if (PageSwapCache(page) && vm_swap_full())
825 try_to_free_swap(page);
826 VM_BUG_ON(PageActive(page));
827 SetPageActive(page);
828 pgactivate++;
829keep_locked:
830 unlock_page(page);
831keep:
832 list_add(&page->lru, &ret_pages);
833 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
834 }
835 list_splice(&ret_pages, page_list);
836 if (pagevec_count(&freed_pvec))
837 __pagevec_free(&freed_pvec);
838 count_vm_events(PGACTIVATE, pgactivate);
839 return nr_reclaimed;
840}

/*
 * Attempt to remove the specified page from its LRU.  Only take this page
 * if it is of the appropriate PageActive status.  Pages which are being
 * freed elsewhere are also ignored.
 *
 * page:	page to consider
 * mode:	one of the LRU isolation modes
 *
 * returns 0 on success, -ve errno on failure.
 */
852int __isolate_lru_page(struct page *page, int mode, int file)
853{
854 int ret = -EINVAL;
855
856
857 if (!PageLRU(page))
858 return ret;
859
860
861
862
863
864
865 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
866 return ret;
867
868 if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
869 return ret;
870
871
872
873
874
875
876 if (PageUnevictable(page))
877 return ret;
878
879 ret = -EBUSY;
880
881 if (likely(get_page_unless_zero(page))) {
882
883
884
885
886
887 ClearPageLRU(page);
888 ret = 0;
889 }
890
891 return ret;
892}

/*
 * zone->lru_lock is heavily contended.  Some of the functions that
 * shrink the lists perform better by taking out a batch of pages
 * and working on them outside the LRU lock.
 *
 * For pagecache intensive workloads, this function is the hottest
 * spot in the kernel (apart from copy_*_user functions).
 *
 * Appropriate locks must be held before calling this function.
 *
 * @nr_to_scan:	The number of pages to look through on the list.
 * @src:	The LRU list to pull pages off.
 * @dst:	The temp list to put pages on to.
 * @scanned:	The number of pages that were scanned.
 * @order:	The caller's attempted allocation order
 * @mode:	One of the LRU isolation modes
 * @file:	True [1] if isolating file [!anon] pages
 *
 * returns how many pages were moved onto *@dst.
 */
914static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
915 struct list_head *src, struct list_head *dst,
916 unsigned long *scanned, int order, int mode, int file)
917{
918 unsigned long nr_taken = 0;
919 unsigned long scan;
920
921 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
922 struct page *page;
923 unsigned long pfn;
924 unsigned long end_pfn;
925 unsigned long page_pfn;
926 int zone_id;
927
928 page = lru_to_page(src);
929 prefetchw_prev_lru_page(page, src, flags);
930
931 VM_BUG_ON(!PageLRU(page));
932
933 switch (__isolate_lru_page(page, mode, file)) {
934 case 0:
935 list_move(&page->lru, dst);
936 mem_cgroup_del_lru(page);
937 nr_taken++;
938 break;
939
940 case -EBUSY:
941
942 list_move(&page->lru, src);
943 mem_cgroup_rotate_lru_list(page, page_lru(page));
944 continue;
945
946 default:
947 BUG();
948 }
949
950 if (!order)
951 continue;
952
953
954
955
956
957
958
959
960
961
962 zone_id = page_zone_id(page);
963 page_pfn = page_to_pfn(page);
964 pfn = page_pfn & ~((1 << order) - 1);
965 end_pfn = pfn + (1 << order);
966 for (; pfn < end_pfn; pfn++) {
967 struct page *cursor_page;
968
969
970 if (unlikely(pfn == page_pfn))
971 continue;
972
973
974 if (unlikely(!pfn_valid_within(pfn)))
975 break;
976
977 cursor_page = pfn_to_page(pfn);
978
979
980 if (unlikely(page_zone_id(cursor_page) != zone_id))
981 continue;
982
983
984
985
986
987
988 if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
989 !PageSwapCache(cursor_page))
990 continue;
991
992 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
993 list_move(&cursor_page->lru, dst);
994 mem_cgroup_del_lru(cursor_page);
995 nr_taken++;
996 scan++;
997 }
998 }
999 }
1000
1001 *scanned = scan;
1002 return nr_taken;
1003}
1004
1005static unsigned long isolate_pages_global(unsigned long nr,
1006 struct list_head *dst,
1007 unsigned long *scanned, int order,
1008 int mode, struct zone *z,
1009 int active, int file)
1010{
1011 int lru = LRU_BASE;
1012 if (active)
1013 lru += LRU_ACTIVE;
1014 if (file)
1015 lru += LRU_FILE;
1016 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
1017 mode, file);
1018}
1019
1020
1021
1022
1023
1024static unsigned long clear_active_flags(struct list_head *page_list,
1025 unsigned int *count)
1026{
1027 int nr_active = 0;
1028 int lru;
1029 struct page *page;
1030
1031 list_for_each_entry(page, page_list, lru) {
1032 lru = page_lru_base_type(page);
1033 if (PageActive(page)) {
1034 lru += LRU_ACTIVE;
1035 ClearPageActive(page);
1036 nr_active++;
1037 }
1038 count[lru]++;
1039 }
1040
1041 return nr_active;
1042}

/**
 * isolate_lru_page - tries to isolate a page from its LRU list
 * @page: page to isolate from its LRU list
 *
 * Isolates a @page from an LRU list, clears PageLRU and adjusts the
 * vmstat statistic corresponding to whatever LRU list the page was on.
 *
 * Returns 0 if the page was removed from an LRU list.
 * Returns -EBUSY if the page was not on an LRU list.
 *
 * The returned page will have PageLRU() cleared.  If it was found on
 * the active list, it will have PageActive set.  If it was found on
 * the unevictable list, it will have the PageUnevictable bit set. That flag
 * may need to be cleared by the caller before letting the page go.
 *
 * The vmstat statistic corresponding to the list on which the page was
 * found will be decremented.
 *
 * Restrictions:
 * (1) Must be called with an elevated refcount on the page. This is a
 *     fundamental difference from isolate_lru_pages (which is called
 *     without a stable reference).
 * (2) the lru_lock must not be held.
 * (3) interrupts must be enabled.
 */
1069int isolate_lru_page(struct page *page)
1070{
1071 int ret = -EBUSY;
1072
1073 if (PageLRU(page)) {
1074 struct zone *zone = page_zone(page);
1075
1076 spin_lock_irq(&zone->lru_lock);
1077 if (PageLRU(page) && get_page_unless_zero(page)) {
1078 int lru = page_lru(page);
1079 ret = 0;
1080 ClearPageLRU(page);
1081
1082 del_page_from_lru_list(zone, page, lru);
1083 }
1084 spin_unlock_irq(&zone->lru_lock);
1085 }
1086 return ret;
1087}

/*
 * Are there way too many processes in the direct reclaim path already?
 */
1092static int too_many_isolated(struct zone *zone, int file,
1093 struct scan_control *sc)
1094{
1095 unsigned long inactive, isolated;
1096
1097 if (current_is_kswapd())
1098 return 0;
1099
1100 if (!scanning_global_lru(sc))
1101 return 0;
1102
1103 if (file) {
1104 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1105 isolated = zone_page_state(zone, NR_ISOLATED_FILE);
1106 } else {
1107 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1108 isolated = zone_page_state(zone, NR_ISOLATED_ANON);
1109 }
1110
1111 return isolated > inactive;
1112}

/*
 * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
 * of reclaimed pages.
 */
1118static unsigned long shrink_inactive_list(unsigned long max_scan,
1119 struct zone *zone, struct scan_control *sc,
1120 int priority, int file)
1121{
1122 LIST_HEAD(page_list);
1123 struct pagevec pvec;
1124 unsigned long nr_scanned = 0;
1125 unsigned long nr_reclaimed = 0;
1126 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1127
1128 while (unlikely(too_many_isolated(zone, file, sc))) {
1129 congestion_wait(BLK_RW_ASYNC, HZ/10);
1130
1131
1132 if (fatal_signal_pending(current))
1133 return SWAP_CLUSTER_MAX;
1134 }
1135
1136
1137 pagevec_init(&pvec, 1);
1138
1139 lru_add_drain();
1140 spin_lock_irq(&zone->lru_lock);
1141 do {
1142 struct page *page;
1143 unsigned long nr_taken;
1144 unsigned long nr_scan;
1145 unsigned long nr_freed;
1146 unsigned long nr_active;
1147 unsigned int count[NR_LRU_LISTS] = { 0, };
1148 int mode = sc->lumpy_reclaim_mode ? ISOLATE_BOTH : ISOLATE_INACTIVE;
1149 unsigned long nr_anon;
1150 unsigned long nr_file;
1151
1152 if (scanning_global_lru(sc)) {
1153 nr_taken = isolate_pages_global(SWAP_CLUSTER_MAX,
1154 &page_list, &nr_scan,
1155 sc->order, mode,
1156 zone, 0, file);
1157 zone->pages_scanned += nr_scan;
1158 if (current_is_kswapd())
1159 __count_zone_vm_events(PGSCAN_KSWAPD, zone,
1160 nr_scan);
1161 else
1162 __count_zone_vm_events(PGSCAN_DIRECT, zone,
1163 nr_scan);
1164 } else {
1165 nr_taken = mem_cgroup_isolate_pages(SWAP_CLUSTER_MAX,
1166 &page_list, &nr_scan,
1167 sc->order, mode,
1168 zone, sc->mem_cgroup,
1169 0, file);
1170
1171
1172
1173
1174 }
1175
1176 if (nr_taken == 0)
1177 goto done;
1178
1179 nr_active = clear_active_flags(&page_list, count);
1180 __count_vm_events(PGDEACTIVATE, nr_active);
1181
1182 __mod_zone_page_state(zone, NR_ACTIVE_FILE,
1183 -count[LRU_ACTIVE_FILE]);
1184 __mod_zone_page_state(zone, NR_INACTIVE_FILE,
1185 -count[LRU_INACTIVE_FILE]);
1186 __mod_zone_page_state(zone, NR_ACTIVE_ANON,
1187 -count[LRU_ACTIVE_ANON]);
1188 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
1189 -count[LRU_INACTIVE_ANON]);
1190
1191 nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
1192 nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
1193 __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
1194 __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
1195
1196 reclaim_stat->recent_scanned[0] += nr_anon;
1197 reclaim_stat->recent_scanned[1] += nr_file;
1198
1199 spin_unlock_irq(&zone->lru_lock);
1200
1201 nr_scanned += nr_scan;
1202 nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
1203
1204
1205
1206
1207
1208
1209
1210 if (nr_freed < nr_taken && !current_is_kswapd() &&
1211 sc->lumpy_reclaim_mode) {
1212 congestion_wait(BLK_RW_ASYNC, HZ/10);
1213
1214
1215
1216
1217
1218 nr_active = clear_active_flags(&page_list, count);
1219 count_vm_events(PGDEACTIVATE, nr_active);
1220
1221 nr_freed += shrink_page_list(&page_list, sc,
1222 PAGEOUT_IO_SYNC);
1223 }
1224
1225 nr_reclaimed += nr_freed;
1226
1227 local_irq_disable();
1228 if (current_is_kswapd())
1229 __count_vm_events(KSWAPD_STEAL, nr_freed);
1230 __count_zone_vm_events(PGSTEAL, zone, nr_freed);
1231
1232 spin_lock(&zone->lru_lock);
1233
1234
1235
1236 while (!list_empty(&page_list)) {
1237 int lru;
1238 page = lru_to_page(&page_list);
1239 VM_BUG_ON(PageLRU(page));
1240 list_del(&page->lru);
1241 if (unlikely(!page_evictable(page, NULL))) {
1242 spin_unlock_irq(&zone->lru_lock);
1243 putback_lru_page(page);
1244 spin_lock_irq(&zone->lru_lock);
1245 continue;
1246 }
1247 SetPageLRU(page);
1248 lru = page_lru(page);
1249 add_page_to_lru_list(zone, page, lru);
1250 if (is_active_lru(lru)) {
1251 int file = is_file_lru(lru);
1252 reclaim_stat->recent_rotated[file]++;
1253 }
1254 if (!pagevec_add(&pvec, page)) {
1255 spin_unlock_irq(&zone->lru_lock);
1256 __pagevec_release(&pvec);
1257 spin_lock_irq(&zone->lru_lock);
1258 }
1259 }
1260 __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
1261 __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
1262
1263 } while (nr_scanned < max_scan);
1264
1265done:
1266 spin_unlock_irq(&zone->lru_lock);
1267 pagevec_release(&pvec);
1268 return nr_reclaimed;
1269}

/*
 * We are about to scan this zone at a certain priority level.  If that
 * priority level is smaller (ie: more urgent) than the previous priority,
 * record it so that other parts of the VM can see how hard this zone is
 * being scanned.
 */
1279static inline void note_zone_scanning_priority(struct zone *zone, int priority)
1280{
1281 if (priority < zone->prev_priority)
1282 zone->prev_priority = priority;
1283}
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303static void move_active_pages_to_lru(struct zone *zone,
1304 struct list_head *list,
1305 enum lru_list lru)
1306{
1307 unsigned long pgmoved = 0;
1308 struct pagevec pvec;
1309 struct page *page;
1310
1311 pagevec_init(&pvec, 1);
1312
1313 while (!list_empty(list)) {
1314 page = lru_to_page(list);
1315
1316 VM_BUG_ON(PageLRU(page));
1317 SetPageLRU(page);
1318
1319 list_move(&page->lru, &zone->lru[lru].list);
1320 mem_cgroup_add_lru_list(page, lru);
1321 pgmoved++;
1322
1323 if (!pagevec_add(&pvec, page) || list_empty(list)) {
1324 spin_unlock_irq(&zone->lru_lock);
1325 if (buffer_heads_over_limit)
1326 pagevec_strip(&pvec);
1327 __pagevec_release(&pvec);
1328 spin_lock_irq(&zone->lru_lock);
1329 }
1330 }
1331 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1332 if (!is_active_lru(lru))
1333 __count_vm_events(PGDEACTIVATE, pgmoved);
1334}
1335
1336static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1337 struct scan_control *sc, int priority, int file)
1338{
1339 unsigned long nr_taken;
1340 unsigned long pgscanned;
1341 unsigned long vm_flags;
1342 LIST_HEAD(l_hold);
1343 LIST_HEAD(l_active);
1344 LIST_HEAD(l_inactive);
1345 struct page *page;
1346 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1347 unsigned long nr_rotated = 0;
1348
1349 lru_add_drain();
1350 spin_lock_irq(&zone->lru_lock);
1351 if (scanning_global_lru(sc)) {
1352 nr_taken = isolate_pages_global(nr_pages, &l_hold,
1353 &pgscanned, sc->order,
1354 ISOLATE_ACTIVE, zone,
1355 1, file);
1356 zone->pages_scanned += pgscanned;
1357 } else {
1358 nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
1359 &pgscanned, sc->order,
1360 ISOLATE_ACTIVE, zone,
1361 sc->mem_cgroup, 1, file);
1362
1363
1364
1365
1366 }
1367
1368 reclaim_stat->recent_scanned[file] += nr_taken;
1369
1370 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1371 if (file)
1372 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
1373 else
1374 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
1375 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1376 spin_unlock_irq(&zone->lru_lock);
1377
1378 while (!list_empty(&l_hold)) {
1379 cond_resched();
1380 page = lru_to_page(&l_hold);
1381 list_del(&page->lru);
1382
1383 if (unlikely(!page_evictable(page, NULL))) {
1384 putback_lru_page(page);
1385 continue;
1386 }
1387
1388 if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
1389 nr_rotated++;
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399 if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
1400 list_add(&page->lru, &l_active);
1401 continue;
1402 }
1403 }
1404
1405 ClearPageActive(page);
1406 list_add(&page->lru, &l_inactive);
1407 }
1408
1409
1410
1411
1412 spin_lock_irq(&zone->lru_lock);
1413
1414
1415
1416
1417
1418
1419 reclaim_stat->recent_rotated[file] += nr_rotated;
1420
1421 move_active_pages_to_lru(zone, &l_active,
1422 LRU_ACTIVE + file * LRU_FILE);
1423 move_active_pages_to_lru(zone, &l_inactive,
1424 LRU_BASE + file * LRU_FILE);
1425 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1426 spin_unlock_irq(&zone->lru_lock);
1427}
1428
1429static int inactive_anon_is_low_global(struct zone *zone)
1430{
1431 unsigned long active, inactive;
1432
1433 active = zone_page_state(zone, NR_ACTIVE_ANON);
1434 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1435
1436 if (inactive * zone->inactive_ratio < active)
1437 return 1;
1438
1439 return 0;
1440}
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1451{
1452 int low;
1453
1454 if (scanning_global_lru(sc))
1455 low = inactive_anon_is_low_global(zone);
1456 else
1457 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
1458 return low;
1459}
1460
1461static int inactive_file_is_low_global(struct zone *zone)
1462{
1463 unsigned long active, inactive;
1464
1465 active = zone_page_state(zone, NR_ACTIVE_FILE);
1466 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1467
1468 return (active > inactive);
1469}
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
1487{
1488 int low;
1489
1490 if (scanning_global_lru(sc))
1491 low = inactive_file_is_low_global(zone);
1492 else
1493 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
1494 return low;
1495}
1496
1497static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
1498 int file)
1499{
1500 if (file)
1501 return inactive_file_is_low(zone, sc);
1502 else
1503 return inactive_anon_is_low(zone, sc);
1504}
1505
1506static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1507 struct zone *zone, struct scan_control *sc, int priority)
1508{
1509 int file = is_file_lru(lru);
1510
1511 if (is_active_lru(lru)) {
1512 if (inactive_list_is_low(zone, sc, file))
1513 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1514 return 0;
1515 }
1516
1517 return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
1518}
1519
1520
1521
1522
1523
1524static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
1525 unsigned long *nr_saved_scan)
1526{
1527 unsigned long nr;
1528
1529 *nr_saved_scan += nr_to_scan;
1530 nr = *nr_saved_scan;
1531
1532 if (nr >= SWAP_CLUSTER_MAX)
1533 *nr_saved_scan = 0;
1534 else
1535 nr = 0;
1536
1537 return nr;
1538}
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548static void get_scan_count(struct zone *zone, struct scan_control *sc,
1549 unsigned long *nr, int priority)
1550{
1551 unsigned long anon, file, free;
1552 unsigned long anon_prio, file_prio;
1553 unsigned long ap, fp;
1554 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1555 u64 fraction[2], denominator;
1556 enum lru_list l;
1557 int noswap = 0;
1558
1559
1560 if (!sc->may_swap || (nr_swap_pages <= 0)) {
1561 noswap = 1;
1562 fraction[0] = 0;
1563 fraction[1] = 1;
1564 denominator = 1;
1565 goto out;
1566 }
1567
1568 anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
1569 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
1570 file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
1571 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
1572
1573 if (scanning_global_lru(sc)) {
1574 free = zone_page_state(zone, NR_FREE_PAGES);
1575
1576
1577 if (unlikely(file + free <= high_wmark_pages(zone))) {
1578 fraction[0] = 1;
1579 fraction[1] = 0;
1580 denominator = 1;
1581 goto out;
1582 }
1583 }
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
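	/*
	 * OK, so we have swap space and a fair amount of page cache
	 * pages.  We use the recently rotated / recently scanned
	 * ratios to determine how valuable each cache is.
	 *
	 * Because workloads change over time (and to avoid overflow)
	 * we keep these statistics as a floating average, which ends
	 * up weighing recent references more than old ones.
	 *
	 * anon in [0], file in [1]
	 */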
1596 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1597 spin_lock_irq(&zone->lru_lock);
1598 reclaim_stat->recent_scanned[0] /= 2;
1599 reclaim_stat->recent_rotated[0] /= 2;
1600 spin_unlock_irq(&zone->lru_lock);
1601 }
1602
1603 if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
1604 spin_lock_irq(&zone->lru_lock);
1605 reclaim_stat->recent_scanned[1] /= 2;
1606 reclaim_stat->recent_rotated[1] /= 2;
1607 spin_unlock_irq(&zone->lru_lock);
1608 }
1609
1610
1611
1612
1613
1614 anon_prio = sc->swappiness;
1615 file_prio = 200 - sc->swappiness;
1616
1617
1618
1619
1620
1621
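	/*
	 * The amount of pressure on anon vs file pages is inversely
	 * proportional to the fraction of recently scanned pages on
	 * each list that were recently referenced and in active use.
	 */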
1622 ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
1623 ap /= reclaim_stat->recent_rotated[0] + 1;
1624
1625 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
1626 fp /= reclaim_stat->recent_rotated[1] + 1;
1627
1628 fraction[0] = ap;
1629 fraction[1] = fp;
1630 denominator = ap + fp + 1;
1631out:
1632 for_each_evictable_lru(l) {
1633 int file = is_file_lru(l);
1634 unsigned long scan;
1635
1636 scan = zone_nr_lru_pages(zone, sc, l);
1637 if (priority || noswap) {
1638 scan >>= priority;
1639 scan = div64_u64(scan * fraction[file], denominator);
1640 }
1641 nr[l] = nr_scan_try_batch(scan,
1642 &reclaim_stat->nr_saved_scan[l]);
1643 }
1644}
1645
1646static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
1647{
1648
1649
1650
1651
1652
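	/*
	 * If we need a large contiguous chunk of memory, or have
	 * trouble getting a small set of contiguous pages, we
	 * will reclaim both active and inactive pages.
	 */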
1653 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1654 sc->lumpy_reclaim_mode = 1;
1655 else if (sc->order && priority < DEF_PRIORITY - 2)
1656 sc->lumpy_reclaim_mode = 1;
1657 else
1658 sc->lumpy_reclaim_mode = 0;
1659}
1660

/*
 * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
 */
1664static void shrink_zone(int priority, struct zone *zone,
1665 struct scan_control *sc)
1666{
1667 unsigned long nr[NR_LRU_LISTS];
1668 unsigned long nr_to_scan;
1669 enum lru_list l;
1670 unsigned long nr_reclaimed = sc->nr_reclaimed;
1671 unsigned long nr_to_reclaim = sc->nr_to_reclaim;
1672
1673 get_scan_count(zone, sc, nr, priority);
1674
1675 set_lumpy_reclaim_mode(priority, sc);
1676
1677 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
1678 nr[LRU_INACTIVE_FILE]) {
1679 for_each_evictable_lru(l) {
1680 if (nr[l]) {
1681 nr_to_scan = min_t(unsigned long,
1682 nr[l], SWAP_CLUSTER_MAX);
1683 nr[l] -= nr_to_scan;
1684
1685 nr_reclaimed += shrink_list(l, nr_to_scan,
1686 zone, sc, priority);
1687 }
1688 }
1689
1690
1691
1692
1693
1694
1695
1696
1697 if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
1698 break;
1699 }
1700
1701 sc->nr_reclaimed = nr_reclaimed;
1702
1703
1704
1705
1706
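	/*
	 * Even if we did not try to evict anon pages at all, we want to
	 * rebalance the anon lru active/inactive ratio.
	 */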
1707 if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
1708 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1709
1710 throttle_vm_writeout(sc->gfp_mask);
1711}
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729static bool shrink_zones(int priority, struct zonelist *zonelist,
1730 struct scan_control *sc)
1731{
1732 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1733 struct zoneref *z;
1734 struct zone *zone;
1735 bool all_unreclaimable = true;
1736
1737 for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
1738 sc->nodemask) {
1739 if (!populated_zone(zone))
1740 continue;
1741
1742
1743
1744
1745 if (scanning_global_lru(sc)) {
1746 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1747 continue;
1748 note_zone_scanning_priority(zone, priority);
1749
1750 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
1751 continue;
1752 } else {
1753
1754
1755
1756
1757 mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
1758 priority);
1759 }
1760
1761 shrink_zone(priority, zone, sc);
1762 all_unreclaimable = false;
1763 }
1764 return all_unreclaimable;
1765}

/*
 * This is the main entry point to direct page reclaim.
 *
 * If a full scan of the inactive list fails to free enough memory then we
 * are "out of memory" and something needs to be killed.
 *
 * If the caller is !__GFP_FS then the probability of a failure is reasonably
 * high - the zone may be full of dirty or under-writeback pages, which this
 * caller can't do much about.  We kick the writeback threads and take explicit
 * naps in the hope that some of these pages can be written.  But if the
 * allocating task holds filesystem locks which prevent writeout this might not
 * work, and the allocation attempt will fail.
 *
 * returns:	0, if no pages reclaimed
 *		else, the number of pages reclaimed
 */
1783static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1784 struct scan_control *sc)
1785{
1786 int priority;
1787 bool all_unreclaimable;
1788 unsigned long total_scanned = 0;
1789 struct reclaim_state *reclaim_state = current->reclaim_state;
1790 unsigned long lru_pages = 0;
1791 struct zoneref *z;
1792 struct zone *zone;
1793 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1794 unsigned long writeback_threshold;
1795
1796 get_mems_allowed();
1797 delayacct_freepages_start();
1798
1799 if (scanning_global_lru(sc))
1800 count_vm_event(ALLOCSTALL);
1801
1802
1803
1804 if (scanning_global_lru(sc)) {
1805 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1806
1807 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1808 continue;
1809
1810 lru_pages += zone_reclaimable_pages(zone);
1811 }
1812 }
1813
1814 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
1815 sc->nr_scanned = 0;
1816 if (!priority)
1817 disable_swap_token();
1818 all_unreclaimable = shrink_zones(priority, zonelist, sc);
1819
1820
1821
1822
1823 if (scanning_global_lru(sc)) {
1824 shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
1825 if (reclaim_state) {
1826 sc->nr_reclaimed += reclaim_state->reclaimed_slab;
1827 reclaim_state->reclaimed_slab = 0;
1828 }
1829 }
1830 total_scanned += sc->nr_scanned;
1831 if (sc->nr_reclaimed >= sc->nr_to_reclaim)
1832 goto out;
1833
1834
1835
1836
1837
1838
1839
1840
1841 writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
1842 if (total_scanned > writeback_threshold) {
1843 wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
1844 sc->may_writepage = 1;
1845 }
1846
1847
1848 if (!sc->hibernation_mode && sc->nr_scanned &&
1849 priority < DEF_PRIORITY - 2)
1850 congestion_wait(BLK_RW_ASYNC, HZ/10);
1851 }
1852
1853out:
1854
1855
1856
1857
1858
1859
1860
1861 if (priority < 0)
1862 priority = 0;
1863
1864 if (scanning_global_lru(sc)) {
1865 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1866
1867 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1868 continue;
1869
1870 zone->prev_priority = priority;
1871 }
1872 } else
1873 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
1874
1875 delayacct_freepages_end();
1876 put_mems_allowed();
1877
1878 if (sc->nr_reclaimed)
1879 return sc->nr_reclaimed;
1880
1881
1882 if (scanning_global_lru(sc) && !all_unreclaimable)
1883 return 1;
1884
1885 return 0;
1886}
1887
1888unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1889 gfp_t gfp_mask, nodemask_t *nodemask)
1890{
1891 struct scan_control sc = {
1892 .gfp_mask = gfp_mask,
1893 .may_writepage = !laptop_mode,
1894 .nr_to_reclaim = SWAP_CLUSTER_MAX,
1895 .may_unmap = 1,
1896 .may_swap = 1,
1897 .swappiness = vm_swappiness,
1898 .order = order,
1899 .mem_cgroup = NULL,
1900 .nodemask = nodemask,
1901 };
1902
1903 return do_try_to_free_pages(zonelist, &sc);
1904}
1905
1906#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1907
1908unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
1909 gfp_t gfp_mask, bool noswap,
1910 unsigned int swappiness,
1911 struct zone *zone, int nid)
1912{
1913 struct scan_control sc = {
1914 .may_writepage = !laptop_mode,
1915 .may_unmap = 1,
1916 .may_swap = !noswap,
1917 .swappiness = swappiness,
1918 .order = 0,
1919 .mem_cgroup = mem,
1920 };
1921 nodemask_t nm = nodemask_of_node(nid);
1922
1923 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1924 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1925 sc.nodemask = &nm;
1926 sc.nr_reclaimed = 0;
1927 sc.nr_scanned = 0;
1928
1929
1930
1931
1932
1933
1934
1935 shrink_zone(0, zone, &sc);
1936 return sc.nr_reclaimed;
1937}
1938
1939unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1940 gfp_t gfp_mask,
1941 bool noswap,
1942 unsigned int swappiness)
1943{
1944 struct zonelist *zonelist;
1945 struct scan_control sc = {
1946 .may_writepage = !laptop_mode,
1947 .may_unmap = 1,
1948 .may_swap = !noswap,
1949 .nr_to_reclaim = SWAP_CLUSTER_MAX,
1950 .swappiness = swappiness,
1951 .order = 0,
1952 .mem_cgroup = mem_cont,
1953 .nodemask = NULL,
1954 };
1955
1956 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1957 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1958 zonelist = NODE_DATA(numa_node_id())->node_zonelists;
1959 return do_try_to_free_pages(zonelist, &sc);
1960}
1961#endif
1962
1963
1964static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
1965{
1966 int i;
1967
1968
1969 if (remaining)
1970 return 1;
1971
1972
1973 for (i = 0; i < pgdat->nr_zones; i++) {
1974 struct zone *zone = pgdat->node_zones + i;
1975
1976 if (!populated_zone(zone))
1977 continue;
1978
1979 if (zone->all_unreclaimable)
1980 continue;
1981
1982 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
1983 0, 0))
1984 return 1;
1985 }
1986
1987 return 0;
1988}

/*
 * For kswapd, balance_pgdat() will work across all this node's zones until
 * they are all at high_wmark_pages(zone).
 *
 * Returns the number of pages which were actually freed.
 *
 * There is special handling here for zones which are full of pinned pages.
 * This can happen if the pages are all mlocked, or if they are all used by
 * device drivers (say, ZONE_DMA).  Or if they are all in use by hugetlb.
 * What we do is to detect the case where all pages in the zone have been
 * scanned twice and there has been zero successful reclaim.  Mark the zone as
 * dead and from now on, only perform a short scan.  Basically we're polling
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction.  It skips
 * zones which have free_pages > high_wmark_pages(zone), but once a zone is
 * found to have free_pages <= high_wmark_pages(zone), we scan that zone and
 * the lower zones regardless of the number of free pages in the lower zones.
 * This interoperates with the page allocator fallback scheme to ensure that
 * aging of pages is balanced across the zones.
 */
2011static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
2012{
2013 int all_zones_ok;
2014 int priority;
2015 int i;
2016 unsigned long total_scanned;
2017 struct reclaim_state *reclaim_state = current->reclaim_state;
2018 struct scan_control sc = {
2019 .gfp_mask = GFP_KERNEL,
2020 .may_unmap = 1,
2021 .may_swap = 1,
2022
2023
2024
2025
2026 .nr_to_reclaim = ULONG_MAX,
2027 .swappiness = vm_swappiness,
2028 .order = order,
2029 .mem_cgroup = NULL,
2030 };
2031
2032
2033
2034
2035
2036 int temp_priority[MAX_NR_ZONES];
2037
2038loop_again:
2039 total_scanned = 0;
2040 sc.nr_reclaimed = 0;
2041 sc.may_writepage = !laptop_mode;
2042 count_vm_event(PAGEOUTRUN);
2043
2044 for (i = 0; i < pgdat->nr_zones; i++)
2045 temp_priority[i] = DEF_PRIORITY;
2046
2047 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2048 int end_zone = 0;
2049 unsigned long lru_pages = 0;
2050 int has_under_min_watermark_zone = 0;
2051
2052
2053 if (!priority)
2054 disable_swap_token();
2055
2056 all_zones_ok = 1;
2057
2058
2059
2060
2061
2062 for (i = pgdat->nr_zones - 1; i >= 0; i--) {
2063 struct zone *zone = pgdat->node_zones + i;
2064
2065 if (!populated_zone(zone))
2066 continue;
2067
2068 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
2069 continue;
2070
2071
2072
2073
2074
2075 if (inactive_anon_is_low(zone, &sc))
2076 shrink_active_list(SWAP_CLUSTER_MAX, zone,
2077 &sc, priority, 0);
2078
2079 if (!zone_watermark_ok(zone, order,
2080 high_wmark_pages(zone), 0, 0)) {
2081 end_zone = i;
2082 break;
2083 }
2084 }
2085 if (i < 0)
2086 goto out;
2087
2088 for (i = 0; i <= end_zone; i++) {
2089 struct zone *zone = pgdat->node_zones + i;
2090
2091 lru_pages += zone_reclaimable_pages(zone);
2092 }
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103 for (i = 0; i <= end_zone; i++) {
2104 struct zone *zone = pgdat->node_zones + i;
2105 int nr_slab;
2106 int nid, zid;
2107
2108 if (!populated_zone(zone))
2109 continue;
2110
2111 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
2112 continue;
2113
2114 temp_priority[i] = priority;
2115 sc.nr_scanned = 0;
2116 note_zone_scanning_priority(zone, priority);
2117
2118 nid = pgdat->node_id;
2119 zid = zone_idx(zone);
2120
2121
2122
2123
2124 mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
2125 nid, zid);
2126
2127
2128
2129
2130 if (!zone_watermark_ok(zone, order,
2131 8*high_wmark_pages(zone), end_zone, 0))
2132 shrink_zone(priority, zone, &sc);
2133 reclaim_state->reclaimed_slab = 0;
2134 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
2135 lru_pages);
2136 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
2137 total_scanned += sc.nr_scanned;
2138 if (zone->all_unreclaimable)
2139 continue;
2140 if (nr_slab == 0 &&
2141 zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6))
2142 zone->all_unreclaimable = 1;
2143
2144
2145
2146
2147
2148 if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
2149 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
2150 sc.may_writepage = 1;
2151
2152 if (!zone_watermark_ok(zone, order,
2153 high_wmark_pages(zone), end_zone, 0)) {
2154 all_zones_ok = 0;
2155
2156
2157
2158
2159
2160 if (!zone_watermark_ok(zone, order,
2161 min_wmark_pages(zone), end_zone, 0))
2162 has_under_min_watermark_zone = 1;
2163 }
2164
2165 }
2166 if (all_zones_ok)
2167 break;
2168
2169
2170
2171
2172 if (total_scanned && (priority < DEF_PRIORITY - 2)) {
2173 if (has_under_min_watermark_zone)
2174 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
2175 else
2176 congestion_wait(BLK_RW_ASYNC, HZ/10);
2177 }
2178
2179
2180
2181
2182
2183
2184
2185 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
2186 break;
2187 }
2188out:
2189
2190
2191
2192
2193
2194 for (i = 0; i < pgdat->nr_zones; i++) {
2195 struct zone *zone = pgdat->node_zones + i;
2196
2197 zone->prev_priority = temp_priority[i];
2198 }
2199 if (!all_zones_ok) {
2200 cond_resched();
2201
2202 try_to_freeze();
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218 if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
2219 order = sc.order = 0;
2220
2221 goto loop_again;
2222 }
2223
2224 return sc.nr_reclaimed;
2225}

/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc, where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 */
2240static int kswapd(void *p)
2241{
2242 unsigned long order;
2243 pg_data_t *pgdat = (pg_data_t*)p;
2244 struct task_struct *tsk = current;
2245 DEFINE_WAIT(wait);
2246 struct reclaim_state reclaim_state = {
2247 .reclaimed_slab = 0,
2248 };
2249 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
2250
2251 lockdep_set_current_reclaim_state(GFP_KERNEL);
2252
2253 if (!cpumask_empty(cpumask))
2254 set_cpus_allowed_ptr(tsk, cpumask);
2255 current->reclaim_state = &reclaim_state;
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2270 set_freezable();
2271
2272 order = 0;
2273 for ( ; ; ) {
2274 unsigned long new_order;
2275 int ret;
2276
2277 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
2278 new_order = pgdat->kswapd_max_order;
2279 pgdat->kswapd_max_order = 0;
2280 if (order < new_order) {
2281
2282
2283
2284
2285 order = new_order;
2286 } else {
2287 if (!freezing(current) && !kthread_should_stop()) {
2288 long remaining = 0;
2289
2290
2291 if (!sleeping_prematurely(pgdat, order, remaining)) {
2292 remaining = schedule_timeout(HZ/10);
2293 finish_wait(&pgdat->kswapd_wait, &wait);
2294 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
2295 }
2296
2297
2298
2299
2300
2301
2302 if (!sleeping_prematurely(pgdat, order, remaining))
2303 schedule();
2304 else {
2305 if (remaining)
2306 count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
2307 else
2308 count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
2309 }
2310 }
2311
2312 order = pgdat->kswapd_max_order;
2313 }
2314 finish_wait(&pgdat->kswapd_wait, &wait);
2315
2316 ret = try_to_freeze();
2317 if (kthread_should_stop())
2318 break;
2319
2320
2321
2322
2323
2324 if (!ret)
2325 balance_pgdat(pgdat, order);
2326 }
2327 return 0;
2328}

/*
 * A zone is low on free memory, so wake its kswapd task to service it.
 */
2333void wakeup_kswapd(struct zone *zone, int order)
2334{
2335 pg_data_t *pgdat;
2336
2337 if (!populated_zone(zone))
2338 return;
2339
2340 pgdat = zone->zone_pgdat;
2341 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
2342 return;
2343 if (pgdat->kswapd_max_order < order)
2344 pgdat->kswapd_max_order = order;
2345 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2346 return;
2347 if (!waitqueue_active(&pgdat->kswapd_wait))
2348 return;
2349 wake_up_interruptible(&pgdat->kswapd_wait);
2350}

/*
 * The reclaimable count would be mostly accurate.
 * The less reclaimable pages may be
 * - mlocked pages, which will be moved to unevictable list when encountered
 * - mapped pages, which may require several travels to be reclaimed
 * - dirty pages, which is not "instantly" reclaimable
 */
2359unsigned long global_reclaimable_pages(void)
2360{
2361 int nr;
2362
2363 nr = global_page_state(NR_ACTIVE_FILE) +
2364 global_page_state(NR_INACTIVE_FILE);
2365
2366 if (nr_swap_pages > 0)
2367 nr += global_page_state(NR_ACTIVE_ANON) +
2368 global_page_state(NR_INACTIVE_ANON);
2369
2370 return nr;
2371}
2372
2373unsigned long zone_reclaimable_pages(struct zone *zone)
2374{
2375 int nr;
2376
2377 nr = zone_page_state(zone, NR_ACTIVE_FILE) +
2378 zone_page_state(zone, NR_INACTIVE_FILE);
2379
2380 if (nr_swap_pages > 0)
2381 nr += zone_page_state(zone, NR_ACTIVE_ANON) +
2382 zone_page_state(zone, NR_INACTIVE_ANON);
2383
2384 return nr;
2385}
2386
2387#ifdef CONFIG_HIBERNATION
/*
 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number
 * of freed pages.
 *
 * Rather than trying to age LRUs the aim is to preserve the overall
 * LRU order by reclaiming preferentially
 * inactive > active > active referenced > active mapped
 */
2396unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
2397{
2398 struct reclaim_state reclaim_state;
2399 struct scan_control sc = {
2400 .gfp_mask = GFP_HIGHUSER_MOVABLE,
2401 .may_swap = 1,
2402 .may_unmap = 1,
2403 .may_writepage = 1,
2404 .nr_to_reclaim = nr_to_reclaim,
2405 .hibernation_mode = 1,
2406 .swappiness = vm_swappiness,
2407 .order = 0,
2408 };
2409 struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
2410 struct task_struct *p = current;
2411 unsigned long nr_reclaimed;
2412
2413 p->flags |= PF_MEMALLOC;
2414 lockdep_set_current_reclaim_state(sc.gfp_mask);
2415 reclaim_state.reclaimed_slab = 0;
2416 p->reclaim_state = &reclaim_state;
2417
2418 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2419
2420 p->reclaim_state = NULL;
2421 lockdep_clear_current_reclaim_state();
2422 p->flags &= ~PF_MEMALLOC;
2423
2424 return nr_reclaimed;
2425}
2426#endif
2427
2428
2429
2430
2431
2432static int __devinit cpu_callback(struct notifier_block *nfb,
2433 unsigned long action, void *hcpu)
2434{
2435 int nid;
2436
2437 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
2438 for_each_node_state(nid, N_HIGH_MEMORY) {
2439 pg_data_t *pgdat = NODE_DATA(nid);
2440 const struct cpumask *mask;
2441
2442 mask = cpumask_of_node(pgdat->node_id);
2443
2444 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
2445
2446 set_cpus_allowed_ptr(pgdat->kswapd, mask);
2447 }
2448 }
2449 return NOTIFY_OK;
2450}
2451
2452
2453
2454
2455
2456int kswapd_run(int nid)
2457{
2458 pg_data_t *pgdat = NODE_DATA(nid);
2459 int ret = 0;
2460
2461 if (pgdat->kswapd)
2462 return 0;
2463
2464 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
2465 if (IS_ERR(pgdat->kswapd)) {
		/* failure at boot is fatal */
		BUG_ON(system_state == SYSTEM_BOOTING);
		printk("Failed to start kswapd on node %d\n", nid);
2469 ret = -1;
2470 }
2471 return ret;
2472}
2473
2474
2475
2476
2477void kswapd_stop(int nid)
2478{
2479 struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
2480
2481 if (kswapd)
2482 kthread_stop(kswapd);
2483}
2484
2485static int __init kswapd_init(void)
2486{
2487 int nid;
2488
2489 swap_setup();
2490 for_each_node_state(nid, N_HIGH_MEMORY)
2491 kswapd_run(nid);
2492 hotcpu_notifier(cpu_callback, 0);
2493 return 0;
2494}
2495
2496module_init(kswapd_init)
2497
2498#ifdef CONFIG_NUMA

/*
 * Zone reclaim mode
 *
 * If non-zero call zone_reclaim when the number of free pages falls below
 * the watermarks.
 */
2505int zone_reclaim_mode __read_mostly;
2506
2507#define RECLAIM_OFF 0
2508#define RECLAIM_ZONE (1<<0)
2509#define RECLAIM_WRITE (1<<1)
2510#define RECLAIM_SWAP (1<<2)
2511

/*
 * Priority for ZONE_RECLAIM. This determines the fraction of pages
 * of a node considered for each zone_reclaim. 4 scans 1/16th of
 * a zone.
 */
2517#define ZONE_RECLAIM_PRIORITY 4
2518

/*
 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
 * occur.
 */
2523int sysctl_min_unmapped_ratio = 1;
2524

/*
 * If the number of slab pages in a zone grows beyond this percentage then
 * slab reclaim needs to occur.
 */
2529int sysctl_min_slab_ratio = 5;
2530
2531static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
2532{
2533 unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
2534 unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
2535 zone_page_state(zone, NR_ACTIVE_FILE);
2536
2537
2538
2539
2540
2541
2542 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
2543}
2544
2545
2546static long zone_pagecache_reclaimable(struct zone *zone)
2547{
2548 long nr_pagecache_reclaimable;
2549 long delta = 0;
2550
2551
2552
2553
2554
2555
2556
2557 if (zone_reclaim_mode & RECLAIM_SWAP)
2558 nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
2559 else
2560 nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
2561
2562
2563 if (!(zone_reclaim_mode & RECLAIM_WRITE))
2564 delta += zone_page_state(zone, NR_FILE_DIRTY);
2565
2566
2567 if (unlikely(delta > nr_pagecache_reclaimable))
2568 delta = nr_pagecache_reclaimable;
2569
2570 return nr_pagecache_reclaimable - delta;
2571}
2572
2573
2574
2575
2576static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2577{
2578
2579 const unsigned long nr_pages = 1 << order;
2580 struct task_struct *p = current;
2581 struct reclaim_state reclaim_state;
2582 int priority;
2583 struct scan_control sc = {
2584 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
2585 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
2586 .may_swap = 1,
2587 .nr_to_reclaim = max_t(unsigned long, nr_pages,
2588 SWAP_CLUSTER_MAX),
2589 .gfp_mask = gfp_mask,
2590 .swappiness = vm_swappiness,
2591 .order = order,
2592 };
2593 unsigned long slab_reclaimable;
2594
2595 disable_swap_token();
2596 cond_resched();
2597
2598
2599
2600
2601
2602 p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
2603 lockdep_set_current_reclaim_state(gfp_mask);
2604 reclaim_state.reclaimed_slab = 0;
2605 p->reclaim_state = &reclaim_state;
2606
2607 if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
2608
2609
2610
2611
2612 priority = ZONE_RECLAIM_PRIORITY;
2613 do {
2614 note_zone_scanning_priority(zone, priority);
2615 shrink_zone(priority, zone, &sc);
2616 priority--;
2617 } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
2618 }
2619
2620 slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
2621 if (slab_reclaimable > zone->min_slab_pages) {
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632 while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
2633 zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
2634 slab_reclaimable - nr_pages)
2635 ;
2636
2637
2638
2639
2640
2641 sc.nr_reclaimed += slab_reclaimable -
2642 zone_page_state(zone, NR_SLAB_RECLAIMABLE);
2643 }
2644
2645 p->reclaim_state = NULL;
2646 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
2647 lockdep_clear_current_reclaim_state();
2648 return sc.nr_reclaimed >= nr_pages;
2649}
2650
2651int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2652{
2653 int node_id;
2654 int ret;
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666 if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
2667 zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
2668 return ZONE_RECLAIM_FULL;
2669
2670 if (zone->all_unreclaimable)
2671 return ZONE_RECLAIM_FULL;
2672
2673
2674
2675
2676 if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
2677 return ZONE_RECLAIM_NOSCAN;
2678
2679
2680
2681
2682
2683
2684
2685 node_id = zone_to_nid(zone);
2686 if (node_state(node_id, N_CPU) && node_id != numa_node_id())
2687 return ZONE_RECLAIM_NOSCAN;
2688
2689 if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
2690 return ZONE_RECLAIM_NOSCAN;
2691
2692 ret = __zone_reclaim(zone, gfp_mask, order);
2693 zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
2694
2695 if (!ret)
2696 count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
2697
2698 return ret;
2699}
2700#endif

/*
 * page_evictable - test whether a page is evictable
 * @page: the page to test
 * @vma: the VMA in which the page is or will be mapped, may be NULL
 *
 * Test whether page is evictable--i.e., should be placed on active/inactive
 * lists vs unevictable list.  The vma argument is !NULL when called from the
 * fault path to determine how to instantiate a new page.
 *
 * Reasons page might not be evictable:
 * (1) page's mapping marked unevictable
 * (2) page is part of an mlocked VMA
 */
2716int page_evictable(struct page *page, struct vm_area_struct *vma)
2717{
2718
2719 if (mapping_unevictable(page_mapping(page)))
2720 return 0;
2721
2722 if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
2723 return 0;
2724
2725 return 1;
2726}
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739static void check_move_unevictable_page(struct page *page, struct zone *zone)
2740{
2741 VM_BUG_ON(PageActive(page));
2742
2743retry:
2744 ClearPageUnevictable(page);
2745 if (page_evictable(page, NULL)) {
2746 enum lru_list l = page_lru_base_type(page);
2747
2748 __dec_zone_state(zone, NR_UNEVICTABLE);
2749 list_move(&page->lru, &zone->lru[l].list);
2750 mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
2751 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
2752 __count_vm_event(UNEVICTABLE_PGRESCUED);
2753 } else {
2754
2755
2756
2757 SetPageUnevictable(page);
2758 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
2759 mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
2760 if (page_evictable(page, NULL))
2761 goto retry;
2762 }
2763}
2764
2765
2766
2767
2768
2769
2770
2771
2772void scan_mapping_unevictable_pages(struct address_space *mapping)
2773{
2774 pgoff_t next = 0;
2775 pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
2776 PAGE_CACHE_SHIFT;
2777 struct zone *zone;
2778 struct pagevec pvec;
2779
2780 if (mapping->nrpages == 0)
2781 return;
2782
2783 pagevec_init(&pvec, 0);
2784 while (next < end &&
2785 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
2786 int i;
2787 int pg_scanned = 0;
2788
2789 zone = NULL;
2790
2791 for (i = 0; i < pagevec_count(&pvec); i++) {
2792 struct page *page = pvec.pages[i];
2793 pgoff_t page_index = page->index;
2794 struct zone *pagezone = page_zone(page);
2795
2796 pg_scanned++;
2797 if (page_index > next)
2798 next = page_index;
2799 next++;
2800
2801 if (pagezone != zone) {
2802 if (zone)
2803 spin_unlock_irq(&zone->lru_lock);
2804 zone = pagezone;
2805 spin_lock_irq(&zone->lru_lock);
2806 }
2807
2808 if (PageLRU(page) && PageUnevictable(page))
2809 check_move_unevictable_page(page, zone);
2810 }
2811 if (zone)
2812 spin_unlock_irq(&zone->lru_lock);
2813 pagevec_release(&pvec);
2814
2815 count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
2816 }
2817
2818}
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL
2831static void scan_zone_unevictable_pages(struct zone *zone)
2832{
2833 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
2834 unsigned long scan;
2835 unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
2836
2837 while (nr_to_scan > 0) {
2838 unsigned long batch_size = min(nr_to_scan,
2839 SCAN_UNEVICTABLE_BATCH_SIZE);
2840
2841 spin_lock_irq(&zone->lru_lock);
2842 for (scan = 0; scan < batch_size; scan++) {
2843 struct page *page = lru_to_page(l_unevictable);
2844
2845 if (!trylock_page(page))
2846 continue;
2847
2848 prefetchw_prev_lru_page(page, l_unevictable, flags);
2849
2850 if (likely(PageLRU(page) && PageUnevictable(page)))
2851 check_move_unevictable_page(page, zone);
2852
2853 unlock_page(page);
2854 }
2855 spin_unlock_irq(&zone->lru_lock);
2856
2857 nr_to_scan -= batch_size;
2858 }
2859}
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873static void scan_all_zones_unevictable_pages(void)
2874{
2875 struct zone *zone;
2876
2877 for_each_zone(zone) {
2878 scan_zone_unevictable_pages(zone);
2879 }
2880}
2881
2882
2883
2884
2885
2886unsigned long scan_unevictable_pages;
2887
2888int scan_unevictable_handler(struct ctl_table *table, int write,
2889 void __user *buffer,
2890 size_t *length, loff_t *ppos)
2891{
2892 proc_doulongvec_minmax(table, write, buffer, length, ppos);
2893
2894 if (write && *(unsigned long *)table->data)
2895 scan_all_zones_unevictable_pages();
2896
2897 scan_unevictable_pages = 0;
2898 return 0;
2899}
2900
2901
2902
2903
2904
2905
2906static ssize_t read_scan_unevictable_node(struct sys_device *dev,
2907 struct sysdev_attribute *attr,
2908 char *buf)
2909{
2910 return sprintf(buf, "0\n");
2911}
2912
2913static ssize_t write_scan_unevictable_node(struct sys_device *dev,
2914 struct sysdev_attribute *attr,
2915 const char *buf, size_t count)
2916{
2917 struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
2918 struct zone *zone;
2919 unsigned long res;
2920 unsigned long req = strict_strtoul(buf, 10, &res);
2921
2922 if (!req)
2923 return 1;
2924
2925 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2926 if (!populated_zone(zone))
2927 continue;
2928 scan_zone_unevictable_pages(zone);
2929 }
2930 return 1;
2931}
2932
2933
2934static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
2935 read_scan_unevictable_node,
2936 write_scan_unevictable_node);
2937
2938int scan_unevictable_register_node(struct node *node)
2939{
2940 return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
2941}
2942
2943void scan_unevictable_unregister_node(struct node *node)
2944{
2945 sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
2946}
2947
2948