1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/mm.h>
15#include <linux/module.h>
16#include <linux/gfp.h>
17#include <linux/kernel_stat.h>
18#include <linux/swap.h>
19#include <linux/pagemap.h>
20#include <linux/init.h>
21#include <linux/highmem.h>
22#include <linux/vmstat.h>
23#include <linux/file.h>
24#include <linux/writeback.h>
25#include <linux/blkdev.h>
26#include <linux/buffer_head.h>
27
28#include <linux/mm_inline.h>
29#include <linux/pagevec.h>
30#include <linux/backing-dev.h>
31#include <linux/rmap.h>
32#include <linux/topology.h>
33#include <linux/cpu.h>
34#include <linux/cpuset.h>
35#include <linux/notifier.h>
36#include <linux/rwsem.h>
37#include <linux/delay.h>
38#include <linux/kthread.h>
39#include <linux/freezer.h>
40#include <linux/memcontrol.h>
41#include <linux/delayacct.h>
42#include <linux/sysctl.h>
43
44#include <asm/tlbflush.h>
45#include <asm/div64.h>
46
47#include <linux/swapops.h>
48
49#include "internal.h"
50
51#define CREATE_TRACE_POINTS
52#include <trace/events/vmscan.h>
53
/*
 * Lumpy reclaim state kept in scan_control.lumpy_reclaim_mode.
 * The values are the ones the compiler would assign implicitly; they are
 * spelled out for readability only.
 */
enum lumpy_mode {
	LUMPY_MODE_NONE = 0,	/* lumpy reclaim is off */
	LUMPY_MODE_ASYNC = 1,	/* lumpy reclaim without waiting on writeback */
	LUMPY_MODE_SYNC = 2,	/* lumpy reclaim that may wait on page writeback */
};
59
/*
 * Per-invocation reclaim state that is threaded through the scanning
 * helpers in this file.
 */
struct scan_control {
	/* Pages examined so far; incremented by shrink_page_list() */
	unsigned long nr_scanned;

	/*
	 * NOTE(review): presumably the number of pages freed so far; it is not
	 * updated in the part of the file visible here -- confirm with callers.
	 */
	unsigned long nr_reclaimed;

	/*
	 * NOTE(review): presumably the reclaim target for this invocation; not
	 * read in the part of the file visible here -- confirm with callers.
	 */
	unsigned long nr_to_reclaim;

	/* NOTE(review): hibernation-related; not read in the visible code */
	unsigned long hibernation_mode;

	/*
	 * GFP flags of the reclaim context; __GFP_FS and __GFP_IO decide
	 * whether shrink_page_list() may enter the filesystem or start swap I/O
	 */
	gfp_t gfp_mask;

	/* Non-zero allows shrink_page_list() to call pageout() on dirty pages */
	int may_writepage;

	/* Zero makes shrink_page_list() skip pages that are mapped */
	int may_unmap;

	/* Zero makes get_scan_count() take its no-swap path */
	int may_swap;

	/* Anon priority in get_scan_count(); file priority is 200 - swappiness */
	int swappiness;

	/*
	 * Order of the request being served (compared against
	 * PAGE_ALLOC_COSTLY_ORDER); non-zero enables the lumpy pfn-block scan
	 * in isolate_lru_pages()
	 */
	int order;

	/*
	 * Current lumpy reclaim mode; chosen by set_lumpy_reclaim_mode() and
	 * reset to LUMPY_MODE_NONE by disable_lumpy_reclaim_mode()
	 */
	enum lumpy_mode lumpy_reclaim_mode;

	/*
	 * Memory cgroup being reclaimed from; NULL selects the global LRU
	 * (see scanning_global_lru())
	 */
	struct mem_cgroup *mem_cgroup;

	/*
	 * NOTE(review): presumably restricts which nodes are scanned; not
	 * dereferenced in the part of the file visible here -- confirm.
	 */
	nodemask_t *nodemask;
};
102
/*
 * Return the struct page whose ->lru entry is the one just before @_head,
 * i.e. the last page of the list when @_head is the list head.
 */
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
104
/*
 * prefetch_prev_lru_page - prefetch a field of the page preceding @_page
 * @_page:  page currently being looked at
 * @_base:  head of the list @_page is on; no prefetch is issued when the
 *          entry before @_page is the head itself
 * @_field: member of struct page to prefetch
 *
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCH.  Every macro argument is parenthesized so that
 * non-trivial expressions can be passed safely.
 */
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
118
/*
 * prefetchw_prev_lru_page - prefetch-for-write a field of the page
 * preceding @_page
 * @_page:  page currently being looked at
 * @_base:  head of the list @_page is on; no prefetch is issued when the
 *          entry before @_page is the head itself
 * @_field: member of struct page to prefetch
 *
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCHW.  Every macro argument is parenthesized so that
 * non-trivial expressions can be passed safely.
 */
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
132
133
134
135
/*
 * Default swappiness.
 * NOTE(review): presumably copied into scan_control.swappiness by callers
 * outside the visible part of the file -- confirm.
 */
int vm_swappiness = 60;
/* NOTE(review): not referenced in the visible code; presumably a page total */
long vm_total_pages;
138
/*
 * All registered shrinkers, and the rwsem protecting the list: taken for
 * write by register/unregister_shrinker(), try-locked for read by
 * shrink_slab().
 */
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
141
/*
 * True when reclaim targets the global (per-zone) LRU rather than a memory
 * cgroup.  Without CONFIG_CGROUP_MEM_RES_CTLR this is always true.
 */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
#else
#define scanning_global_lru(sc) (1)
#endif
147
148static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
149 struct scan_control *sc)
150{
151 if (!scanning_global_lru(sc))
152 return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
153
154 return &zone->reclaim_stat;
155}
156
157static unsigned long zone_nr_lru_pages(struct zone *zone,
158 struct scan_control *sc, enum lru_list lru)
159{
160 if (!scanning_global_lru(sc))
161 return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
162
163 return zone_page_state(zone, NR_LRU_BASE + lru);
164}
165
166
167
168
169
170void register_shrinker(struct shrinker *shrinker)
171{
172 shrinker->nr = 0;
173 down_write(&shrinker_rwsem);
174 list_add_tail(&shrinker->list, &shrinker_list);
175 up_write(&shrinker_rwsem);
176}
177EXPORT_SYMBOL(register_shrinker);
178
179
180
181
182void unregister_shrinker(struct shrinker *shrinker)
183{
184 down_write(&shrinker_rwsem);
185 list_del(&shrinker->list);
186 up_write(&shrinker_rwsem);
187}
188EXPORT_SYMBOL(unregister_shrinker);
189
/* Scan count passed to a shrinker callback per call in shrink_slab() */
#define SHRINK_BATCH 128
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
211 unsigned long lru_pages)
212{
213 struct shrinker *shrinker;
214 unsigned long ret = 0;
215
216 if (scanned == 0)
217 scanned = SWAP_CLUSTER_MAX;
218
219 if (!down_read_trylock(&shrinker_rwsem))
220 return 1;
221
222 list_for_each_entry(shrinker, &shrinker_list, list) {
223 unsigned long long delta;
224 unsigned long total_scan;
225 unsigned long max_pass;
226
227 max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
228 delta = (4 * scanned) / shrinker->seeks;
229 delta *= max_pass;
230 do_div(delta, lru_pages + 1);
231 shrinker->nr += delta;
232 if (shrinker->nr < 0) {
233 printk(KERN_ERR "shrink_slab: %pF negative objects to "
234 "delete nr=%ld\n",
235 shrinker->shrink, shrinker->nr);
236 shrinker->nr = max_pass;
237 }
238
239
240
241
242
243
244 if (shrinker->nr > max_pass * 2)
245 shrinker->nr = max_pass * 2;
246
247 total_scan = shrinker->nr;
248 shrinker->nr = 0;
249
250 while (total_scan >= SHRINK_BATCH) {
251 long this_scan = SHRINK_BATCH;
252 int shrink_ret;
253 int nr_before;
254
255 nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
256 shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
257 gfp_mask);
258 if (shrink_ret == -1)
259 break;
260 if (shrink_ret < nr_before)
261 ret += nr_before - shrink_ret;
262 count_vm_events(SLABS_SCANNED, this_scan);
263 total_scan -= this_scan;
264
265 cond_resched();
266 }
267
268 shrinker->nr += total_scan;
269 }
270 up_read(&shrinker_rwsem);
271 return ret;
272}
273
274static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
275 bool sync)
276{
277 enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
278
279
280
281
282
283 if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
284 return;
285
286
287
288
289
290
291 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
292 sc->lumpy_reclaim_mode = mode;
293 else if (sc->order && priority < DEF_PRIORITY - 2)
294 sc->lumpy_reclaim_mode = mode;
295 else
296 sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
297}
298
/* Switch lumpy reclaim off for the scan described by @sc. */
static void disable_lumpy_reclaim_mode(struct scan_control *sc)
{
	sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
}
303
/*
 * Return non-zero when @page's reference count, after discounting the one
 * accounted to private data (if any), is exactly 2.
 * NOTE(review): the two remaining references are presumably the page cache
 * and the isolating caller -- confirm.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	int refs = page_count(page) - page_has_private(page);

	return refs == 2;
}
313
314static int may_write_to_queue(struct backing_dev_info *bdi,
315 struct scan_control *sc)
316{
317 if (current->flags & PF_SWAPWRITE)
318 return 1;
319 if (!bdi_write_congested(bdi))
320 return 1;
321 if (bdi == current->backing_dev_info)
322 return 1;
323
324
325 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
326 return 1;
327 return 0;
328}
329
330
331
332
333
334
335
336
337
338
339
340
341
/*
 * Record a writepage failure on @mapping.
 *
 * The page is locked first and the error is only recorded if the page still
 * belongs to @mapping at that point.
 */
static void handle_write_error(struct address_space *mapping,
				struct page *page, int error)
{
	lock_page_nosync(page);
	if (page_mapping(page) == mapping)
		mapping_set_error(mapping, error);
	unlock_page(page);
}
350
351
/*
 * Result of pageout().  The values are the ones the compiler would assign
 * implicitly; they are spelled out for readability only.
 */
typedef enum {
	/* writeback was not attempted; leave the page where it is */
	PAGE_KEEP = 0,
	/* the page should be moved to the active list */
	PAGE_ACTIVATE = 1,
	/* ->writepage() was called for the page */
	PAGE_SUCCESS = 2,
	/* the page turned out to be clean (or was cleaned without I/O) */
	PAGE_CLEAN = 3,
} pageout_t;
362
363
364
365
366
/*
 * pageout - try to start writeback of a dirty page on behalf of reclaim
 * @page:    the page to write
 * @mapping: the page's mapping, or NULL
 * @sc:      scan control (order and lumpy mode are consulted)
 *
 * Returns PAGE_KEEP when nothing was attempted, PAGE_ACTIVATE when the page
 * should be activated, PAGE_SUCCESS after ->writepage() has been called,
 * and PAGE_CLEAN when there was nothing to write.
 */
static pageout_t pageout(struct page *page, struct address_space *mapping,
						struct scan_control *sc)
{
	/* Only proceed when the reference count is the expected one. */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * No mapping to write through.  If the page still carries
		 * private data and the buffers can be freed, clear the dirty
		 * bit and report the page as clean.
		 */
		if (page_has_private(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				printk("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	/* A mapping without ->writepage cannot be written from here. */
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_queue(mapping->backing_dev_info, sc))
		return PAGE_KEEP;

	/* Non-zero means the page was dirty and is now ours to write. */
	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.for_reclaim = 1,
		};

		/* Set before the write is issued; cleared again below if unused. */
		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		/*
		 * In synchronous lumpy mode wait for the write that was just
		 * started to complete.
		 */
		if (PageWriteback(page) &&
		    sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
			wait_on_page_writeback(page);

		if (!PageWriteback(page)) {
			/* Writeback is no longer in flight: drop the reclaim flag. */
			ClearPageReclaim(page);
		}
		trace_mm_vmscan_writepage(page,
			trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
		inc_zone_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	/* The page was not dirty after all. */
	return PAGE_CLEAN;
}
447
448
449
450
451
/*
 * __remove_mapping - detach a locked page from its mapping
 * @mapping: the mapping @page currently belongs to (checked with BUG_ON)
 * @page:    the locked page
 *
 * Returns 1 if the page was removed from the swap cache or page cache,
 * 0 if it could not be freed.  On success the reference count is left in
 * the frozen state established by page_freeze_refs(); the caller decides
 * what to do with it (see remove_mapping()).
 */
static int __remove_mapping(struct address_space *mapping, struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(mapping != page_mapping(page));

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Freeze the reference count, which only succeeds if it is exactly 2.
	 * The dirty check must come after the freeze; if the page turns out
	 * to be dirty the count is restored and the removal is abandoned.
	 */
	if (!page_freeze_refs(page, 2))
		goto cannot_free;

	if (unlikely(PageDirty(page))) {
		page_unfreeze_refs(page, 2);
		goto cannot_free;
	}

	if (PageSwapCache(page)) {
		/* Capture the swap entry before the page leaves the swap cache. */
		swp_entry_t swap = { .val = page_private(page) };
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		swapcache_free(swap, page);
	} else {
		void (*freepage)(struct page *);

		/* Read the callback before the page is removed from the cache. */
		freepage = mapping->a_ops->freepage;

		__remove_from_page_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		mem_cgroup_uncharge_cache_page(page);

		if (freepage != NULL)
			freepage(page);
	}

	return 1;

cannot_free:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}
515
516
517
518
519
520
521
/*
 * remove_mapping - detach @page from @mapping on behalf of an outside caller
 *
 * Returns 0 if __remove_mapping() could not free the page.  On success the
 * frozen reference count is released with a value of 1 and 1 is returned.
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (!__remove_mapping(mapping, page))
		return 0;

	/* __remove_mapping() left the count frozen; hand back one reference. */
	page_unfreeze_refs(page, 1);
	return 1;
}
535
536
537
538
539
540
541
542
543
544
/*
 * putback_lru_page - return an isolated page to the appropriate LRU list
 * @page: page that is currently off the LRU (PageLRU must be clear)
 *
 * The page is added to the evictable list matching its type and its former
 * active state, or to the unevictable list if page_evictable() says so.
 * The reference held by the caller is dropped at the end.
 */
void putback_lru_page(struct page *page)
{
	int lru;
	int active = !!TestClearPageActive(page);
	int was_unevictable = PageUnevictable(page);

	VM_BUG_ON(PageLRU(page));

redo:
	ClearPageUnevictable(page);

	if (page_evictable(page, NULL)) {
		/*
		 * Evictable: queue the page on the active or inactive list of
		 * its base type through the LRU add cache.
		 */
		lru = active + page_lru_base_type(page);
		lru_cache_add_lru(page, lru);
	} else {
		/*
		 * Not evictable: put the page on the unevictable list.
		 */
		lru = LRU_UNEVICTABLE;
		add_page_to_unevictable_list(page);
		/*
		 * Full barrier between adding the page to the unevictable
		 * list and the evictability re-check below.
		 * NOTE(review): presumably pairs with a barrier on the path
		 * that makes a page evictable again -- confirm.
		 */
		smp_mb();
	}

	/*
	 * The page may have become evictable while it was being added to the
	 * unevictable list.  If so, and it can be isolated again, drop the
	 * reference taken by the isolation and start over.
	 */
	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
		if (!isolate_lru_page(page)) {
			put_page(page);
			goto redo;
		}
		/*
		 * Isolation failed: the page is left where it is and
		 * processing continues below.
		 */
	}

	if (was_unevictable && lru != LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGRESCUED);
	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGCULLED);

	/* Drop the caller's reference. */
	put_page(page);
}
607
/*
 * Verdict of page_check_references().  The values are the ones the compiler
 * would assign implicitly; they are spelled out for readability only.
 */
enum page_references {
	PAGEREF_RECLAIM = 0,		/* go ahead and reclaim the page */
	PAGEREF_RECLAIM_CLEAN = 1,	/* reclaim, but only if the page is clean */
	PAGEREF_KEEP = 2,		/* leave the page on the inactive list */
	PAGEREF_ACTIVATE = 3,		/* move the page to the active list */
};
614
615static enum page_references page_check_references(struct page *page,
616 struct scan_control *sc)
617{
618 int referenced_ptes, referenced_page;
619 unsigned long vm_flags;
620
621 referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
622 referenced_page = TestClearPageReferenced(page);
623
624
625 if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
626 return PAGEREF_RECLAIM;
627
628
629
630
631
632 if (vm_flags & VM_LOCKED)
633 return PAGEREF_RECLAIM;
634
635 if (referenced_ptes) {
636 if (PageAnon(page))
637 return PAGEREF_ACTIVATE;
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652 SetPageReferenced(page);
653
654 if (referenced_page)
655 return PAGEREF_ACTIVATE;
656
657 return PAGEREF_KEEP;
658 }
659
660
661 if (referenced_page && !PageSwapBacked(page))
662 return PAGEREF_RECLAIM_CLEAN;
663
664 return PAGEREF_RECLAIM;
665}
666
667static noinline_for_stack void free_page_list(struct list_head *free_pages)
668{
669 struct pagevec freed_pvec;
670 struct page *page, *tmp;
671
672 pagevec_init(&freed_pvec, 1);
673
674 list_for_each_entry_safe(page, tmp, free_pages, lru) {
675 list_del(&page->lru);
676 if (!pagevec_add(&freed_pvec, page)) {
677 __pagevec_free(&freed_pvec);
678 pagevec_reinit(&freed_pvec);
679 }
680 }
681
682 pagevec_free(&freed_pvec);
683}
684
685
686
687
/*
 * shrink_page_list - try to reclaim every page on an isolated list
 * @page_list: isolated pages; on return it holds the pages that were kept
 * @zone:      zone all pages on @page_list belong to
 * @sc:        scan control; nr_scanned is updated and lumpy mode may be
 *             disabled as a side effect
 *
 * Returns the number of pages reclaimed.
 */
static unsigned long shrink_page_list(struct list_head *page_list,
				      struct zone *zone,
				      struct scan_control *sc)
{
	LIST_HEAD(ret_pages);
	LIST_HEAD(free_pages);
	int pgactivate = 0;
	unsigned long nr_dirty = 0;
	unsigned long nr_congested = 0;
	unsigned long nr_reclaimed = 0;

	cond_resched();

	while (!list_empty(page_list)) {
		enum page_references references;
		struct address_space *mapping;
		struct page *page;
		int may_enter_fs;

		cond_resched();

		page = lru_to_page(page_list);
		list_del(&page->lru);

		/* Never sleep on the page lock; skip contended pages. */
		if (!trylock_page(page))
			goto keep;

		VM_BUG_ON(PageActive(page));
		VM_BUG_ON(page_zone(page) != zone);

		sc->nr_scanned++;

		if (unlikely(!page_evictable(page, NULL)))
			goto cull_mlocked;

		if (!sc->may_unmap && page_mapped(page))
			goto keep_locked;

		/* Mapped and swap-cache pages are counted twice. */
		if (page_mapped(page) || PageSwapCache(page))
			sc->nr_scanned++;

		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		if (PageWriteback(page)) {
			/*
			 * Only synchronous lumpy reclaim that is allowed to
			 * enter the filesystem waits for writeback.  Everyone
			 * else puts the page back; note that this path goes
			 * to keep_lumpy and therefore does not disable lumpy
			 * mode.
			 */
			if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
			    may_enter_fs)
				wait_on_page_writeback(page);
			else {
				unlock_page(page);
				goto keep_lumpy;
			}
		}

		references = page_check_references(page, sc);
		switch (references) {
		case PAGEREF_ACTIVATE:
			goto activate_locked;
		case PAGEREF_KEEP:
			goto keep_locked;
		case PAGEREF_RECLAIM:
		case PAGEREF_RECLAIM_CLEAN:
			; /* try to reclaim the page below */
		}

		/*
		 * Anonymous page that is not yet in the swap cache: give it
		 * swap space, which requires __GFP_IO.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!(sc->gfp_mask & __GFP_IO))
				goto keep_locked;
			if (!add_to_swap(page))
				goto activate_locked;
			may_enter_fs = 1;
		}

		mapping = page_mapping(page);

		/*
		 * The page is mapped into page tables of one or more
		 * processes: try to unmap it.
		 */
		if (page_mapped(page) && mapping) {
			switch (try_to_unmap(page, TTU_UNMAP)) {
			case SWAP_FAIL:
				goto activate_locked;
			case SWAP_AGAIN:
				goto keep_locked;
			case SWAP_MLOCK:
				goto cull_mlocked;
			case SWAP_SUCCESS:
				; /* try to free the page below */
			}
		}

		if (PageDirty(page)) {
			nr_dirty++;

			if (references == PAGEREF_RECLAIM_CLEAN)
				goto keep_locked;
			if (!may_enter_fs)
				goto keep_locked;
			if (!sc->may_writepage)
				goto keep_locked;

			/* Page is dirty, try to write it out here */
			switch (pageout(page, mapping, sc)) {
			case PAGE_KEEP:
				nr_congested++;
				goto keep_locked;
			case PAGE_ACTIVATE:
				goto activate_locked;
			case PAGE_SUCCESS:
				if (PageWriteback(page))
					goto keep_lumpy;
				if (PageDirty(page))
					goto keep;

				/*
				 * The write has already completed.  Take the
				 * page lock again and re-check the page state
				 * and mapping before trying to free it.
				 */
				if (!trylock_page(page))
					goto keep;
				if (PageDirty(page) || PageWriteback(page))
					goto keep_locked;
				mapping = page_mapping(page);
				/* deliberate fall through into PAGE_CLEAN */
			case PAGE_CLEAN:
				; /* try to free the page below */
			}
		}

		/*
		 * The page carries private data: it has to be released before
		 * the page can be freed.  If that succeeds and the page has no
		 * mapping and only one reference left, drop that reference
		 * here; when it was the last one the page is freed through
		 * free_it, otherwise it is still counted as reclaimed and
		 * processing moves on to the next page.
		 */
		if (page_has_private(page)) {
			if (!try_to_release_page(page, sc->gfp_mask))
				goto activate_locked;
			if (!mapping && page_count(page) == 1) {
				unlock_page(page);
				if (put_page_testzero(page))
					goto free_it;
				else {
					/*
					 * The count did not drop to zero, so
					 * somebody else holds a reference now.
					 * The page is neither freed nor put
					 * back on a list by this function.
					 */
					nr_reclaimed++;
					continue;
				}
			}
		}

		if (!mapping || !__remove_mapping(mapping, page))
			goto keep_locked;

		/*
		 * __remove_mapping() succeeded, so the page is about to be
		 * freed; the non-atomic variant is used to clear the lock bit.
		 */
		__clear_page_locked(page);
free_it:
		nr_reclaimed++;

		/*
		 * Collect the page on a private list; all of them are freed
		 * together by free_page_list() after the loop.
		 */
		list_add(&page->lru, &free_pages);
		continue;

cull_mlocked:
		if (PageSwapCache(page))
			try_to_free_swap(page);
		unlock_page(page);
		putback_lru_page(page);
		disable_lumpy_reclaim_mode(sc);
		continue;

activate_locked:
		/* Not a candidate for swapping, so reclaim swap space. */
		if (PageSwapCache(page) && vm_swap_full())
			try_to_free_swap(page);
		VM_BUG_ON(PageActive(page));
		SetPageActive(page);
		pgactivate++;
keep_locked:
		unlock_page(page);
keep:
		disable_lumpy_reclaim_mode(sc);
keep_lumpy:
		list_add(&page->lru, &ret_pages);
		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
	}

	/*
	 * Flag the zone as congested when at least one dirty page was seen
	 * and every dirty page seen was also counted in nr_congested (i.e.
	 * pageout() returned PAGE_KEEP for it).
	 */
	if (nr_dirty == nr_congested && nr_dirty != 0)
		zone_set_flag(zone, ZONE_CONGESTED);

	free_page_list(&free_pages);

	/* Hand the pages that were not reclaimed back to the caller. */
	list_splice(&ret_pages, page_list);
	count_vm_events(PGACTIVATE, pgactivate);
	return nr_reclaimed;
}
932
933
934
935
936
937
938
939
940
941
942
943int __isolate_lru_page(struct page *page, int mode, int file)
944{
945 int ret = -EINVAL;
946
947
948 if (!PageLRU(page))
949 return ret;
950
951
952
953
954
955
956 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
957 return ret;
958
959 if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
960 return ret;
961
962
963
964
965
966
967 if (PageUnevictable(page))
968 return ret;
969
970 ret = -EBUSY;
971
972 if (likely(get_page_unless_zero(page))) {
973
974
975
976
977
978 ClearPageLRU(page);
979 ret = 0;
980 }
981
982 return ret;
983}
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
/*
 * isolate_lru_pages - move pages from the tail of an LRU list to a private
 * list
 * @nr_to_scan: upper bound on the scan counter
 * @src:        LRU list to take pages from (scanned from the tail)
 * @dst:        list the isolated pages are moved to
 * @scanned:    out parameter, receives the final scan count
 * @order:      when non-zero, also try to isolate the other pages of the
 *              naturally aligned block of (1 << order) page frames around
 *              each isolated page
 * @mode:       isolation mode passed on to __isolate_lru_page()
 * @file:       file/anon selector passed on to __isolate_lru_page()
 *
 * Takes no lock itself.  NOTE(review): the visible callers reach this with
 * zone->lru_lock held -- confirm for callers outside this file.
 *
 * Returns the number of pages moved to @dst.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct list_head *src, struct list_head *dst,
		unsigned long *scanned, int order, int mode, int file)
{
	unsigned long nr_taken = 0;
	unsigned long nr_lumpy_taken = 0;
	unsigned long nr_lumpy_dirty = 0;
	unsigned long nr_lumpy_failed = 0;
	unsigned long scan;

	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
		struct page *page;
		unsigned long pfn;
		unsigned long end_pfn;
		unsigned long page_pfn;
		int zone_id;

		page = lru_to_page(src);
		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON(!PageLRU(page));

		switch (__isolate_lru_page(page, mode, file)) {
		case 0:
			list_move(&page->lru, dst);
			mem_cgroup_del_lru(page);
			nr_taken++;
			break;

		case -EBUSY:
			/* No reference could be taken: rotate the page away from the tail. */
			list_move(&page->lru, src);
			mem_cgroup_rotate_lru_list(page, page_lru(page));
			continue;

		default:
			/* Any other result is treated as a bug for a page taken from @src. */
			BUG();
		}

		if (!order)
			continue;

		/*
		 * Lumpy part: walk the aligned block of (1 << order) page
		 * frames that contains the page just taken and try to isolate
		 * the neighbouring pages as well.  Each neighbour taken also
		 * bumps the outer scan counter.
		 */
		zone_id = page_zone_id(page);
		page_pfn = page_to_pfn(page);
		pfn = page_pfn & ~((1 << order) - 1);
		end_pfn = pfn + (1 << order);
		for (; pfn < end_pfn; pfn++) {
			struct page *cursor_page;

			/* The page we started from has been taken already. */
			if (unlikely(pfn == page_pfn))
				continue;

			/* Stop at the first page frame that is not valid. */
			if (unlikely(!pfn_valid_within(pfn)))
				break;

			cursor_page = pfn_to_page(pfn);

			/* Stop when the block crosses into a different zone. */
			if (unlikely(page_zone_id(cursor_page) != zone_id))
				break;

			/*
			 * With no swap space left, an anonymous page that is
			 * not in the swap cache ends the walk.
			 */
			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
			    !PageSwapCache(cursor_page))
				break;

			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
				list_move(&cursor_page->lru, dst);
				mem_cgroup_del_lru(cursor_page);
				nr_taken++;
				nr_lumpy_taken++;
				if (PageDirty(cursor_page))
					nr_lumpy_dirty++;
				scan++;
			} else {
				/* A page with a zero reference count is skipped, not fatal. */
				if (!page_count(cursor_page))
					continue;
				break;
			}
		}

		/* The walk ended early: count this block as a lumpy failure. */
		if (pfn < end_pfn)
			nr_lumpy_failed++;
	}

	*scanned = scan;

	trace_mm_vmscan_lru_isolate(order,
			nr_to_scan, scan,
			nr_taken,
			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
			mode);
	return nr_taken;
}
1116
1117static unsigned long isolate_pages_global(unsigned long nr,
1118 struct list_head *dst,
1119 unsigned long *scanned, int order,
1120 int mode, struct zone *z,
1121 int active, int file)
1122{
1123 int lru = LRU_BASE;
1124 if (active)
1125 lru += LRU_ACTIVE;
1126 if (file)
1127 lru += LRU_FILE;
1128 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
1129 mode, file);
1130}
1131
1132
1133
1134
1135
1136static unsigned long clear_active_flags(struct list_head *page_list,
1137 unsigned int *count)
1138{
1139 int nr_active = 0;
1140 int lru;
1141 struct page *page;
1142
1143 list_for_each_entry(page, page_list, lru) {
1144 lru = page_lru_base_type(page);
1145 if (PageActive(page)) {
1146 lru += LRU_ACTIVE;
1147 ClearPageActive(page);
1148 nr_active++;
1149 }
1150 if (count)
1151 count[lru]++;
1152 }
1153
1154 return nr_active;
1155}
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182int isolate_lru_page(struct page *page)
1183{
1184 int ret = -EBUSY;
1185
1186 if (PageLRU(page)) {
1187 struct zone *zone = page_zone(page);
1188
1189 spin_lock_irq(&zone->lru_lock);
1190 if (PageLRU(page) && get_page_unless_zero(page)) {
1191 int lru = page_lru(page);
1192 ret = 0;
1193 ClearPageLRU(page);
1194
1195 del_page_from_lru_list(zone, page, lru);
1196 }
1197 spin_unlock_irq(&zone->lru_lock);
1198 }
1199 return ret;
1200}
1201
1202
1203
1204
1205static int too_many_isolated(struct zone *zone, int file,
1206 struct scan_control *sc)
1207{
1208 unsigned long inactive, isolated;
1209
1210 if (current_is_kswapd())
1211 return 0;
1212
1213 if (!scanning_global_lru(sc))
1214 return 0;
1215
1216 if (file) {
1217 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1218 isolated = zone_page_state(zone, NR_ISOLATED_FILE);
1219 } else {
1220 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1221 isolated = zone_page_state(zone, NR_ISOLATED_ANON);
1222 }
1223
1224 return isolated > inactive;
1225}
1226
1227
1228
1229
/*
 * putback_lru_pages - return the pages left on @page_list to the LRU
 * @zone:      zone the pages belong to
 * @sc:        scan control (selects which reclaim statistics are updated)
 * @nr_anon:   number of anon pages to subtract from NR_ISOLATED_ANON
 * @nr_file:   number of file pages to subtract from NR_ISOLATED_FILE
 * @page_list: pages that were isolated but not reclaimed
 *
 * Context: the lock is taken with plain spin_lock() but released with
 * spin_unlock_irq(), so this function expects to be entered with interrupts
 * already disabled and returns with them enabled.  The caller visible in
 * this file, shrink_inactive_list(), calls local_irq_disable() beforehand.
 */
static noinline_for_stack void
putback_lru_pages(struct zone *zone, struct scan_control *sc,
				unsigned long nr_anon, unsigned long nr_file,
				struct list_head *page_list)
{
	struct page *page;
	struct pagevec pvec;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);

	pagevec_init(&pvec, 1);

	/*
	 * Put back any pages that could not be freed.
	 */
	spin_lock(&zone->lru_lock);
	while (!list_empty(page_list)) {
		int lru;
		page = lru_to_page(page_list);
		VM_BUG_ON(PageLRU(page));
		list_del(&page->lru);
		if (unlikely(!page_evictable(page, NULL))) {
			/* putback_lru_page() is called without the LRU lock held. */
			spin_unlock_irq(&zone->lru_lock);
			putback_lru_page(page);
			spin_lock_irq(&zone->lru_lock);
			continue;
		}
		SetPageLRU(page);
		lru = page_lru(page);
		add_page_to_lru_list(zone, page, lru);
		/* A page that goes back to an active list counts as rotated. */
		if (is_active_lru(lru)) {
			int file = is_file_lru(lru);
			reclaim_stat->recent_rotated[file]++;
		}
		/* pagevec_add() returned 0: release the batch outside the lock. */
		if (!pagevec_add(&pvec, page)) {
			spin_unlock_irq(&zone->lru_lock);
			__pagevec_release(&pvec);
			spin_lock_irq(&zone->lru_lock);
		}
	}
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);

	spin_unlock_irq(&zone->lru_lock);
	pagevec_release(&pvec);
}
1275
/*
 * update_isolated_counts - account for a freshly isolated list of pages
 * @zone:          zone the pages were isolated from
 * @sc:            scan control (selects which reclaim statistics are updated)
 * @nr_anon:       out parameter, number of anon pages on the list
 * @nr_file:       out parameter, number of file pages on the list
 * @isolated_list: the isolated pages
 *
 * Clears the active flag on every page, moves the per-list page counts from
 * the four LRU counters to the two NR_ISOLATED_* counters and adds the
 * totals to the recent_scanned statistics.
 *
 * Uses the non-atomic __ variants of the counter helpers; the caller
 * visible in this file invokes it with zone->lru_lock held and interrupts
 * disabled.
 */
static noinline_for_stack void update_isolated_counts(struct zone *zone,
					struct scan_control *sc,
					unsigned long *nr_anon,
					unsigned long *nr_file,
					struct list_head *isolated_list)
{
	unsigned long nr_active;
	unsigned int count[NR_LRU_LISTS] = { 0, };
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);

	nr_active = clear_active_flags(isolated_list, count);
	__count_vm_events(PGDEACTIVATE, nr_active);

	/*
	 * The count[] entries are unsigned, so each negation below wraps.
	 * NOTE(review): this relies on __mod_zone_page_state() taking a
	 * signed delta so the value arrives negative -- confirm.
	 */
	__mod_zone_page_state(zone, NR_ACTIVE_FILE,
			      -count[LRU_ACTIVE_FILE]);
	__mod_zone_page_state(zone, NR_INACTIVE_FILE,
			      -count[LRU_INACTIVE_FILE]);
	__mod_zone_page_state(zone, NR_ACTIVE_ANON,
			      -count[LRU_ACTIVE_ANON]);
	__mod_zone_page_state(zone, NR_INACTIVE_ANON,
			      -count[LRU_INACTIVE_ANON]);

	*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
	*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);

	/* Index 0 is the anon statistic, index 1 the file statistic. */
	reclaim_stat->recent_scanned[0] += *nr_anon;
	reclaim_stat->recent_scanned[1] += *nr_file;
}
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315static inline bool should_reclaim_stall(unsigned long nr_taken,
1316 unsigned long nr_freed,
1317 int priority,
1318 struct scan_control *sc)
1319{
1320 int lumpy_stall_priority;
1321
1322
1323 if (current_is_kswapd())
1324 return false;
1325
1326
1327 if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
1328 return false;
1329
1330
1331 if (nr_freed == nr_taken)
1332 return false;
1333
1334
1335
1336
1337
1338
1339
1340 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1341 lumpy_stall_priority = DEF_PRIORITY;
1342 else
1343 lumpy_stall_priority = DEF_PRIORITY / 3;
1344
1345 return priority <= lumpy_stall_priority;
1346}
1347
1348
1349
1350
1351
/*
 * shrink_inactive_list - isolate pages from an inactive list and try to
 * reclaim them
 * @nr_to_scan: number of pages to scan
 * @zone:       zone to reclaim from
 * @sc:         scan control
 * @priority:   current scan priority
 * @file:       non-zero for the file list, zero for the anon list
 *
 * Returns the number of pages reclaimed.  Returns SWAP_CLUSTER_MAX without
 * doing any work when a fatal signal arrives while waiting for the number
 * of isolated pages to drop.
 */
static noinline_for_stack unsigned long
shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
			struct scan_control *sc, int priority, int file)
{
	LIST_HEAD(page_list);
	unsigned long nr_scanned;
	unsigned long nr_reclaimed = 0;
	unsigned long nr_taken;
	unsigned long nr_anon;
	unsigned long nr_file;

	/* Throttle while too many pages of this type are already isolated. */
	while (unlikely(too_many_isolated(zone, file, sc))) {
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		/* The task is being killed: stop waiting and get out. */
		if (fatal_signal_pending(current))
			return SWAP_CLUSTER_MAX;
	}

	set_lumpy_reclaim_mode(priority, sc, false);
	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);

	/*
	 * Isolate from the inactive list (the "active" argument is 0).  With
	 * lumpy reclaim enabled, ISOLATE_BOTH is used so pages are taken
	 * regardless of their active state.
	 */
	if (scanning_global_lru(sc)) {
		nr_taken = isolate_pages_global(nr_to_scan,
			&page_list, &nr_scanned, sc->order,
			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
					ISOLATE_INACTIVE : ISOLATE_BOTH,
			zone, 0, file);
		zone->pages_scanned += nr_scanned;
		if (current_is_kswapd())
			__count_zone_vm_events(PGSCAN_KSWAPD, zone,
					       nr_scanned);
		else
			__count_zone_vm_events(PGSCAN_DIRECT, zone,
					       nr_scanned);
	} else {
		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
			&page_list, &nr_scanned, sc->order,
			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
					ISOLATE_INACTIVE : ISOLATE_BOTH,
			zone, sc->mem_cgroup,
			0, file);
		/*
		 * Memory cgroup scans do not update zone->pages_scanned or
		 * the PGSCAN_* counters.
		 */
	}

	if (nr_taken == 0) {
		spin_unlock_irq(&zone->lru_lock);
		return 0;
	}

	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);

	spin_unlock_irq(&zone->lru_lock);

	nr_reclaimed = shrink_page_list(&page_list, zone, sc);

	/* Not everything was freed: possibly retry in synchronous lumpy mode. */
	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
		set_lumpy_reclaim_mode(priority, sc, true);
		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
	}

	/*
	 * Interrupts are disabled here for the __count_* helpers and stay
	 * disabled on entry to putback_lru_pages(), which takes the LRU lock
	 * with a plain spin_lock() and re-enables interrupts on its way out.
	 */
	local_irq_disable();
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);

	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);

	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
		zone_idx(zone),
		nr_scanned, nr_reclaimed,
		priority,
		trace_shrink_flags(file, sc->lumpy_reclaim_mode));
	return nr_reclaimed;
}
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
/*
 * move_active_pages_to_lru - splice a private list of pages back onto an
 * LRU list
 * @zone: zone the pages belong to
 * @list: private list of pages (must not have PageLRU set); emptied here
 * @lru:  LRU list the pages are moved to
 *
 * Called with zone->lru_lock held and interrupts disabled.  The lock is
 * dropped and retaken whenever a pagevec batch is released, and is held
 * again on return.
 */
static void move_active_pages_to_lru(struct zone *zone,
				     struct list_head *list,
				     enum lru_list lru)
{
	unsigned long pgmoved = 0;
	struct pagevec pvec;
	struct page *page;

	pagevec_init(&pvec, 1);

	while (!list_empty(list)) {
		page = lru_to_page(list);

		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);

		list_move(&page->lru, &zone->lru[lru].list);
		mem_cgroup_add_lru_list(page, lru);
		pgmoved++;

		/*
		 * Release the batch when pagevec_add() returns 0 or when
		 * this was the last page on the list.
		 */
		if (!pagevec_add(&pvec, page) || list_empty(list)) {
			spin_unlock_irq(&zone->lru_lock);
			if (buffer_heads_over_limit)
				pagevec_strip(&pvec);
			__pagevec_release(&pvec);
			spin_lock_irq(&zone->lru_lock);
		}
	}
	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
	/* Pages moved to an inactive list count as deactivations. */
	if (!is_active_lru(lru))
		__count_vm_events(PGDEACTIVATE, pgmoved);
}
1483
/*
 * shrink_active_list - move pages from an active list to the inactive list
 * @nr_pages: number of pages to scan
 * @zone:     zone to work on
 * @sc:       scan control
 * @priority: current scan priority (not used in this function)
 * @file:     non-zero for the file lists, zero for the anon lists
 *
 * Pages are isolated from the active list under zone->lru_lock, examined
 * without the lock, and then put back: referenced executable file pages
 * return to the active list, everything else goes to the inactive list.
 */
static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
			struct scan_control *sc, int priority, int file)
{
	unsigned long nr_taken;
	unsigned long pgscanned;
	unsigned long vm_flags;
	LIST_HEAD(l_hold);	/* the pages isolated from the active list */
	LIST_HEAD(l_active);	/* pages that stay active */
	LIST_HEAD(l_inactive);	/* pages that get deactivated */
	struct page *page;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	unsigned long nr_rotated = 0;

	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);
	if (scanning_global_lru(sc)) {
		nr_taken = isolate_pages_global(nr_pages, &l_hold,
						&pgscanned, sc->order,
						ISOLATE_ACTIVE, zone,
						1, file);
		zone->pages_scanned += pgscanned;
	} else {
		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
						&pgscanned, sc->order,
						ISOLATE_ACTIVE, zone,
						sc->mem_cgroup, 1, file);
		/*
		 * Memory cgroup scans do not update zone->pages_scanned.
		 */
	}

	reclaim_stat->recent_scanned[file] += nr_taken;

	__count_zone_vm_events(PGREFILL, zone, pgscanned);
	if (file)
		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
	else
		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
	/*
	 * NOTE(review): indexing with "NR_ISOLATED_ANON + file" presumably
	 * relies on NR_ISOLATED_FILE directly following NR_ISOLATED_ANON.
	 */
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	/* Sort the isolated pages into l_active / l_inactive, lock dropped. */
	while (!list_empty(&l_hold)) {
		cond_resched();
		page = lru_to_page(&l_hold);
		list_del(&page->lru);

		if (unlikely(!page_evictable(page, NULL))) {
			putback_lru_page(page);
			continue;
		}

		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
			nr_rotated++;
			/*
			 * Referenced file pages that are mapped with VM_EXEC
			 * stay on the active list.  All other referenced
			 * pages are still deactivated below, but they have
			 * been counted in nr_rotated.
			 */
			if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
				list_add(&page->lru, &l_active);
				continue;
			}
		}

		ClearPageActive(page);	/* we are de-activating */
		list_add(&page->lru, &l_inactive);
	}

	/*
	 * Move the pages back to the LRU lists.
	 */
	spin_lock_irq(&zone->lru_lock);
	/*
	 * Account the referenced pages as rotated, including the ones that
	 * were deactivated anyway.
	 */
	reclaim_stat->recent_rotated[file] += nr_rotated;

	move_active_pages_to_lru(zone, &l_active,
						LRU_ACTIVE + file * LRU_FILE);
	move_active_pages_to_lru(zone, &l_inactive,
						LRU_BASE   + file * LRU_FILE);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
	spin_unlock_irq(&zone->lru_lock);
}
1576
1577#ifdef CONFIG_SWAP
1578static int inactive_anon_is_low_global(struct zone *zone)
1579{
1580 unsigned long active, inactive;
1581
1582 active = zone_page_state(zone, NR_ACTIVE_ANON);
1583 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1584
1585 if (inactive * zone->inactive_ratio < active)
1586 return 1;
1587
1588 return 0;
1589}
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1600{
1601 int low;
1602
1603
1604
1605
1606
1607 if (!total_swap_pages)
1608 return 0;
1609
1610 if (scanning_global_lru(sc))
1611 low = inactive_anon_is_low_global(zone);
1612 else
1613 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
1614 return low;
1615}
1616#else
/* !CONFIG_SWAP variant: the inactive anon list is never reported as low. */
static inline int inactive_anon_is_low(struct zone *zone,
					struct scan_control *sc)
{
	return 0;
}
1622#endif
1623
1624static int inactive_file_is_low_global(struct zone *zone)
1625{
1626 unsigned long active, inactive;
1627
1628 active = zone_page_state(zone, NR_ACTIVE_FILE);
1629 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1630
1631 return (active > inactive);
1632}
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
1650{
1651 int low;
1652
1653 if (scanning_global_lru(sc))
1654 low = inactive_file_is_low_global(zone);
1655 else
1656 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
1657 return low;
1658}
1659
/*
 * Dispatch to the file or anon flavour of the "inactive list is low" check
 * according to @file.
 */
static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
				int file)
{
	return file ? inactive_file_is_low(zone, sc)
		    : inactive_anon_is_low(zone, sc);
}
1668
1669static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1670 struct zone *zone, struct scan_control *sc, int priority)
1671{
1672 int file = is_file_lru(lru);
1673
1674 if (is_active_lru(lru)) {
1675 if (inactive_list_is_low(zone, sc, file))
1676 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1677 return 0;
1678 }
1679
1680 return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
1681}
1682
1683
1684
1685
1686
1687static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
1688 unsigned long *nr_saved_scan)
1689{
1690 unsigned long nr;
1691
1692 *nr_saved_scan += nr_to_scan;
1693 nr = *nr_saved_scan;
1694
1695 if (nr >= SWAP_CLUSTER_MAX)
1696 *nr_saved_scan = 0;
1697 else
1698 nr = 0;
1699
1700 return nr;
1701}
1702
1703
1704
1705
1706
1707
1708
1709
1710
/*
 * Work out how many pages to scan on each evictable LRU list of @zone.
 *
 * @zone:     zone whose lists are being sized
 * @sc:       scan parameters (may_swap, swappiness, global vs. cgroup scope)
 * @nr:       output array indexed by LRU list; one entry is written for
 *            every evictable list
 * @priority: scan priority; list sizes are shifted right by this amount
 *
 * fraction[0]/denominator weights the anon lists and fraction[1]/denominator
 * the file lists.  Each per-list result is passed through
 * nr_scan_try_batch(), so requests smaller than SWAP_CLUSTER_MAX are
 * accumulated in reclaim_stat->nr_saved_scan[] instead of being returned.
 */
static void get_scan_count(struct zone *zone, struct scan_control *sc,
					unsigned long *nr, int priority)
{
	unsigned long anon, file, free;
	unsigned long anon_prio, file_prio;
	unsigned long ap, fp;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	u64 fraction[2], denominator;
	enum lru_list l;
	int noswap = 0;

	/* Swapping not allowed, or nr_swap_pages not positive: file only. */
	if (!sc->may_swap || (nr_swap_pages <= 0)) {
		noswap = 1;
		fraction[0] = 0;
		fraction[1] = 1;
		denominator = 1;
		goto out;
	}

	anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
	file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);

	if (scanning_global_lru(sc)) {
		free = zone_page_state(zone, NR_FREE_PAGES);
		/*
		 * File plus free pages do not exceed the high watermark:
		 * put all of the weight on the anon lists.
		 */
		if (unlikely(file + free <= high_wmark_pages(zone))) {
			fraction[0] = 1;
			fraction[1] = 0;
			denominator = 1;
			goto out;
		}
	}

	/*
	 * Base weights come from swappiness; the two always sum to 200.
	 */
	anon_prio = sc->swappiness;
	file_prio = 200 - sc->swappiness;

	/*
	 * Under the LRU lock, decay the recent scanned/rotated statistics:
	 * whenever recent_scanned exceeds a quarter of the corresponding
	 * list total, both counters for that type are halved.
	 */
	spin_lock_irq(&zone->lru_lock);
	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
		reclaim_stat->recent_scanned[0] /= 2;
		reclaim_stat->recent_rotated[0] /= 2;
	}

	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
		reclaim_stat->recent_scanned[1] /= 2;
		reclaim_stat->recent_rotated[1] /= 2;
	}

	/*
	 * Weight = (prio + 1) * (scanned + 1) / (rotated + 1).  The "+ 1"
	 * terms keep the numerator non-zero and the divisor away from zero.
	 */
	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
	ap /= reclaim_stat->recent_rotated[0] + 1;

	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
	fp /= reclaim_stat->recent_rotated[1] + 1;
	spin_unlock_irq(&zone->lru_lock);

	fraction[0] = ap;
	fraction[1] = fp;
	denominator = ap + fp + 1;
out:
	for_each_evictable_lru(l) {
		int file = is_file_lru(l);
		unsigned long scan;

		scan = zone_nr_lru_pages(zone, sc, l);
		/*
		 * At priority 0 with swap usable the full list size is
		 * requested unscaled; otherwise apply the priority shift
		 * and the anon/file fraction.
		 */
		if (priority || noswap) {
			scan >>= priority;
			scan = div64_u64(scan * fraction[file], denominator);
		}
		nr[l] = nr_scan_try_batch(scan,
					  &reclaim_stat->nr_saved_scan[l]);
	}
}
1806
1807
1808
1809
/*
 * Run one reclaim pass over the evictable LRU lists of @zone at @priority.
 *
 * Per-list scan quotas come from get_scan_count() and are consumed in
 * chunks of at most SWAP_CLUSTER_MAX pages via shrink_list().  The running
 * total of reclaimed pages is written back to sc->nr_reclaimed.
 */
static void shrink_zone(int priority, struct zone *zone,
				struct scan_control *sc)
{
	unsigned long nr[NR_LRU_LISTS];
	unsigned long nr_to_scan;
	enum lru_list l;
	unsigned long nr_reclaimed = sc->nr_reclaimed;
	unsigned long nr_to_reclaim = sc->nr_to_reclaim;

	get_scan_count(zone, sc, nr, priority);

	/*
	 * The loop condition does not test nr[LRU_ACTIVE_ANON]: a leftover
	 * active-anon quota on its own does not keep the loop running.
	 */
	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
					nr[LRU_INACTIVE_FILE]) {
		for_each_evictable_lru(l) {
			if (nr[l]) {
				nr_to_scan = min_t(unsigned long,
						   nr[l], SWAP_CLUSTER_MAX);
				nr[l] -= nr_to_scan;

				nr_reclaimed += shrink_list(l, nr_to_scan,
							    zone, sc, priority);
			}
		}
		/*
		 * Stop early once the reclaim target is met, but only when
		 * running below DEF_PRIORITY; at DEF_PRIORITY the remaining
		 * quota is still scanned.
		 */
		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
			break;
	}

	sc->nr_reclaimed = nr_reclaimed;

	/*
	 * Independently of the quotas above, run shrink_active_list() on the
	 * anon list (file == 0) for SWAP_CLUSTER_MAX pages whenever the
	 * inactive anon list is low.
	 */
	if (inactive_anon_is_low(zone, sc))
		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);

	throttle_vm_writeout(sc->gfp_mask);
}
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873static void shrink_zones(int priority, struct zonelist *zonelist,
1874 struct scan_control *sc)
1875{
1876 struct zoneref *z;
1877 struct zone *zone;
1878
1879 for_each_zone_zonelist_nodemask(zone, z, zonelist,
1880 gfp_zone(sc->gfp_mask), sc->nodemask) {
1881 if (!populated_zone(zone))
1882 continue;
1883
1884
1885
1886
1887 if (scanning_global_lru(sc)) {
1888 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1889 continue;
1890 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
1891 continue;
1892 }
1893
1894 shrink_zone(priority, zone, sc);
1895 }
1896}
1897
1898static bool zone_reclaimable(struct zone *zone)
1899{
1900 return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
1901}
1902
1903
1904
1905
1906
1907
1908static bool all_unreclaimable(struct zonelist *zonelist,
1909 struct scan_control *sc)
1910{
1911 struct zoneref *z;
1912 struct zone *zone;
1913 bool all_unreclaimable = true;
1914
1915 for_each_zone_zonelist_nodemask(zone, z, zonelist,
1916 gfp_zone(sc->gfp_mask), sc->nodemask) {
1917 if (!populated_zone(zone))
1918 continue;
1919 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1920 continue;
1921 if (zone_reclaimable(zone)) {
1922 all_unreclaimable = false;
1923 break;
1924 }
1925 }
1926
1927 return all_unreclaimable;
1928}
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
/*
 * Main reclaim loop shared by the direct, cgroup and hibernation entry
 * points in this file.
 *
 * Walks priorities from DEF_PRIORITY down to 0, calling shrink_zones() and,
 * for global scans, shrink_slab() at each step, until sc->nr_reclaimed
 * reaches sc->nr_to_reclaim or the priorities are exhausted.
 *
 * Returns sc->nr_reclaimed if it is non-zero.  Otherwise returns 1 for a
 * global scan in which at least one zone is still reclaimable, and 0 in
 * every other case.
 */
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
					struct scan_control *sc)
{
	int priority;
	unsigned long total_scanned = 0;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	struct zoneref *z;
	struct zone *zone;
	unsigned long writeback_threshold;

	get_mems_allowed();
	delayacct_freepages_start();

	/* Only global (non-cgroup) reclaim is counted as an ALLOCSTALL. */
	if (scanning_global_lru(sc))
		count_vm_event(ALLOCSTALL);

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		sc->nr_scanned = 0;
		/* Last-resort pass: the swap token is disabled first. */
		if (!priority)
			disable_swap_token();
		shrink_zones(priority, zonelist, sc);
		/*
		 * Slab shrinking is only done for global scans.  lru_pages
		 * is the sum of reclaimable pages over the cpuset-allowed
		 * zones of the zonelist; note that sc->nodemask is not
		 * applied to this walk.
		 */
		if (scanning_global_lru(sc)) {
			unsigned long lru_pages = 0;
			for_each_zone_zonelist(zone, z, zonelist,
					gfp_zone(sc->gfp_mask)) {
				if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
					continue;

				lru_pages += zone_reclaimable_pages(zone);
			}

			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
			/* Fold slab pages freed meanwhile into the total. */
			if (reclaim_state) {
				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
				reclaim_state->reclaimed_slab = 0;
			}
		}
		total_scanned += sc->nr_scanned;
		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
			goto out;

		/*
		 * Once more than 1.5 times the reclaim target has been
		 * scanned without reaching it, wake the flusher threads
		 * and allow this scan to write pages back itself.
		 */
		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
		if (total_scanned > writeback_threshold) {
			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
			sc->may_writepage = 1;
		}

		/*
		 * Outside hibernation, and only after the first three
		 * priorities, wait (up to HZ/10) if the preferred zone is
		 * congested.
		 */
		if (!sc->hibernation_mode && sc->nr_scanned &&
		    priority < DEF_PRIORITY - 2) {
			struct zone *preferred_zone;

			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
							NULL, &preferred_zone);
			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
		}
	}

out:
	delayacct_freepages_end();
	put_mems_allowed();

	if (sc->nr_reclaimed)
		return sc->nr_reclaimed;

	/* Nothing freed, but a global scan still has reclaimable zones. */
	if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
		return 1;

	return 0;
}
2028
2029unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2030 gfp_t gfp_mask, nodemask_t *nodemask)
2031{
2032 unsigned long nr_reclaimed;
2033 struct scan_control sc = {
2034 .gfp_mask = gfp_mask,
2035 .may_writepage = !laptop_mode,
2036 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2037 .may_unmap = 1,
2038 .may_swap = 1,
2039 .swappiness = vm_swappiness,
2040 .order = order,
2041 .mem_cgroup = NULL,
2042 .nodemask = nodemask,
2043 };
2044
2045 trace_mm_vmscan_direct_reclaim_begin(order,
2046 sc.may_writepage,
2047 gfp_mask);
2048
2049 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2050
2051 trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
2052
2053 return nr_reclaimed;
2054}
2055
2056#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2057
2058unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2059 gfp_t gfp_mask, bool noswap,
2060 unsigned int swappiness,
2061 struct zone *zone)
2062{
2063 struct scan_control sc = {
2064 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2065 .may_writepage = !laptop_mode,
2066 .may_unmap = 1,
2067 .may_swap = !noswap,
2068 .swappiness = swappiness,
2069 .order = 0,
2070 .mem_cgroup = mem,
2071 };
2072 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2073 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
2074
2075 trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
2076 sc.may_writepage,
2077 sc.gfp_mask);
2078
2079
2080
2081
2082
2083
2084
2085
2086 shrink_zone(0, zone, &sc);
2087
2088 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2089
2090 return sc.nr_reclaimed;
2091}
2092
2093unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2094 gfp_t gfp_mask,
2095 bool noswap,
2096 unsigned int swappiness)
2097{
2098 struct zonelist *zonelist;
2099 unsigned long nr_reclaimed;
2100 struct scan_control sc = {
2101 .may_writepage = !laptop_mode,
2102 .may_unmap = 1,
2103 .may_swap = !noswap,
2104 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2105 .swappiness = swappiness,
2106 .order = 0,
2107 .mem_cgroup = mem_cont,
2108 .nodemask = NULL,
2109 };
2110
2111 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2112 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
2113 zonelist = NODE_DATA(numa_node_id())->node_zonelists;
2114
2115 trace_mm_vmscan_memcg_reclaim_begin(0,
2116 sc.may_writepage,
2117 sc.gfp_mask);
2118
2119 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2120
2121 trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
2122
2123 return nr_reclaimed;
2124}
2125#endif
2126
2127
2128static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2129{
2130 int i;
2131
2132
2133 if (remaining)
2134 return 1;
2135
2136
2137 for (i = 0; i < pgdat->nr_zones; i++) {
2138 struct zone *zone = pgdat->node_zones + i;
2139
2140 if (!populated_zone(zone))
2141 continue;
2142
2143 if (zone->all_unreclaimable)
2144 continue;
2145
2146 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
2147 0, 0))
2148 return 1;
2149 }
2150
2151 return 0;
2152}
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
/*
 * Reclaim on node @pgdat until every scanned zone meets its high watermark
 * for an allocation of @order.
 *
 * Called from kswapd() in this file; current->reclaim_state is dereferenced
 * unconditionally, so the caller must have set it.
 *
 * Returns the number of pages reclaimed during the final pass
 * (sc.nr_reclaimed is reset every time the function restarts at
 * loop_again).
 */
static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
	int all_zones_ok;
	int priority;
	int i;
	unsigned long total_scanned;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_unmap = 1,
		.may_swap = 1,
		/*
		 * No per-call reclaim target: with ULONG_MAX, shrink_zone()
		 * never takes its early "target met" exit.
		 */
		.nr_to_reclaim = ULONG_MAX,
		.swappiness = vm_swappiness,
		.order = order,
		.mem_cgroup = NULL,
	};
loop_again:
	total_scanned = 0;
	sc.nr_reclaimed = 0;
	sc.may_writepage = !laptop_mode;
	count_vm_event(PAGEOUTRUN);

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
		unsigned long lru_pages = 0;
		int has_under_min_watermark_zone = 0;

		/* Last-resort pass: the swap token is disabled first. */
		if (!priority)
			disable_swap_token();

		all_zones_ok = 1;

		/*
		 * Walk the zones from the highest index down to find the
		 * highest zone that fails its high watermark; it becomes
		 * end_zone.
		 */
		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
			struct zone *zone = pgdat->node_zones + i;

			if (!populated_zone(zone))
				continue;

			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
				continue;

			/*
			 * While passing by, run shrink_active_list() on the
			 * anon list of any zone whose inactive anon list is
			 * low.
			 */
			if (inactive_anon_is_low(zone, &sc))
				shrink_active_list(SWAP_CLUSTER_MAX, zone,
							&sc, priority, 0);

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), 0, 0)) {
				end_zone = i;
				break;
			}
		}
		/* Every zone passed (or was skipped): nothing to balance. */
		if (i < 0)
			goto out;

		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;

			lru_pages += zone_reclaimable_pages(zone);
		}

		/*
		 * Now scan zones 0..end_zone in ascending index order,
		 * reclaiming LRU pages and slab for each.
		 */
		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;
			int nr_slab;

			if (!populated_zone(zone))
				continue;

			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
				continue;

			sc.nr_scanned = 0;

			/*
			 * Give memory cgroup soft-limit reclaim a chance on
			 * this zone first; its return value is not used.
			 */
			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);

			/*
			 * The LRU lists are only shrunk while the zone is
			 * below eight times its high watermark; the slab
			 * shrinker below runs regardless.
			 */
			if (!zone_watermark_ok(zone, order,
					8*high_wmark_pages(zone), end_zone, 0))
				shrink_zone(priority, zone, &sc);
			reclaim_state->reclaimed_slab = 0;
			/* NOTE(review): nr_slab is an int; confirm it matches shrink_slab()'s return type. */
			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
						lru_pages);
			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
			total_scanned += sc.nr_scanned;
			if (zone->all_unreclaimable)
				continue;
			/* No slab progress and the zone is scanned out. */
			if (nr_slab == 0 && !zone_reclaimable(zone))
				zone->all_unreclaimable = 1;
			/*
			 * Allow writeback from this scan once more than
			 * 2 * SWAP_CLUSTER_MAX pages were scanned and the
			 * scanned total exceeds 1.5 times what was
			 * reclaimed.
			 */
			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
				sc.may_writepage = 1;

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), end_zone, 0)) {
				all_zones_ok = 0;
				/*
				 * Remember zones that are even below the min
				 * watermark so the congestion wait further
				 * down can be skipped.
				 */
				if (!zone_watermark_ok(zone, order,
					    min_wmark_pages(zone), end_zone, 0))
					has_under_min_watermark_zone = 1;
			} else {
				/*
				 * The zone meets its high watermark again:
				 * drop its congested flag.
				 */
				zone_clear_flag(zone, ZONE_CONGESTED);
			}

		}
		if (all_zones_ok)
			break;		/* kswapd: all done */
		/*
		 * Something was scanned and the first three priorities are
		 * behind us: wait for congestion to ease, unless a zone is
		 * under its min watermark.
		 */
		if (total_scanned && (priority < DEF_PRIORITY - 2)) {
			if (has_under_min_watermark_zone)
				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
			else
				congestion_wait(BLK_RW_ASYNC, HZ/10);
		}

		/*
		 * Enough progress for this pass; leave the priority loop
		 * and let the check at "out" decide whether to restart.
		 */
		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
			break;
	}
out:
	if (!all_zones_ok) {
		cond_resched();

		try_to_freeze();

		/*
		 * Little was reclaimed in this pass: fall back to order-0
		 * balancing for the retry.
		 */
		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
			order = sc.order = 0;

		goto loop_again;
	}

	return sc.nr_reclaimed;
}
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
/*
 * Per-node reclaim thread body, started through kthread_run() in
 * kswapd_run().
 *
 * @p: the node's pg_data_t.
 *
 * Sleeps on pgdat->kswapd_wait and calls balance_pgdat() with the order
 * recorded in pgdat->kswapd_max_order.  Returns 0 once
 * kthread_should_stop() becomes true.
 */
static int kswapd(void *p)
{
	unsigned long order;
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
	struct reclaim_state reclaim_state = {
		.reclaimed_slab = 0,
	};
	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	lockdep_set_current_reclaim_state(GFP_KERNEL);

	/* Bind to the node's CPUs, if the node has any. */
	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);
	current->reclaim_state = &reclaim_state;

	/*
	 * Mark the task as a memory allocator that may write to swap, and
	 * identify it as kswapd.
	 */
	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
	set_freezable();

	order = 0;
	for ( ; ; ) {
		unsigned long new_order;
		int ret;

		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
		new_order = pgdat->kswapd_max_order;
		pgdat->kswapd_max_order = 0;
		if (order < new_order) {
			/*
			 * A larger order was requested while the previous
			 * one was being processed: handle it without
			 * sleeping.
			 */
			order = new_order;
		} else {
			if (!freezing(current) && !kthread_should_stop()) {
				long remaining = 0;

				/* Try a short sleep (HZ/10) first. */
				if (!sleeping_prematurely(pgdat, order, remaining)) {
					remaining = schedule_timeout(HZ/10);
					finish_wait(&pgdat->kswapd_wait, &wait);
					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
				}

				/*
				 * Only sleep without a timeout if the short
				 * sleep ran to completion and the watermarks
				 * still hold; otherwise count why the sleep
				 * was cut short.
				 */
				if (!sleeping_prematurely(pgdat, order, remaining)) {
					trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
					schedule();
				} else {
					if (remaining)
						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
					else
						count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
				}
			}

			order = pgdat->kswapd_max_order;
		}
		finish_wait(&pgdat->kswapd_wait, &wait);

		ret = try_to_freeze();
		if (kthread_should_stop())
			break;

		/*
		 * Skip the balancing pass when try_to_freeze() returned
		 * non-zero.
		 */
		if (!ret) {
			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
			balance_pgdat(pgdat, order);
		}
	}
	return 0;
}
2479
2480
2481
2482
2483void wakeup_kswapd(struct zone *zone, int order)
2484{
2485 pg_data_t *pgdat;
2486
2487 if (!populated_zone(zone))
2488 return;
2489
2490 pgdat = zone->zone_pgdat;
2491 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
2492 return;
2493 if (pgdat->kswapd_max_order < order)
2494 pgdat->kswapd_max_order = order;
2495 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
2496 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2497 return;
2498 if (!waitqueue_active(&pgdat->kswapd_wait))
2499 return;
2500 wake_up_interruptible(&pgdat->kswapd_wait);
2501}
2502
2503
2504
2505
2506
2507
2508
2509
2510unsigned long global_reclaimable_pages(void)
2511{
2512 int nr;
2513
2514 nr = global_page_state(NR_ACTIVE_FILE) +
2515 global_page_state(NR_INACTIVE_FILE);
2516
2517 if (nr_swap_pages > 0)
2518 nr += global_page_state(NR_ACTIVE_ANON) +
2519 global_page_state(NR_INACTIVE_ANON);
2520
2521 return nr;
2522}
2523
2524unsigned long zone_reclaimable_pages(struct zone *zone)
2525{
2526 int nr;
2527
2528 nr = zone_page_state(zone, NR_ACTIVE_FILE) +
2529 zone_page_state(zone, NR_INACTIVE_FILE);
2530
2531 if (nr_swap_pages > 0)
2532 nr += zone_page_state(zone, NR_ACTIVE_ANON) +
2533 zone_page_state(zone, NR_INACTIVE_ANON);
2534
2535 return nr;
2536}
2537
2538#ifdef CONFIG_HIBERNATION
2539
2540
2541
2542
2543
2544
2545
2546
2547unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
2548{
2549 struct reclaim_state reclaim_state;
2550 struct scan_control sc = {
2551 .gfp_mask = GFP_HIGHUSER_MOVABLE,
2552 .may_swap = 1,
2553 .may_unmap = 1,
2554 .may_writepage = 1,
2555 .nr_to_reclaim = nr_to_reclaim,
2556 .hibernation_mode = 1,
2557 .swappiness = vm_swappiness,
2558 .order = 0,
2559 };
2560 struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
2561 struct task_struct *p = current;
2562 unsigned long nr_reclaimed;
2563
2564 p->flags |= PF_MEMALLOC;
2565 lockdep_set_current_reclaim_state(sc.gfp_mask);
2566 reclaim_state.reclaimed_slab = 0;
2567 p->reclaim_state = &reclaim_state;
2568
2569 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2570
2571 p->reclaim_state = NULL;
2572 lockdep_clear_current_reclaim_state();
2573 p->flags &= ~PF_MEMALLOC;
2574
2575 return nr_reclaimed;
2576}
2577#endif
2578
2579
2580
2581
2582
2583static int __devinit cpu_callback(struct notifier_block *nfb,
2584 unsigned long action, void *hcpu)
2585{
2586 int nid;
2587
2588 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
2589 for_each_node_state(nid, N_HIGH_MEMORY) {
2590 pg_data_t *pgdat = NODE_DATA(nid);
2591 const struct cpumask *mask;
2592
2593 mask = cpumask_of_node(pgdat->node_id);
2594
2595 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
2596
2597 set_cpus_allowed_ptr(pgdat->kswapd, mask);
2598 }
2599 }
2600 return NOTIFY_OK;
2601}
2602
2603
2604
2605
2606
2607int kswapd_run(int nid)
2608{
2609 pg_data_t *pgdat = NODE_DATA(nid);
2610 int ret = 0;
2611
2612 if (pgdat->kswapd)
2613 return 0;
2614
2615 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
2616 if (IS_ERR(pgdat->kswapd)) {
2617
2618 BUG_ON(system_state == SYSTEM_BOOTING);
2619 printk("Failed to start kswapd on node %d\n",nid);
2620 ret = -1;
2621 }
2622 return ret;
2623}
2624
2625
2626
2627
2628void kswapd_stop(int nid)
2629{
2630 struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
2631
2632 if (kswapd)
2633 kthread_stop(kswapd);
2634}
2635
2636static int __init kswapd_init(void)
2637{
2638 int nid;
2639
2640 swap_setup();
2641 for_each_node_state(nid, N_HIGH_MEMORY)
2642 kswapd_run(nid);
2643 hotcpu_notifier(cpu_callback, 0);
2644 return 0;
2645}
2646
2647module_init(kswapd_init)
2648
2649#ifdef CONFIG_NUMA
2650
2651
2652
2653
2654
2655
/*
 * Zone reclaim mode: a bit mask of the RECLAIM_* flags below.  In this
 * file RECLAIM_WRITE and RECLAIM_SWAP are tested when a zone-reclaim scan
 * is configured and when reclaimable page cache is estimated.
 */
int zone_reclaim_mode __read_mostly;

#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0)	/* presumably: run zone reclaim at all - TODO confirm against users outside this file */
#define RECLAIM_WRITE (1<<1)	/* zone reclaim may write pages back (sets sc.may_writepage) */
#define RECLAIM_SWAP (1<<2)	/* zone reclaim may unmap pages (sets sc.may_unmap) */

/*
 * Priority at which __zone_reclaim() starts scanning; it then counts down
 * towards 0 until enough pages have been reclaimed.
 */
#define ZONE_RECLAIM_PRIORITY 4

/*
 * NOTE(review): presumably the percentage from which
 * zone->min_unmapped_pages is derived - it is not used in this part of the
 * file; confirm at the sysctl handler.
 */
int sysctl_min_unmapped_ratio = 1;

/*
 * NOTE(review): presumably the percentage from which zone->min_slab_pages
 * is derived - it is not used in this part of the file; confirm at the
 * sysctl handler.
 */
int sysctl_min_slab_ratio = 5;
2681
2682static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
2683{
2684 unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
2685 unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
2686 zone_page_state(zone, NR_ACTIVE_FILE);
2687
2688
2689
2690
2691
2692
2693 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
2694}
2695
2696
2697static long zone_pagecache_reclaimable(struct zone *zone)
2698{
2699 long nr_pagecache_reclaimable;
2700 long delta = 0;
2701
2702
2703
2704
2705
2706
2707
2708 if (zone_reclaim_mode & RECLAIM_SWAP)
2709 nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
2710 else
2711 nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
2712
2713
2714 if (!(zone_reclaim_mode & RECLAIM_WRITE))
2715 delta += zone_page_state(zone, NR_FILE_DIRTY);
2716
2717
2718 if (unlikely(delta > nr_pagecache_reclaimable))
2719 delta = nr_pagecache_reclaimable;
2720
2721 return nr_pagecache_reclaimable - delta;
2722}
2723
2724
2725
2726
/*
 * Try to free at least 1 << @order pages from @zone.
 *
 * First the LRU lists are shrunk (if enough reclaimable page cache is
 * present), then the slab shrinker is run (if enough reclaimable slab is
 * present).  Returns non-zero when at least 1 << @order pages were
 * reclaimed in total.
 */
static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
	/* Number of pages this call must free to count as a success. */
	const unsigned long nr_pages = 1 << order;
	struct task_struct *p = current;
	struct reclaim_state reclaim_state;
	int priority;
	struct scan_control sc = {
		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
		.may_swap = 1,
		.nr_to_reclaim = max_t(unsigned long, nr_pages,
				       SWAP_CLUSTER_MAX),
		.gfp_mask = gfp_mask,
		.swappiness = vm_swappiness,
		.order = order,
	};
	unsigned long nr_slab_pages0, nr_slab_pages1;

	cond_resched();
	/*
	 * Enter reclaim context: mark the task as a memory allocator that
	 * may write to swap and give it a private reclaim_state.
	 */
	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
	lockdep_set_current_reclaim_state(gfp_mask);
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
		/*
		 * Shrink the LRU lists starting at ZONE_RECLAIM_PRIORITY and
		 * counting down until enough pages are freed or priority 0
		 * has been tried.
		 */
		priority = ZONE_RECLAIM_PRIORITY;
		do {
			shrink_zone(priority, zone, &sc);
			priority--;
		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
	}

	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
	if (nr_slab_pages0 > zone->min_slab_pages) {
		/*
		 * Keep calling the slab shrinker until it reports no
		 * progress or the zone's reclaimable slab count has dropped
		 * by at least nr_pages compared to the starting value.
		 */
		for (;;) {
			unsigned long lru_pages = zone_reclaimable_pages(zone);

			/* No reclaimable slab or very low memory pressure */
			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
				break;

			/* Freed enough memory */
			nr_slab_pages1 = zone_page_state(zone,
							NR_SLAB_RECLAIMABLE);
			if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
				break;
		}

		/*
		 * Credit the observed drop in this zone's reclaimable slab
		 * counter to the reclaimed total; an increase is ignored.
		 */
		nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
		if (nr_slab_pages1 < nr_slab_pages0)
			sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
	}

	/* Leave reclaim context (p is current, see above). */
	p->reclaim_state = NULL;
	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
	lockdep_clear_current_reclaim_state();
	return sc.nr_reclaimed >= nr_pages;
}
2809
2810int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2811{
2812 int node_id;
2813 int ret;
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825 if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
2826 zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
2827 return ZONE_RECLAIM_FULL;
2828
2829 if (zone->all_unreclaimable)
2830 return ZONE_RECLAIM_FULL;
2831
2832
2833
2834
2835 if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
2836 return ZONE_RECLAIM_NOSCAN;
2837
2838
2839
2840
2841
2842
2843
2844 node_id = zone_to_nid(zone);
2845 if (node_state(node_id, N_CPU) && node_id != numa_node_id())
2846 return ZONE_RECLAIM_NOSCAN;
2847
2848 if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
2849 return ZONE_RECLAIM_NOSCAN;
2850
2851 ret = __zone_reclaim(zone, gfp_mask, order);
2852 zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
2853
2854 if (!ret)
2855 count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
2856
2857 return ret;
2858}
2859#endif
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
/*
 * Test whether @page may be placed on an evictable LRU list.
 *
 * @vma is optional; when it is non-NULL the page is also checked against
 * that mapping through is_mlocked_vma().
 *
 * Returns 0 if the page's mapping is marked unevictable, the page is
 * mlocked, or @vma reports it as mlocked; returns 1 otherwise.
 */
int page_evictable(struct page *page, struct vm_area_struct *vma)
{
	if (mapping_unevictable(page_mapping(page)))
		return 0;

	if (PageMlocked(page))
		return 0;

	if (vma && is_mlocked_vma(vma, page))
		return 0;

	return 1;
}
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
/*
 * Re-check a page that sits on the unevictable list and move it to the
 * matching evictable list if it has become evictable.
 *
 * The callers in this file hold zone->lru_lock and have already checked
 * PageLRU && PageUnevictable.  The page must not be PageActive.
 */
static void check_move_unevictable_page(struct page *page, struct zone *zone)
{
	VM_BUG_ON(PageActive(page));

retry:
	ClearPageUnevictable(page);
	if (page_evictable(page, NULL)) {
		enum lru_list l = page_lru_base_type(page);

		/* Rescue: move to list l and fix up the zone counters. */
		__dec_zone_state(zone, NR_UNEVICTABLE);
		list_move(&page->lru, &zone->lru[l].list);
		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
		__count_vm_event(UNEVICTABLE_PGRESCUED);
	} else {
		/*
		 * Still unevictable: restore the flag and rotate the page on
		 * the unevictable list.
		 */
		SetPageUnevictable(page);
		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
		/*
		 * Re-test after setting the flag in case the page became
		 * evictable in the meantime.
		 */
		if (page_evictable(page, NULL))
			goto retry;
	}
}
2923
2924
2925
2926
2927
2928
2929
2930
/*
 * Walk the page cache of @mapping and give every page that is on the
 * unevictable LRU list a chance to move back to an evictable list.
 *
 * Pages are looked up one pagevec at a time, up to the page index that
 * covers the inode size.  The number of pages looked at is accounted as
 * UNEVICTABLE_PGSCANNED.
 */
void scan_mapping_unevictable_pages(struct address_space *mapping)
{
	pgoff_t next = 0;
	/* Index one past the last page covered by the inode size. */
	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
			 PAGE_CACHE_SHIFT;
	struct zone *zone;
	struct pagevec pvec;

	if (mapping->nrpages == 0)
		return;

	pagevec_init(&pvec, 0);
	while (next < end &&
		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		int i;
		int pg_scanned = 0;

		zone = NULL;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;
			struct zone *pagezone = page_zone(page);

			pg_scanned++;
			/* Continue the next lookup after this page. */
			if (page_index > next)
				next = page_index;
			next++;

			/*
			 * Hold the lru_lock of the zone the current page
			 * belongs to; switch locks only when the zone
			 * changes between consecutive pages.
			 */
			if (pagezone != zone) {
				if (zone)
					spin_unlock_irq(&zone->lru_lock);
				zone = pagezone;
				spin_lock_irq(&zone->lru_lock);
			}

			if (PageLRU(page) && PageUnevictable(page))
				check_move_unevictable_page(page, zone);
		}
		if (zone)
			spin_unlock_irq(&zone->lru_lock);
		pagevec_release(&pvec);

		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
	}

}
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
/* Pages examined per lru_lock hold time in scan_zone_unevictable_pages(). */
#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
/*
 * Re-check the pages on @zone's unevictable list and move those that have
 * become evictable.
 *
 * The amount of work is bounded by the NR_UNEVICTABLE counter sampled once
 * up front (before the lock is taken) and is processed in batches of at
 * most SCAN_UNEVICTABLE_BATCH_SIZE pages, dropping zone->lru_lock between
 * batches.
 */
static void scan_zone_unevictable_pages(struct zone *zone)
{
	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
	unsigned long scan;
	unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);

	while (nr_to_scan > 0) {
		unsigned long batch_size = min(nr_to_scan,
						SCAN_UNEVICTABLE_BATCH_SIZE);

		spin_lock_irq(&zone->lru_lock);
		for (scan = 0;  scan < batch_size; scan++) {
			/* Always work on the page at the tail of the list. */
			struct page *page = lru_to_page(l_unevictable);

			/*
			 * NOTE(review): when the page lock cannot be taken
			 * the page is not moved, so the next iteration looks
			 * at the same tail page again; the iteration still
			 * counts against the batch.
			 */
			if (!trylock_page(page))
				continue;

			prefetchw_prev_lru_page(page, l_unevictable, flags);

			if (likely(PageLRU(page) && PageUnevictable(page)))
				check_move_unevictable_page(page, zone);

			unlock_page(page);
		}
		spin_unlock_irq(&zone->lru_lock);

		nr_to_scan -= batch_size;
	}
}
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032static void scan_all_zones_unevictable_pages(void)
3033{
3034 struct zone *zone;
3035
3036 for_each_zone(zone) {
3037 scan_zone_unevictable_pages(zone);
3038 }
3039}
3040
3041
3042
3043
3044
3045unsigned long scan_unevictable_pages;
3046
3047int scan_unevictable_handler(struct ctl_table *table, int write,
3048 void __user *buffer,
3049 size_t *length, loff_t *ppos)
3050{
3051 proc_doulongvec_minmax(table, write, buffer, length, ppos);
3052
3053 if (write && *(unsigned long *)table->data)
3054 scan_all_zones_unevictable_pages();
3055
3056 scan_unevictable_pages = 0;
3057 return 0;
3058}
3059
3060#ifdef CONFIG_NUMA
3061
3062
3063
3064
3065
3066static ssize_t read_scan_unevictable_node(struct sys_device *dev,
3067 struct sysdev_attribute *attr,
3068 char *buf)
3069{
3070 return sprintf(buf, "0\n");
3071}
3072
3073static ssize_t write_scan_unevictable_node(struct sys_device *dev,
3074 struct sysdev_attribute *attr,
3075 const char *buf, size_t count)
3076{
3077 struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
3078 struct zone *zone;
3079 unsigned long res;
3080 unsigned long req = strict_strtoul(buf, 10, &res);
3081
3082 if (!req)
3083 return 1;
3084
3085 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
3086 if (!populated_zone(zone))
3087 continue;
3088 scan_zone_unevictable_pages(zone);
3089 }
3090 return 1;
3091}
3092
3093
/*
 * Per-node "scan_unevictable_pages" attribute: readable by everyone
 * (S_IRUGO), writable by the owner (S_IWUSR).  The resulting object is
 * referenced as attr_scan_unevictable_pages by the register/unregister
 * helpers.
 */
static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
			read_scan_unevictable_node,
			write_scan_unevictable_node);
3097
3098int scan_unevictable_register_node(struct node *node)
3099{
3100 return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
3101}
3102
3103void scan_unevictable_unregister_node(struct node *node)
3104{
3105 sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
3106}
3107#endif
3108