1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/mm.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/kernel_stat.h>
18#include <linux/swap.h>
19#include <linux/pagemap.h>
20#include <linux/init.h>
21#include <linux/highmem.h>
22#include <linux/vmstat.h>
23#include <linux/file.h>
24#include <linux/writeback.h>
25#include <linux/blkdev.h>
26#include <linux/buffer_head.h>
27
28#include <linux/mm_inline.h>
29#include <linux/pagevec.h>
30#include <linux/backing-dev.h>
31#include <linux/rmap.h>
32#include <linux/topology.h>
33#include <linux/cpu.h>
34#include <linux/cpuset.h>
35#include <linux/notifier.h>
36#include <linux/rwsem.h>
37#include <linux/delay.h>
38#include <linux/kthread.h>
39#include <linux/freezer.h>
40#include <linux/memcontrol.h>
41#include <linux/delayacct.h>
42#include <linux/sysctl.h>
43
44#include <asm/tlbflush.h>
45#include <asm/div64.h>
46
47#include <linux/swapops.h>
48
49#include "internal.h"
50
/*
 * Parameters and running counters for one reclaim pass.  A pointer to this
 * structure is threaded through the shrink_*() functions in this file.
 */
struct scan_control {
	/* Pages examined; shrink_page_list() bumps it for every page it locks */
	unsigned long nr_scanned;

	/* Pages freed so far; shrink_zone() stores its updated total back here */
	unsigned long nr_reclaimed;

	/* Reclaim target; shrink_zone() compares nr_reclaimed against it */
	unsigned long nr_to_reclaim;

	/*
	 * Not read in the part of the file visible here -- presumably set by
	 * the hibernation reclaim path; TODO confirm against its user.
	 */
	unsigned long hibernation_mode;

	/* Caller's allocation flags; tested for __GFP_FS / __GFP_IO before I/O */
	gfp_t gfp_mask;

	/* Zero prevents shrink_page_list() from calling pageout() on dirty pages */
	int may_writepage;

	/* Zero makes shrink_page_list() leave mapped pages alone */
	int may_unmap;

	/* Zero makes shrink_zone() give the anon lists a 0% scan share */
	int may_swap;

	/* Anon weight in get_scan_ratio(); the file weight is 200 - swappiness */
	int swappiness;

	/* Set by shrink_zones(), cleared once any zone is considered scannable */
	int all_unreclaimable;

	/* Allocation order; non-zero turns on the pfn-block scan in isolate_lru_pages() */
	int order;

	/*
	 * Memory cgroup being reclaimed from.  NULL means the global LRU when
	 * CONFIG_CGROUP_MEM_RES_CTLR is set (see scanning_global_lru()).
	 */
	struct mem_cgroup *mem_cgroup;

	/* Node filter handed to for_each_zone_zonelist_nodemask() in shrink_zones() */
	nodemask_t *nodemask;

	/*
	 * Callback that pulls up to @nr pages off an LRU onto @dst and reports
	 * how many were looked at through @scanned.  shrink_inactive_list() and
	 * shrink_active_list() invoke it with zone->lru_lock held.
	 */
	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
			unsigned long *scanned, int order, int mode,
			struct zone *z, struct mem_cgroup *mem_cont,
			int active, int file);
};
95
/* The page linked (through its ->lru member) as the last entry of list @_head */
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
97
/*
 * prefetch_prev_lru_page - prefetch a field of the page preceding @_page
 * @_page:  page currently being looked at
 * @_base:  list head of the LRU list being walked
 * @_field: member of struct page to prefetch
 *
 * Nothing is prefetched when @_page's predecessor is the list head itself.
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCH.  Every macro argument is parenthesised so that callers
 * may pass arbitrary expressions.
 */
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
111
/*
 * prefetchw_prev_lru_page - prefetch-for-write a field of the page
 * preceding @_page
 * @_page:  page currently being looked at
 * @_base:  list head of the LRU list being walked
 * @_field: member of struct page to prefetch
 *
 * Nothing is prefetched when @_page's predecessor is the list head itself.
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCHW.  Every macro argument is parenthesised so that callers
 * may pass arbitrary expressions.
 */
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
125
126
127
128
/*
 * Global swappiness setting, default 60.  Not read directly in the visible
 * code -- presumably copied into scan_control.swappiness by callers; TODO
 * confirm.
 */
int vm_swappiness = 60;
/* Not referenced in the visible part of this file; meaning to be confirmed */
long vm_total_pages;

/* All registered shrinkers, walked by shrink_slab() */
static LIST_HEAD(shrinker_list);
/* Protects shrinker_list: write-held to (un)register, read-held to walk */
static DECLARE_RWSEM(shrinker_rwsem);
134
/*
 * True when @sc targets the global LRU lists rather than a memory cgroup,
 * i.e. when sc->mem_cgroup is NULL.  Constant 1 when the memory resource
 * controller is not configured.
 */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
#else
#define scanning_global_lru(sc) (1)
#endif
140
141static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
142 struct scan_control *sc)
143{
144 if (!scanning_global_lru(sc))
145 return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
146
147 return &zone->reclaim_stat;
148}
149
150static unsigned long zone_nr_lru_pages(struct zone *zone,
151 struct scan_control *sc, enum lru_list lru)
152{
153 if (!scanning_global_lru(sc))
154 return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
155
156 return zone_page_state(zone, NR_LRU_BASE + lru);
157}
158
159
160
161
162
/*
 * Add @shrinker to the tail of the list walked by shrink_slab().
 * Its deferred-work counter ->nr is reset to zero first.
 */
void register_shrinker(struct shrinker *shrinker)
{
	shrinker->nr = 0;
	/* Writers exclude shrink_slab(), which walks the list read-locked */
	down_write(&shrinker_rwsem);
	list_add_tail(&shrinker->list, &shrinker_list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(register_shrinker);
171
172
173
174
/*
 * Remove @shrinker from the list walked by shrink_slab().
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	/* Taking the semaphore for write waits out any shrink_slab() walk */
	down_write(&shrinker_rwsem);
	list_del(&shrinker->list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);
182
/* Number of objects shrink_slab() asks a shrinker to scan per callback */
#define SHRINK_BATCH 128
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
204 unsigned long lru_pages)
205{
206 struct shrinker *shrinker;
207 unsigned long ret = 0;
208
209 if (scanned == 0)
210 scanned = SWAP_CLUSTER_MAX;
211
212 if (!down_read_trylock(&shrinker_rwsem))
213 return 1;
214
215 list_for_each_entry(shrinker, &shrinker_list, list) {
216 unsigned long long delta;
217 unsigned long total_scan;
218 unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
219
220 delta = (4 * scanned) / shrinker->seeks;
221 delta *= max_pass;
222 do_div(delta, lru_pages + 1);
223 shrinker->nr += delta;
224 if (shrinker->nr < 0) {
225 printk(KERN_ERR "shrink_slab: %pF negative objects to "
226 "delete nr=%ld\n",
227 shrinker->shrink, shrinker->nr);
228 shrinker->nr = max_pass;
229 }
230
231
232
233
234
235
236 if (shrinker->nr > max_pass * 2)
237 shrinker->nr = max_pass * 2;
238
239 total_scan = shrinker->nr;
240 shrinker->nr = 0;
241
242 while (total_scan >= SHRINK_BATCH) {
243 long this_scan = SHRINK_BATCH;
244 int shrink_ret;
245 int nr_before;
246
247 nr_before = (*shrinker->shrink)(0, gfp_mask);
248 shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
249 if (shrink_ret == -1)
250 break;
251 if (shrink_ret < nr_before)
252 ret += nr_before - shrink_ret;
253 count_vm_events(SLABS_SCANNED, this_scan);
254 total_scan -= this_scan;
255
256 cond_resched();
257 }
258
259 shrinker->nr += total_scan;
260 }
261 up_read(&shrinker_rwsem);
262 return ret;
263}
264
265
/*
 * Non-zero when @page is mapped, is in the swap cache, or belongs to an
 * address_space for which mapping_mapped() is true; 0 otherwise (including
 * pages with no mapping at all).
 */
static inline int page_mapping_inuse(struct page *page)
{
	struct address_space *mapping;

	/* The page itself is in use */
	if (page_mapped(page) || PageSwapCache(page))
		return 1;

	/* Otherwise defer to the state of the file it caches, if any */
	mapping = page_mapping(page);
	return mapping ? mapping_mapped(mapping) : 0;
}
285
/*
 * True when, after discounting page_has_private(), exactly two references
 * to @page remain.  Presumably those are the page cache's and the isolating
 * caller's -- TODO confirm against the refcounting rules.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	return page_count(page) - page_has_private(page) == 2;
}
295
296static int may_write_to_queue(struct backing_dev_info *bdi)
297{
298 if (current->flags & PF_SWAPWRITE)
299 return 1;
300 if (!bdi_write_congested(bdi))
301 return 1;
302 if (bdi == current->backing_dev_info)
303 return 1;
304 return 0;
305}
306
307
308
309
310
311
312
313
314
315
316
317
318
/*
 * Record a ->writepage() failure on @mapping.  The page lock is taken so
 * the page's mapping can be compared with @mapping; the error is recorded
 * only if the page still belongs to it.
 */
static void handle_write_error(struct address_space *mapping,
				struct page *page, int error)
{
	int still_attached;

	lock_page(page);
	still_attached = (page_mapping(page) == mapping);
	if (still_attached)
		mapping_set_error(mapping, error);
	unlock_page(page);
}
327
328
/* Whether pageout()/shrink_page_list() may wait for writeback to finish */
enum pageout_io {
	PAGEOUT_IO_ASYNC,	/* start I/O but never wait for it */
	PAGEOUT_IO_SYNC,	/* wait on pages that are under writeback */
};
333
334
/* Results of pageout(), as acted upon by shrink_page_list() */
typedef enum {
	/* could not write the page now: leave it on the inactive list */
	PAGE_KEEP,
	/* the page should be moved to the active list */
	PAGE_ACTIVATE,
	/* ->writepage() was called for the page */
	PAGE_SUCCESS,
	/* the page turned out not to need writing */
	PAGE_CLEAN,
} pageout_t;
345
346
347
348
349
/*
 * pageout - try to start writeback of a dirty page
 * @page:           the page; shrink_page_list() calls this with it locked
 * @mapping:        the page's address_space, may be NULL
 * @sync_writeback: PAGEOUT_IO_SYNC to wait for the write to complete
 *
 * Returns one of the pageout_t codes telling the caller what to do with
 * the page next.
 */
static pageout_t pageout(struct page *page, struct address_space *mapping,
						enum pageout_io sync_writeback)
{
	/* Extra references mean the page could not be freed after the write */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * A page without a mapping that still has private data:
		 * if the buffers can be dropped, treat the page as clean.
		 */
		if (page_has_private(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				printk("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	/* No way to write this page out */
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_queue(mapping->backing_dev_info))
		return PAGE_KEEP;

	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.nonblocking = 1,
			.for_reclaim = 1,
		};

		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		/* Synchronous mode: do not return until the write finished */
		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
			wait_on_page_writeback(page);

		if (!PageWriteback(page)) {
			/* The write already completed */
			ClearPageReclaim(page);
		}
		inc_zone_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	/* The dirty bit was already clear: nothing to write */
	return PAGE_CLEAN;
}
428
429
430
431
432
/*
 * Detach a locked page from @mapping (page cache or swap cache).
 *
 * Returns 1 when the page was removed, 0 when it could not be freed.  The
 * page must be locked and must belong to @mapping.  On success the page is
 * left with the refcount frozen by page_freeze_refs(); callers such as
 * remove_mapping() unfreeze it.
 */
static int __remove_mapping(struct address_space *mapping, struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(mapping != page_mapping(page));

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Proceed only if exactly two references exist -- presumably the
	 * cache's and the caller's; TODO confirm.  Any other user holding a
	 * reference makes the freeze fail.
	 */
	if (!page_freeze_refs(page, 2))
		goto cannot_free;
	/* The page may have been redirtied before the refcount was frozen */
	if (unlikely(PageDirty(page))) {
		page_unfreeze_refs(page, 2);
		goto cannot_free;
	}

	if (PageSwapCache(page)) {
		/* Read the swap entry before the page leaves the swap cache */
		swp_entry_t swap = { .val = page_private(page) };
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		swapcache_free(swap, page);
	} else {
		__remove_from_page_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		mem_cgroup_uncharge_cache_page(page);
	}

	return 1;

cannot_free:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}
489
490
491
492
493
494
495
/*
 * Try to detach @page from @mapping via __remove_mapping().
 *
 * Returns 1 on success, in which case the page's frozen refcount is
 * released again with a count of 1; returns 0 if the page could not be
 * removed.
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (!__remove_mapping(mapping, page))
		return 0;

	/* __remove_mapping() left the refcount frozen; hand one ref back */
	page_unfreeze_refs(page, 1);
	return 1;
}
509
510
511
512
513
514
515
516
517
518
/*
 * putback_lru_page - return an isolated page to an LRU list
 * @page: page that is currently off the LRU (PageLRU must be clear)
 *
 * The page goes to the active/inactive list matching its type if it is
 * evictable, or to the unevictable list otherwise.  The reference the
 * caller holds on the page is dropped before returning.
 */
void putback_lru_page(struct page *page)
{
	int lru;
	int active = !!TestClearPageActive(page);
	int was_unevictable = PageUnevictable(page);

	VM_BUG_ON(PageLRU(page));

redo:
	ClearPageUnevictable(page);

	if (page_evictable(page, NULL)) {
		/* Evictable: queue it for the list chosen by its active state */
		lru = active + page_lru_base_type(page);
		lru_cache_add_lru(page, lru);
	} else {
		/* Not evictable: park it on the unevictable list */
		lru = LRU_UNEVICTABLE;
		add_page_to_unevictable_list(page);
		/*
		 * Order the list insertion before the re-check of
		 * page_evictable() below.  Presumably this pairs with a
		 * barrier on the side that makes pages evictable again --
		 * TODO confirm.
		 */
		smp_mb();
	}

	/*
	 * The page may have become evictable while it was being put on the
	 * unevictable list.  If so, take it off again and start over.
	 */
	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
		if (!isolate_lru_page(page)) {
			/* Drop the reference isolate_lru_page() just took */
			put_page(page);
			goto redo;
		}
		/* Isolation failed: someone else now has the page off the LRU */
	}

	if (was_unevictable && lru != LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGRESCUED);
	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGCULLED);

	put_page(page);
}
581
582
583
584
/*
 * shrink_page_list - try to free each page on an isolated list
 * @page_list:      isolated pages (off the LRU, not PageActive)
 * @sc:             reclaim parameters; sc->nr_scanned is updated
 * @sync_writeback: PAGEOUT_IO_SYNC to wait for writeback, else ASYNC
 *
 * Pages that could not be freed are spliced back onto @page_list, possibly
 * marked PageActive.  Unevictable pages are handed to putback_lru_page()
 * instead.  Returns the number of pages reclaimed.
 */
static unsigned long shrink_page_list(struct list_head *page_list,
					struct scan_control *sc,
					enum pageout_io sync_writeback)
{
	LIST_HEAD(ret_pages);
	struct pagevec freed_pvec;
	int pgactivate = 0;
	unsigned long nr_reclaimed = 0;
	unsigned long vm_flags;

	cond_resched();

	pagevec_init(&freed_pvec, 1);
	while (!list_empty(page_list)) {
		struct address_space *mapping;
		struct page *page;
		int may_enter_fs;
		int referenced;

		cond_resched();

		page = lru_to_page(page_list);
		list_del(&page->lru);

		/* Never sleep on the page lock; skip pages someone else holds */
		if (!trylock_page(page))
			goto keep;

		VM_BUG_ON(PageActive(page));

		sc->nr_scanned++;

		if (unlikely(!page_evictable(page, NULL)))
			goto cull_mlocked;

		if (!sc->may_unmap && page_mapped(page))
			goto keep_locked;

		/* Mapped and swap cache pages are counted twice */
		if (page_mapped(page) || PageSwapCache(page))
			sc->nr_scanned++;

		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		if (PageWriteback(page)) {
			/*
			 * Only wait for in-flight writeback in synchronous
			 * mode and when the caller may enter the filesystem;
			 * otherwise leave the page for a later pass.
			 */
			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
				wait_on_page_writeback(page);
			else
				goto keep_locked;
		}

		referenced = page_referenced(page, 1,
						sc->mem_cgroup, &vm_flags);
		/*
		 * Recently referenced, in-use pages are reactivated, except
		 * for costly high-order reclaim and for VM_LOCKED mappings.
		 */
		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
					referenced && page_mapping_inuse(page)
					&& !(vm_flags & VM_LOCKED))
			goto activate_locked;

		/* Anonymous page without swap backing: allocate swap space */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!(sc->gfp_mask & __GFP_IO))
				goto keep_locked;
			if (!add_to_swap(page))
				goto activate_locked;
			may_enter_fs = 1;
		}

		mapping = page_mapping(page);

		/* Remove the page from every address space that maps it */
		if (page_mapped(page) && mapping) {
			switch (try_to_unmap(page, TTU_UNMAP)) {
			case SWAP_FAIL:
				goto activate_locked;
			case SWAP_AGAIN:
				goto keep_locked;
			case SWAP_MLOCK:
				goto cull_mlocked;
			case SWAP_SUCCESS:
				; /* try to free the page below */
			}
		}

		if (PageDirty(page)) {
			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
				goto keep_locked;
			if (!may_enter_fs)
				goto keep_locked;
			if (!sc->may_writepage)
				goto keep_locked;

			/* Page is dirty, try to write it out here */
			switch (pageout(page, mapping, sync_writeback)) {
			case PAGE_KEEP:
				goto keep_locked;
			case PAGE_ACTIVATE:
				goto activate_locked;
			case PAGE_SUCCESS:
				if (PageWriteback(page) || PageDirty(page))
					goto keep;
				/*
				 * The write already finished.  The page is
				 * re-locked here, so ->writepage() presumably
				 * returned with it unlocked -- TODO confirm.
				 */
				if (!trylock_page(page))
					goto keep;
				if (PageDirty(page) || PageWriteback(page))
					goto keep_locked;
				mapping = page_mapping(page);
				/* deliberate fall through to PAGE_CLEAN */
			case PAGE_CLEAN:
				; /* try to free the page below */
			}
		}

		/*
		 * Pages with private data (e.g. buffers) must have that data
		 * released before they can be freed.  If the page has no
		 * mapping and ours is the only reference, drop it directly.
		 */
		if (page_has_private(page)) {
			if (!try_to_release_page(page, sc->gfp_mask))
				goto activate_locked;
			if (!mapping && page_count(page) == 1) {
				unlock_page(page);
				if (put_page_testzero(page))
					goto free_it;
				else {
					/*
					 * Someone else took a reference in
					 * the meantime and will free the page
					 * when done; count it as reclaimed.
					 */
					nr_reclaimed++;
					continue;
				}
			}
		}

		if (!mapping || !__remove_mapping(mapping, page))
			goto keep_locked;

		/*
		 * The refcount is frozen at this point, so the non-atomic
		 * unlock variant is used.
		 */
		__clear_page_locked(page);
free_it:
		nr_reclaimed++;
		/* Free in batches: flush the pagevec whenever it fills up */
		if (!pagevec_add(&freed_pvec, page)) {
			__pagevec_free(&freed_pvec);
			pagevec_reinit(&freed_pvec);
		}
		continue;

cull_mlocked:
		if (PageSwapCache(page))
			try_to_free_swap(page);
		unlock_page(page);
		putback_lru_page(page);
		continue;

activate_locked:
		/* Not a candidate for swapping, so reclaim swap space. */
		if (PageSwapCache(page) && vm_swap_full())
			try_to_free_swap(page);
		VM_BUG_ON(PageActive(page));
		SetPageActive(page);
		pgactivate++;
keep_locked:
		unlock_page(page);
keep:
		list_add(&page->lru, &ret_pages);
		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
	}
	/* Hand the survivors back to the caller */
	list_splice(&ret_pages, page_list);
	if (pagevec_count(&freed_pvec))
		__pagevec_free(&freed_pvec);
	count_vm_events(PGACTIVATE, pgactivate);
	return nr_reclaimed;
}
805
806
/* Isolation modes understood by __isolate_lru_page() */
#define ISOLATE_INACTIVE 0	/* Isolate inactive pages only */
#define ISOLATE_ACTIVE 1	/* Isolate active pages only */
#define ISOLATE_BOTH 2		/* Isolate pages regardless of active/file state */
810
811
812
813
814
815
816
817
818
819
820
821int __isolate_lru_page(struct page *page, int mode, int file)
822{
823 int ret = -EINVAL;
824
825
826 if (!PageLRU(page))
827 return ret;
828
829
830
831
832
833
834 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
835 return ret;
836
837 if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
838 return ret;
839
840
841
842
843
844
845 if (PageUnevictable(page))
846 return ret;
847
848 ret = -EBUSY;
849
850 if (likely(get_page_unless_zero(page))) {
851
852
853
854
855
856 ClearPageLRU(page);
857 ret = 0;
858 }
859
860 return ret;
861}
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
/*
 * isolate_lru_pages - move pages from the tail of an LRU list to @dst
 * @nr_to_scan: upper bound on the scan counter
 * @src:        LRU list to take pages from
 * @dst:        list receiving the isolated pages
 * @scanned:    out: final value of the scan counter
 * @order:      if non-zero, also try the other pages of the aligned
 *              2^order pfn block around each isolated page
 * @mode:       ISOLATE_* mode passed to __isolate_lru_page()
 * @file:       file/anon selector passed to __isolate_lru_page()
 *
 * Both callers visible in this file reach it (through sc->isolate_pages)
 * with zone->lru_lock held.  Returns the number of pages moved to @dst.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct list_head *src, struct list_head *dst,
		unsigned long *scanned, int order, int mode, int file)
{
	unsigned long nr_taken = 0;
	unsigned long scan;

	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
		struct page *page;
		unsigned long pfn;
		unsigned long end_pfn;
		unsigned long page_pfn;
		int zone_id;

		page = lru_to_page(src);
		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON(!PageLRU(page));

		switch (__isolate_lru_page(page, mode, file)) {
		case 0:
			list_move(&page->lru, dst);
			mem_cgroup_del_lru(page);
			nr_taken++;
			break;

		case -EBUSY:
			/* Page is being freed elsewhere: rotate it out of the way */
			list_move(&page->lru, src);
			mem_cgroup_rotate_lru_list(page, page_lru(page));
			continue;

		default:
			/* Any other result (e.g. -EINVAL) is treated as a bug */
			BUG();
		}

		if (!order)
			continue;

		/*
		 * Higher-order request: walk the naturally aligned block of
		 * 2^order pfns containing the page just taken and try to
		 * isolate its neighbours too.
		 */
		zone_id = page_zone_id(page);
		page_pfn = page_to_pfn(page);
		pfn = page_pfn & ~((1 << order) - 1);
		end_pfn = pfn + (1 << order);
		for (; pfn < end_pfn; pfn++) {
			struct page *cursor_page;

			/* The page itself was already handled above */
			if (unlikely(pfn == page_pfn))
				continue;

			/* Stop at the first pfn that is not valid */
			if (unlikely(!pfn_valid_within(pfn)))
				break;

			cursor_page = pfn_to_page(pfn);

			/* Skip pages whose zone id differs from the target's */
			if (unlikely(page_zone_id(cursor_page) != zone_id))
				continue;

			/*
			 * With no swap space available, anonymous pages that
			 * are not already in the swap cache are skipped.
			 */
			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
					!PageSwapCache(cursor_page))
				continue;

			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
				list_move(&cursor_page->lru, dst);
				mem_cgroup_del_lru(cursor_page);
				nr_taken++;
				/* Neighbours taken count against the scan budget */
				scan++;
			}
		}
	}

	*scanned = scan;
	return nr_taken;
}
973
974static unsigned long isolate_pages_global(unsigned long nr,
975 struct list_head *dst,
976 unsigned long *scanned, int order,
977 int mode, struct zone *z,
978 struct mem_cgroup *mem_cont,
979 int active, int file)
980{
981 int lru = LRU_BASE;
982 if (active)
983 lru += LRU_ACTIVE;
984 if (file)
985 lru += LRU_FILE;
986 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
987 mode, file);
988}
989
990
991
992
993
994static unsigned long clear_active_flags(struct list_head *page_list,
995 unsigned int *count)
996{
997 int nr_active = 0;
998 int lru;
999 struct page *page;
1000
1001 list_for_each_entry(page, page_list, lru) {
1002 lru = page_lru_base_type(page);
1003 if (PageActive(page)) {
1004 lru += LRU_ACTIVE;
1005 ClearPageActive(page);
1006 nr_active++;
1007 }
1008 count[lru]++;
1009 }
1010
1011 return nr_active;
1012}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039int isolate_lru_page(struct page *page)
1040{
1041 int ret = -EBUSY;
1042
1043 if (PageLRU(page)) {
1044 struct zone *zone = page_zone(page);
1045
1046 spin_lock_irq(&zone->lru_lock);
1047 if (PageLRU(page) && get_page_unless_zero(page)) {
1048 int lru = page_lru(page);
1049 ret = 0;
1050 ClearPageLRU(page);
1051
1052 del_page_from_lru_list(zone, page, lru);
1053 }
1054 spin_unlock_irq(&zone->lru_lock);
1055 }
1056 return ret;
1057}
1058
1059
1060
1061
1062static int too_many_isolated(struct zone *zone, int file,
1063 struct scan_control *sc)
1064{
1065 unsigned long inactive, isolated;
1066
1067 if (current_is_kswapd())
1068 return 0;
1069
1070 if (!scanning_global_lru(sc))
1071 return 0;
1072
1073 if (file) {
1074 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1075 isolated = zone_page_state(zone, NR_ISOLATED_FILE);
1076 } else {
1077 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1078 isolated = zone_page_state(zone, NR_ISOLATED_ANON);
1079 }
1080
1081 return isolated > inactive;
1082}
1083
1084
1085
1086
1087
/*
 * shrink_inactive_list - reclaim pages from one inactive LRU list
 * @max_scan: keep going until at least this many pages were scanned
 * @zone:     zone to reclaim from
 * @sc:       reclaim parameters
 * @priority: current scan priority
 * @file:     non-zero for the file list, zero for the anon list
 *
 * Returns the number of pages reclaimed (or SWAP_CLUSTER_MAX if a fatal
 * signal arrived while throttled on too many isolated pages).
 */
static unsigned long shrink_inactive_list(unsigned long max_scan,
			struct zone *zone, struct scan_control *sc,
			int priority, int file)
{
	LIST_HEAD(page_list);
	struct pagevec pvec;
	unsigned long nr_scanned = 0;
	unsigned long nr_reclaimed = 0;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	int lumpy_reclaim = 0;

	/* Throttle while too many pages are already isolated from this zone */
	while (unlikely(too_many_isolated(zone, file, sc))) {
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		/* The task is about to die: report progress and get out */
		if (fatal_signal_pending(current))
			return SWAP_CLUSTER_MAX;
	}

	/*
	 * Use the contiguous-block ("lumpy") isolation mode for costly
	 * orders always, and for any non-zero order once the priority has
	 * dropped below DEF_PRIORITY - 2.
	 */
	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
		lumpy_reclaim = 1;
	else if (sc->order && priority < DEF_PRIORITY - 2)
		lumpy_reclaim = 1;

	pagevec_init(&pvec, 1);

	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);
	do {
		struct page *page;
		unsigned long nr_taken;
		unsigned long nr_scan;
		unsigned long nr_freed;
		unsigned long nr_active;
		unsigned int count[NR_LRU_LISTS] = { 0, };
		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
		unsigned long nr_anon;
		unsigned long nr_file;

		nr_taken = sc->isolate_pages(SWAP_CLUSTER_MAX,
			     &page_list, &nr_scan, sc->order, mode,
				zone, sc->mem_cgroup, 0, file);

		if (scanning_global_lru(sc)) {
			zone->pages_scanned += nr_scan;
			if (current_is_kswapd())
				__count_zone_vm_events(PGSCAN_KSWAPD, zone,
						       nr_scan);
			else
				__count_zone_vm_events(PGSCAN_DIRECT, zone,
						       nr_scan);
		}

		/* Nothing isolated: leave with lru_lock still held */
		if (nr_taken == 0)
			goto done;

		/* ISOLATE_BOTH may have pulled in active pages; deactivate them */
		nr_active = clear_active_flags(&page_list, count);
		__count_vm_events(PGDEACTIVATE, nr_active);

		/* Take the isolated pages out of the per-list counters ... */
		__mod_zone_page_state(zone, NR_ACTIVE_FILE,
						-count[LRU_ACTIVE_FILE]);
		__mod_zone_page_state(zone, NR_INACTIVE_FILE,
						-count[LRU_INACTIVE_FILE]);
		__mod_zone_page_state(zone, NR_ACTIVE_ANON,
						-count[LRU_ACTIVE_ANON]);
		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
						-count[LRU_INACTIVE_ANON]);

		/* ... and account them as isolated instead */
		nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
		nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);

		reclaim_stat->recent_scanned[0] += nr_anon;
		reclaim_stat->recent_scanned[1] += nr_file;

		spin_unlock_irq(&zone->lru_lock);

		nr_scanned += nr_scan;
		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);

		/*
		 * Lumpy direct reclaim that did not free everything: wait for
		 * some I/O to complete and retry the leftovers synchronously.
		 */
		if (nr_freed < nr_taken && !current_is_kswapd() &&
		    lumpy_reclaim) {
			congestion_wait(BLK_RW_ASYNC, HZ/10);

			/*
			 * The first pass may have marked pages active again;
			 * clear the flag before the second attempt.
			 */
			nr_active = clear_active_flags(&page_list, count);
			count_vm_events(PGDEACTIVATE, nr_active);

			nr_freed += shrink_page_list(&page_list, sc,
							PAGEOUT_IO_SYNC);
		}

		nr_reclaimed += nr_freed;

		/* Interrupts go off first so the __count_* variants can be used */
		local_irq_disable();
		if (current_is_kswapd())
			__count_vm_events(KSWAPD_STEAL, nr_freed);
		__count_zone_vm_events(PGSTEAL, zone, nr_freed);

		spin_lock(&zone->lru_lock);
		/*
		 * Put back any unfreeable pages.
		 */
		while (!list_empty(&page_list)) {
			int lru;
			page = lru_to_page(&page_list);
			VM_BUG_ON(PageLRU(page));
			list_del(&page->lru);
			if (unlikely(!page_evictable(page, NULL))) {
				/* putback_lru_page() is called without lru_lock */
				spin_unlock_irq(&zone->lru_lock);
				putback_lru_page(page);
				spin_lock_irq(&zone->lru_lock);
				continue;
			}
			SetPageLRU(page);
			lru = page_lru(page);
			add_page_to_lru_list(zone, page, lru);
			if (is_active_lru(lru)) {
				int file = is_file_lru(lru);
				reclaim_stat->recent_rotated[file]++;
			}
			/* Release page references in batches, outside the lock */
			if (!pagevec_add(&pvec, page)) {
				spin_unlock_irq(&zone->lru_lock);
				__pagevec_release(&pvec);
				spin_lock_irq(&zone->lru_lock);
			}
		}
		__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
		__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);

  	} while (nr_scanned < max_scan);

done:
	spin_unlock_irq(&zone->lru_lock);
	pagevec_release(&pvec);
	return nr_reclaimed;
}
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251static inline void note_zone_scanning_priority(struct zone *zone, int priority)
1252{
1253 if (priority < zone->prev_priority)
1254 zone->prev_priority = priority;
1255}
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
/*
 * Move every page on @list to LRU list @lru of @zone, setting PageLRU on
 * each and updating the zone's counter for that list.
 *
 * shrink_active_list() calls this with zone->lru_lock held.  The lock is
 * dropped and re-taken whenever the batch of page references is released.
 */
static void move_active_pages_to_lru(struct zone *zone,
				     struct list_head *list,
				     enum lru_list lru)
{
	unsigned long pgmoved = 0;
	struct pagevec pvec;
	struct page *page;

	pagevec_init(&pvec, 1);

	while (!list_empty(list)) {
		page = lru_to_page(list);

		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);

		list_move(&page->lru, &zone->lru[lru].list);
		mem_cgroup_add_lru_list(page, lru);
		pgmoved++;

		/* Flush when the pagevec is full or this was the last page */
		if (!pagevec_add(&pvec, page) || list_empty(list)) {
			spin_unlock_irq(&zone->lru_lock);
			if (buffer_heads_over_limit)
				pagevec_strip(&pvec);
			__pagevec_release(&pvec);
			spin_lock_irq(&zone->lru_lock);
		}
	}
	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
	/* Pages landing on an inactive list count as deactivations */
	if (!is_active_lru(lru))
		__count_vm_events(PGDEACTIVATE, pgmoved);
}
1307
/*
 * shrink_active_list - age pages from an active list to the inactive list
 * @nr_pages: number of pages to try to isolate from the active list
 * @zone:     zone to work on
 * @sc:       reclaim parameters
 * @priority: current scan priority (not used in this function)
 * @file:     non-zero for the file list, zero for the anon list
 *
 * Isolated pages are deactivated, except referenced executable file pages,
 * which go back onto the active list.  Unevictable pages are handed to
 * putback_lru_page().
 */
static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
			struct scan_control *sc, int priority, int file)
{
	unsigned long nr_taken;
	unsigned long pgscanned;
	unsigned long vm_flags;
	LIST_HEAD(l_hold);	/* The pages which were snipped off */
	LIST_HEAD(l_active);
	LIST_HEAD(l_inactive);
	struct page *page;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	unsigned long nr_rotated = 0;

	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);
	nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
					ISOLATE_ACTIVE, zone,
					sc->mem_cgroup, 1, file);
	/* pages_scanned is a per-zone figure; only global reclaim updates it */
	if (scanning_global_lru(sc)) {
		zone->pages_scanned += pgscanned;
	}
	reclaim_stat->recent_scanned[file] += nr_taken;

	__count_zone_vm_events(PGREFILL, zone, pgscanned);
	if (file)
		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
	else
		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
	/* NR_ISOLATED_ANON + file selects the anon or the file counter */
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	/* Sort the isolated pages into l_active / l_inactive without the lock */
	while (!list_empty(&l_hold)) {
		cond_resched();
		page = lru_to_page(&l_hold);
		list_del(&page->lru);

		if (unlikely(!page_evictable(page, NULL))) {
			putback_lru_page(page);
			continue;
		}

		/* page_referenced clears PageReferenced */
		if (page_mapping_inuse(page) &&
		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
			nr_rotated++;
			/*
			 * Referenced executable file pages get another trip
			 * around the active list; every other referenced
			 * page is still deactivated below.
			 */
			if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
				list_add(&page->lru, &l_active);
				continue;
			}
		}

		ClearPageActive(page);	/* we are de-activating */
		list_add(&page->lru, &l_inactive);
	}

	/*
	 * Move pages back to the lru list.
	 */
	spin_lock_irq(&zone->lru_lock);
	/*
	 * Referenced pages are counted as rotated even when they are
	 * deactivated; get_scan_ratio() uses this figure.
	 */
	reclaim_stat->recent_rotated[file] += nr_rotated;

	move_active_pages_to_lru(zone, &l_active,
						LRU_ACTIVE + file * LRU_FILE);
	move_active_pages_to_lru(zone, &l_inactive,
						LRU_BASE   + file * LRU_FILE);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
	spin_unlock_irq(&zone->lru_lock);
}
1395
1396static int inactive_anon_is_low_global(struct zone *zone)
1397{
1398 unsigned long active, inactive;
1399
1400 active = zone_page_state(zone, NR_ACTIVE_ANON);
1401 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1402
1403 if (inactive * zone->inactive_ratio < active)
1404 return 1;
1405
1406 return 0;
1407}
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1418{
1419 int low;
1420
1421 if (scanning_global_lru(sc))
1422 low = inactive_anon_is_low_global(zone);
1423 else
1424 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
1425 return low;
1426}
1427
1428static int inactive_file_is_low_global(struct zone *zone)
1429{
1430 unsigned long active, inactive;
1431
1432 active = zone_page_state(zone, NR_ACTIVE_FILE);
1433 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1434
1435 return (active > inactive);
1436}
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
1454{
1455 int low;
1456
1457 if (scanning_global_lru(sc))
1458 low = inactive_file_is_low_global(zone);
1459 else
1460 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
1461 return low;
1462}
1463
/*
 * Dispatch to the file or anon "inactive list is low" check depending on
 * @file.
 */
static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
				int file)
{
	return file ? inactive_file_is_low(zone, sc)
		    : inactive_anon_is_low(zone, sc);
}
1472
1473static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1474 struct zone *zone, struct scan_control *sc, int priority)
1475{
1476 int file = is_file_lru(lru);
1477
1478 if (is_active_lru(lru)) {
1479 if (inactive_list_is_low(zone, sc, file))
1480 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1481 return 0;
1482 }
1483
1484 return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
1485}
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
/*
 * get_scan_ratio - split scanning effort between anon and file pages
 * @zone:    zone being reclaimed
 * @sc:      reclaim parameters (swappiness, target LRU)
 * @percent: out: percent[0] is the anon share, percent[1] the file share
 *
 * The shares are derived from sc->swappiness and from how many of the
 * recently scanned pages of each type were rotated rather than reclaimed.
 */
static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
					unsigned long *percent)
{
	unsigned long anon, file, free;
	unsigned long anon_prio, file_prio;
	unsigned long ap, fp;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);

	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);

	if (scanning_global_lru(sc)) {
		free  = zone_page_state(zone, NR_FREE_PAGES);
		/*
		 * File and free pages together do not reach the high
		 * watermark: put all scanning effort on anon pages.
		 */
		if (unlikely(file + free <= high_wmark_pages(zone))) {
			percent[0] = 100;
			percent[1] = 0;
			return;
		}
	}

	/*
	 * Halve the scanned/rotated history of a type once the scanned count
	 * exceeds a quarter of that type's pages, so that recent behaviour
	 * outweighs old history.  The counters are otherwise updated under
	 * zone->lru_lock, hence the locking here.
	 */
	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
		spin_lock_irq(&zone->lru_lock);
		reclaim_stat->recent_scanned[0] /= 2;
		reclaim_stat->recent_rotated[0] /= 2;
		spin_unlock_irq(&zone->lru_lock);
	}

	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
		spin_lock_irq(&zone->lru_lock);
		reclaim_stat->recent_scanned[1] /= 2;
		reclaim_stat->recent_rotated[1] /= 2;
		spin_unlock_irq(&zone->lru_lock);
	}

	/*
	 * Base weights: swappiness for anon, 200 - swappiness for file.
	 */
	anon_prio = sc->swappiness;
	file_prio = 200 - sc->swappiness;

	/*
	 * Scale each weight by scanned/rotated for its type: the more of the
	 * scanned pages were rotated, the smaller the resulting pressure.
	 * The +1 terms keep the divisions well-defined.
	 */
	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
	ap /= reclaim_stat->recent_rotated[0] + 1;

	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
	fp /= reclaim_stat->recent_rotated[1] + 1;

	/* Normalize to percentages */
	percent[0] = 100 * ap / (ap + fp + 1);
	percent[1] = 100 - percent[0];
}
1567
1568
1569
1570
1571
1572static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
1573 unsigned long *nr_saved_scan)
1574{
1575 unsigned long nr;
1576
1577 *nr_saved_scan += nr_to_scan;
1578 nr = *nr_saved_scan;
1579
1580 if (nr >= SWAP_CLUSTER_MAX)
1581 *nr_saved_scan = 0;
1582 else
1583 nr = 0;
1584
1585 return nr;
1586}
1587
1588
1589
1590
/*
 * shrink_zone - one reclaim pass over the evictable LRU lists of a zone
 * @priority: scan priority; each list's size is shifted right by it to get
 *            the scan target
 * @zone:     zone to reclaim from
 * @sc:       reclaim parameters; sc->nr_reclaimed is updated on return
 */
static void shrink_zone(int priority, struct zone *zone,
				struct scan_control *sc)
{
	unsigned long nr[NR_LRU_LISTS];
	unsigned long nr_to_scan;
	unsigned long percent[2];	/* anon @ 0; file @ 1 */
	enum lru_list l;
	unsigned long nr_reclaimed = sc->nr_reclaimed;
	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	int noswap = 0;

	/* If we have no swap space, do not bother scanning anon pages. */
	if (!sc->may_swap || (nr_swap_pages <= 0)) {
		noswap = 1;
		percent[0] = 0;
		percent[1] = 100;
	} else
		get_scan_ratio(zone, sc, percent);

	/* Work out how many pages of each list to scan in this pass */
	for_each_evictable_lru(l) {
		int file = is_file_lru(l);
		unsigned long scan;

		scan = zone_nr_lru_pages(zone, sc, l);
		/* At priority 0 with swap usable, the whole list is scanned */
		if (priority || noswap) {
			scan >>= priority;
			scan = (scan * percent[file]) / 100;
		}
		nr[l] = nr_scan_try_batch(scan,
					  &reclaim_stat->nr_saved_scan[l]);
	}

	/*
	 * nr[LRU_ACTIVE_ANON] is deliberately absent from the loop condition,
	 * so leftover active-anon work alone does not keep the loop running.
	 */
	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
					nr[LRU_INACTIVE_FILE]) {
		for_each_evictable_lru(l) {
			if (nr[l]) {
				nr_to_scan = min_t(unsigned long,
						   nr[l], SWAP_CLUSTER_MAX);
				nr[l] -= nr_to_scan;

				nr_reclaimed += shrink_list(l, nr_to_scan,
							    zone, sc, priority);
			}
		}
		/*
		 * Stop once the target is met, but never at DEF_PRIORITY:
		 * the first, lightest pass always runs to completion.
		 */
		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
			break;
	}

	sc->nr_reclaimed = nr_reclaimed;

	/*
	 * Even if nothing was reclaimed from the anon lists, move some
	 * active anon pages along when the inactive anon list is low and
	 * swap space is available.
	 */
	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);

	throttle_vm_writeout(sc->gfp_mask);
}
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
/*
 * shrink_zones - run shrink_zone() on every eligible zone of a zonelist
 * @priority: scan priority for this pass
 * @zonelist: zones to consider, filtered by sc->gfp_mask and sc->nodemask
 * @sc:       reclaim parameters
 *
 * sc->all_unreclaimable is left at 1 only if no zone was scanned.
 */
static void shrink_zones(int priority, struct zonelist *zonelist,
					struct scan_control *sc)
{
	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
	struct zoneref *z;
	struct zone *zone;

	sc->all_unreclaimable = 1;
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
					sc->nodemask) {
		if (!populated_zone(zone))
			continue;
		/*
		 * Cpuset restrictions and the all-unreclaimable shortcut are
		 * applied to global reclaim only.
		 */
		if (scanning_global_lru(sc)) {
			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;
			note_zone_scanning_priority(zone, priority);

			/* Such zones are still scanned at DEF_PRIORITY */
			if (zone_is_all_unreclaimable(zone) &&
						priority != DEF_PRIORITY)
				continue;	/* Let kswapd poll it */
			sc->all_unreclaimable = 0;
		} else {
			/*
			 * Memory cgroup reclaim: every populated zone is
			 * scanned, and the priority is recorded on the cgroup
			 * rather than on the zone.
			 */
			sc->all_unreclaimable = 0;
			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
							priority);
		}

		shrink_zone(priority, zone, sc);
	}
}
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
/*
 * Common reclaim driver used by the global, memory cgroup and hibernation
 * entry points in this file.
 *
 * Calls shrink_zones() on @zonelist with priority running from DEF_PRIORITY
 * down to 0 until sc->nr_reclaimed reaches sc->nr_to_reclaim.  For global
 * reclaim, shrink_slab() is also called after every pass.
 *
 * Returns sc->nr_reclaimed as soon as the target is met.  If every priority
 * was tried without meeting it, returns sc->nr_reclaimed only for global
 * reclaim with sc->all_unreclaimable clear, and 0 otherwise.
 */
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
					struct scan_control *sc)
{
	int priority;
	unsigned long ret = 0;
	unsigned long total_scanned = 0;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	unsigned long lru_pages = 0;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
	unsigned long writeback_threshold;

	delayacct_freepages_start();

	if (scanning_global_lru(sc))
		count_vm_event(ALLOCSTALL);

	/*
	 * For global reclaim, total up the reclaimable pages of the zones
	 * the cpuset allows; the sum is handed to shrink_slab() below.
	 * For cgroup reclaim lru_pages stays 0 and shrink_slab() is not
	 * called.
	 */
	if (scanning_global_lru(sc)) {
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;

			lru_pages += zone_reclaimable_pages(zone);
		}
	}

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		sc->nr_scanned = 0;
		/* Last-resort pass: the swap token is disabled first. */
		if (!priority)
			disable_swap_token();
		shrink_zones(priority, zonelist, sc);

		/*
		 * Slab is only shrunk for global reclaim; pages freed from
		 * slab are reported through reclaim_state (when the task
		 * has one) and folded into sc->nr_reclaimed.
		 */
		if (scanning_global_lru(sc)) {
			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
			if (reclaim_state) {
				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
				reclaim_state->reclaimed_slab = 0;
			}
		}
		total_scanned += sc->nr_scanned;
		if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
			ret = sc->nr_reclaimed;
			goto out;
		}

		/*
		 * Once more than 1.5 times the reclaim target has been
		 * scanned without success, wake the flusher threads and
		 * allow this reclaimer to write pages itself from now on.
		 * In laptop mode 0 is passed to wakeup_flusher_threads()
		 * instead of the scanned count.
		 */
		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
		if (total_scanned > writeback_threshold) {
			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
			sc->may_writepage = 1;
		}

		/*
		 * From the fourth pass on, if anything was scanned, wait
		 * up to HZ/10 for I/O congestion -- except in hibernation
		 * mode.
		 */
		if (!sc->hibernation_mode && sc->nr_scanned &&
		    priority < DEF_PRIORITY - 2)
			congestion_wait(BLK_RW_ASYNC, HZ/10);
	}
	/* Target not met: report progress only in the case described above. */
	if (!sc->all_unreclaimable && scanning_global_lru(sc))
		ret = sc->nr_reclaimed;
out:
	/*
	 * Record the priority reached.  When the loop ran to completion
	 * priority is -1 here, so clamp it to 0 before storing it.
	 */
	if (priority < 0)
		priority = 0;

	if (scanning_global_lru(sc)) {
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;

			zone->prev_priority = priority;
		}
	} else
		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);

	delayacct_freepages_end();

	return ret;
}
1830
1831unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1832 gfp_t gfp_mask, nodemask_t *nodemask)
1833{
1834 struct scan_control sc = {
1835 .gfp_mask = gfp_mask,
1836 .may_writepage = !laptop_mode,
1837 .nr_to_reclaim = SWAP_CLUSTER_MAX,
1838 .may_unmap = 1,
1839 .may_swap = 1,
1840 .swappiness = vm_swappiness,
1841 .order = order,
1842 .mem_cgroup = NULL,
1843 .isolate_pages = isolate_pages_global,
1844 .nodemask = nodemask,
1845 };
1846
1847 return do_try_to_free_pages(zonelist, &sc);
1848}
1849
1850#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1851
1852unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
1853 gfp_t gfp_mask, bool noswap,
1854 unsigned int swappiness,
1855 struct zone *zone, int nid)
1856{
1857 struct scan_control sc = {
1858 .may_writepage = !laptop_mode,
1859 .may_unmap = 1,
1860 .may_swap = !noswap,
1861 .swappiness = swappiness,
1862 .order = 0,
1863 .mem_cgroup = mem,
1864 .isolate_pages = mem_cgroup_isolate_pages,
1865 };
1866 nodemask_t nm = nodemask_of_node(nid);
1867
1868 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1869 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1870 sc.nodemask = &nm;
1871 sc.nr_reclaimed = 0;
1872 sc.nr_scanned = 0;
1873
1874
1875
1876
1877
1878
1879
1880 shrink_zone(0, zone, &sc);
1881 return sc.nr_reclaimed;
1882}
1883
1884unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1885 gfp_t gfp_mask,
1886 bool noswap,
1887 unsigned int swappiness)
1888{
1889 struct zonelist *zonelist;
1890 struct scan_control sc = {
1891 .may_writepage = !laptop_mode,
1892 .may_unmap = 1,
1893 .may_swap = !noswap,
1894 .nr_to_reclaim = SWAP_CLUSTER_MAX,
1895 .swappiness = swappiness,
1896 .order = 0,
1897 .mem_cgroup = mem_cont,
1898 .isolate_pages = mem_cgroup_isolate_pages,
1899 .nodemask = NULL,
1900 };
1901
1902 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1903 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1904 zonelist = NODE_DATA(numa_node_id())->node_zonelists;
1905 return do_try_to_free_pages(zonelist, &sc);
1906}
1907#endif
1908
1909
1910static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
1911{
1912 int i;
1913
1914
1915 if (remaining)
1916 return 1;
1917
1918
1919 for (i = 0; i < pgdat->nr_zones; i++) {
1920 struct zone *zone = pgdat->node_zones + i;
1921
1922 if (!populated_zone(zone))
1923 continue;
1924
1925 if (zone_is_all_unreclaimable(zone))
1926 continue;
1927
1928 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
1929 0, 0))
1930 return 1;
1931 }
1932
1933 return 0;
1934}
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
/*
 * Reclaim memory on node @pgdat until every zone that is considered has
 * free pages at or above high_wmark_pages(zone) for allocations of @order.
 *
 * Returns sc.nr_reclaimed, which is reset on every jump to loop_again and
 * therefore counts the pages reclaimed during the final pass only.
 *
 * NOTE(review): current->reclaim_state is dereferenced without a NULL check;
 * the kswapd() thread function installs one before calling in here.
 */
static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
	int all_zones_ok;
	int priority;
	int i;
	unsigned long total_scanned;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_unmap = 1,
		.may_swap = 1,
		/*
		 * No per-call reclaim target: with ULONG_MAX the early-exit
		 * test in shrink_zone() cannot trigger, so termination is
		 * decided by the watermark checks below.
		 */
		.nr_to_reclaim = ULONG_MAX,
		.swappiness = vm_swappiness,
		.order = order,
		.mem_cgroup = NULL,
		.isolate_pages = isolate_pages_global,
	};
	/*
	 * Priority at which each zone was last scanned in this run; copied
	 * into zone->prev_priority only once the run is over (see "out").
	 */
	int temp_priority[MAX_NR_ZONES];

loop_again:
	total_scanned = 0;
	sc.nr_reclaimed = 0;
	sc.may_writepage = !laptop_mode;
	count_vm_event(PAGEOUTRUN);

	for (i = 0; i < pgdat->nr_zones; i++)
		temp_priority[i] = DEF_PRIORITY;

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		int end_zone = 0;	/* highest zone index needing reclaim */
		unsigned long lru_pages = 0;
		int has_under_min_watermark_zone = 0;

		/* Last-resort pass: the swap token is disabled first. */
		if (!priority)
			disable_swap_token();

		all_zones_ok = 1;

		/*
		 * Walk the zones from the highest index downwards and stop
		 * at the first one that is below its high watermark; that
		 * index becomes end_zone.
		 */
		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
			struct zone *zone = pgdat->node_zones + i;

			if (!populated_zone(zone))
				continue;

			if (zone_is_all_unreclaimable(zone) &&
			    priority != DEF_PRIORITY)
				continue;

			/*
			 * While passing, run one SWAP_CLUSTER_MAX-sized pass
			 * over the active list (last argument 0 selects the
			 * non-file list) if the inactive anon list is low.
			 */
			if (inactive_anon_is_low(zone, &sc))
				shrink_active_list(SWAP_CLUSTER_MAX, zone,
							&sc, priority, 0);

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), 0, 0)) {
				end_zone = i;
				break;
			}
		}
		/* No zone was found below its high watermark: nothing to do. */
		if (i < 0)
			goto out;

		/* Total reclaimable pages in zones 0..end_zone, for shrink_slab(). */
		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;

			lru_pages += zone_reclaimable_pages(zone);
		}

		/*
		 * Now reclaim, this time walking upwards from zone 0 to
		 * end_zone.
		 */
		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;
			int nr_slab;
			int nid, zid;

			if (!populated_zone(zone))
				continue;

			if (zone_is_all_unreclaimable(zone) &&
					priority != DEF_PRIORITY)
				continue;

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), end_zone, 0))
				all_zones_ok = 0;
			temp_priority[i] = priority;
			sc.nr_scanned = 0;
			note_zone_scanning_priority(zone, priority);

			nid = pgdat->node_id;
			zid = zone_idx(zone);
			/* Give memory cgroup soft-limit reclaim a turn on this zone. */
			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
							nid, zid);
			/*
			 * Skip LRU scanning for a zone that already has
			 * eight times its high watermark free.
			 */
			if (!zone_watermark_ok(zone, order,
					8*high_wmark_pages(zone), end_zone, 0))
				shrink_zone(priority, zone, &sc);
			/* Shrink slab and account what it freed. */
			reclaim_state->reclaimed_slab = 0;
			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
						lru_pages);
			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
			total_scanned += sc.nr_scanned;
			if (zone_is_all_unreclaimable(zone))
				continue;
			/*
			 * Flag the zone as all-unreclaimable when slab
			 * shrinking returned 0 and the zone has been scanned
			 * at least six times its reclaimable page count.
			 */
			if (nr_slab == 0 && zone->pages_scanned >=
					(zone_reclaimable_pages(zone) * 6))
					zone_set_flag(zone,
						      ZONE_ALL_UNRECLAIMABLE);
			/*
			 * Allow page writeout once more than
			 * 2 * SWAP_CLUSTER_MAX pages have been scanned and
			 * the scanned total exceeds 1.5 times the reclaimed
			 * total.
			 */
			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
				sc.may_writepage = 1;

			/*
			 * Remember if this zone is below even its min
			 * watermark; used below to skip the congestion wait.
			 */
			if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
						end_zone, 0))
				has_under_min_watermark_zone = 1;

		}
		if (all_zones_ok)
			break;		/* every scanned zone met its high watermark */
		/*
		 * From the fourth pass on, if anything has been scanned,
		 * wait up to HZ/10 for I/O congestion -- unless some zone is
		 * under its min watermark, in which case only an event is
		 * counted and reclaim continues immediately.
		 */
		if (total_scanned && (priority < DEF_PRIORITY - 2)) {
			if (has_under_min_watermark_zone)
				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
			else
				congestion_wait(BLK_RW_ASYNC, HZ/10);
		}

		/*
		 * Stop lowering the priority once at least SWAP_CLUSTER_MAX
		 * pages were reclaimed in this run; if zones are still not
		 * ok the code below restarts from DEF_PRIORITY.
		 */
		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
			break;
	}
out:
	/* Publish the priorities reached to the zones in one go. */
	for (i = 0; i < pgdat->nr_zones; i++) {
		struct zone *zone = pgdat->node_zones + i;

		zone->prev_priority = temp_priority[i];
	}
	if (!all_zones_ok) {
		cond_resched();

		try_to_freeze();

		/*
		 * If the run reclaimed fewer than SWAP_CLUSTER_MAX pages,
		 * fall back to balancing for order-0 on the retry.
		 */
		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
			order = sc.order = 0;

		goto loop_again;
	}

	return sc.nr_reclaimed;
}
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
/*
 * Thread function of the per-node reclaim daemon; @p is the node's
 * pg_data_t.
 *
 * Sleeps on pgdat->kswapd_wait and, each time it wakes, calls
 * balance_pgdat() for the order taken from pgdat->kswapd_max_order.
 * Returns 0 when kthread_should_stop() becomes true.
 */
static int kswapd(void *p)
{
	unsigned long order;
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
	struct reclaim_state reclaim_state = {
		.reclaimed_slab = 0,
	};
	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	lockdep_set_current_reclaim_state(GFP_KERNEL);

	/* Restrict the thread to the node's CPUs, if the node has any. */
	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);
	/* balance_pgdat() reads slab reclaim counts through this. */
	current->reclaim_state = &reclaim_state;

	/*
	 * PF_MEMALLOC, PF_SWAPWRITE and PF_KSWAPD are set once here and stay
	 * set for the life of the thread.
	 */
	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
	set_freezable();

	order = 0;
	for ( ; ; ) {
		unsigned long new_order;
		int ret;

		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
		/* Fetch and reset the highest order requested by wakers. */
		new_order = pgdat->kswapd_max_order;
		pgdat->kswapd_max_order = 0;
		if (order < new_order) {
			/*
			 * A larger order was requested than the one just
			 * balanced for: do not sleep, go balance again.
			 */
			order = new_order;
		} else {
			if (!freezing(current) && !kthread_should_stop()) {
				long remaining = 0;

				/* First try a short sleep of HZ/10 ... */
				if (!sleeping_prematurely(pgdat, order, remaining)) {
					remaining = schedule_timeout(HZ/10);
					finish_wait(&pgdat->kswapd_wait, &wait);
					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
				}

				/*
				 * ... and only sleep indefinitely if that
				 * short sleep ran to completion and the
				 * watermarks still hold.  Otherwise count
				 * which of the two conditions cut the sleep
				 * short and stay awake.
				 */
				if (!sleeping_prematurely(pgdat, order, remaining))
					schedule();
				else {
					if (remaining)
						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
					else
						count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
				}
			}

			/* Pick up whatever order was requested meanwhile. */
			order = pgdat->kswapd_max_order;
		}
		finish_wait(&pgdat->kswapd_wait, &wait);

		ret = try_to_freeze();
		if (kthread_should_stop())
			break;

		/*
		 * Balance only when try_to_freeze() returned 0 -- presumably
		 * non-zero means the thread was just thawed; confirm against
		 * the freezer API.
		 */
		if (!ret)
			balance_pgdat(pgdat, order);
	}
	return 0;
}
2277
2278
2279
2280
2281void wakeup_kswapd(struct zone *zone, int order)
2282{
2283 pg_data_t *pgdat;
2284
2285 if (!populated_zone(zone))
2286 return;
2287
2288 pgdat = zone->zone_pgdat;
2289 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
2290 return;
2291 if (pgdat->kswapd_max_order < order)
2292 pgdat->kswapd_max_order = order;
2293 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2294 return;
2295 if (!waitqueue_active(&pgdat->kswapd_wait))
2296 return;
2297 wake_up_interruptible(&pgdat->kswapd_wait);
2298}
2299
2300
2301
2302
2303
2304
2305
2306
2307unsigned long global_reclaimable_pages(void)
2308{
2309 int nr;
2310
2311 nr = global_page_state(NR_ACTIVE_FILE) +
2312 global_page_state(NR_INACTIVE_FILE);
2313
2314 if (nr_swap_pages > 0)
2315 nr += global_page_state(NR_ACTIVE_ANON) +
2316 global_page_state(NR_INACTIVE_ANON);
2317
2318 return nr;
2319}
2320
2321unsigned long zone_reclaimable_pages(struct zone *zone)
2322{
2323 int nr;
2324
2325 nr = zone_page_state(zone, NR_ACTIVE_FILE) +
2326 zone_page_state(zone, NR_INACTIVE_FILE);
2327
2328 if (nr_swap_pages > 0)
2329 nr += zone_page_state(zone, NR_ACTIVE_ANON) +
2330 zone_page_state(zone, NR_INACTIVE_ANON);
2331
2332 return nr;
2333}
2334
2335#ifdef CONFIG_HIBERNATION
2336
2337
2338
2339
2340
2341
2342
2343
2344unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
2345{
2346 struct reclaim_state reclaim_state;
2347 struct scan_control sc = {
2348 .gfp_mask = GFP_HIGHUSER_MOVABLE,
2349 .may_swap = 1,
2350 .may_unmap = 1,
2351 .may_writepage = 1,
2352 .nr_to_reclaim = nr_to_reclaim,
2353 .hibernation_mode = 1,
2354 .swappiness = vm_swappiness,
2355 .order = 0,
2356 .isolate_pages = isolate_pages_global,
2357 };
2358 struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
2359 struct task_struct *p = current;
2360 unsigned long nr_reclaimed;
2361
2362 p->flags |= PF_MEMALLOC;
2363 lockdep_set_current_reclaim_state(sc.gfp_mask);
2364 reclaim_state.reclaimed_slab = 0;
2365 p->reclaim_state = &reclaim_state;
2366
2367 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2368
2369 p->reclaim_state = NULL;
2370 lockdep_clear_current_reclaim_state();
2371 p->flags &= ~PF_MEMALLOC;
2372
2373 return nr_reclaimed;
2374}
2375#endif
2376
2377
2378
2379
2380
2381static int __devinit cpu_callback(struct notifier_block *nfb,
2382 unsigned long action, void *hcpu)
2383{
2384 int nid;
2385
2386 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
2387 for_each_node_state(nid, N_HIGH_MEMORY) {
2388 pg_data_t *pgdat = NODE_DATA(nid);
2389 const struct cpumask *mask;
2390
2391 mask = cpumask_of_node(pgdat->node_id);
2392
2393 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
2394
2395 set_cpus_allowed_ptr(pgdat->kswapd, mask);
2396 }
2397 }
2398 return NOTIFY_OK;
2399}
2400
2401
2402
2403
2404
2405int kswapd_run(int nid)
2406{
2407 pg_data_t *pgdat = NODE_DATA(nid);
2408 int ret = 0;
2409
2410 if (pgdat->kswapd)
2411 return 0;
2412
2413 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
2414 if (IS_ERR(pgdat->kswapd)) {
2415
2416 BUG_ON(system_state == SYSTEM_BOOTING);
2417 printk("Failed to start kswapd on node %d\n",nid);
2418 ret = -1;
2419 }
2420 return ret;
2421}
2422
2423
2424
2425
2426void kswapd_stop(int nid)
2427{
2428 struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
2429
2430 if (kswapd)
2431 kthread_stop(kswapd);
2432}
2433
2434static int __init kswapd_init(void)
2435{
2436 int nid;
2437
2438 swap_setup();
2439 for_each_node_state(nid, N_HIGH_MEMORY)
2440 kswapd_run(nid);
2441 hotcpu_notifier(cpu_callback, 0);
2442 return 0;
2443}
2444
2445module_init(kswapd_init)
2446
2447#ifdef CONFIG_NUMA
2448
2449
2450
2451
2452
2453
/*
 * Zone reclaim mode: a bit mask built from the RECLAIM_* flags below.
 * Zero-initialised, i.e. RECLAIM_OFF, unless changed elsewhere.
 */
int zone_reclaim_mode __read_mostly;

#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0)	/* not tested in this part of the file; presumably the master enable bit -- confirm */
#define RECLAIM_WRITE (1<<1)	/* __zone_reclaim() sets may_writepage from this bit */
#define RECLAIM_SWAP (1<<2)	/* __zone_reclaim() sets may_unmap from this bit */

/*
 * Priority at which __zone_reclaim() starts scanning; it then counts down
 * towards 0 while not enough pages have been reclaimed.
 */
#define ZONE_RECLAIM_PRIORITY 4

/*
 * Tunable related to zone->min_unmapped_pages, which zone_reclaim() compares
 * against the reclaimable page cache of a zone.  Presumably a percentage of
 * the zone's pages; the conversion is not visible here -- confirm.
 */
int sysctl_min_unmapped_ratio = 1;

/*
 * Tunable related to zone->min_slab_pages, which zone_reclaim() compares
 * against NR_SLAB_RECLAIMABLE.  Presumably a percentage of the zone's pages;
 * the conversion is not visible here -- confirm.
 */
int sysctl_min_slab_ratio = 5;
2479
2480static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
2481{
2482 unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
2483 unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
2484 zone_page_state(zone, NR_ACTIVE_FILE);
2485
2486
2487
2488
2489
2490
2491 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
2492}
2493
2494
2495static long zone_pagecache_reclaimable(struct zone *zone)
2496{
2497 long nr_pagecache_reclaimable;
2498 long delta = 0;
2499
2500
2501
2502
2503
2504
2505
2506 if (zone_reclaim_mode & RECLAIM_SWAP)
2507 nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
2508 else
2509 nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
2510
2511
2512 if (!(zone_reclaim_mode & RECLAIM_WRITE))
2513 delta += zone_page_state(zone, NR_FILE_DIRTY);
2514
2515
2516 if (unlikely(delta > nr_pagecache_reclaimable))
2517 delta = nr_pagecache_reclaimable;
2518
2519 return nr_pagecache_reclaimable - delta;
2520}
2521
2522
2523
2524
2525static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2526{
2527
2528 const unsigned long nr_pages = 1 << order;
2529 struct task_struct *p = current;
2530 struct reclaim_state reclaim_state;
2531 int priority;
2532 struct scan_control sc = {
2533 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
2534 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
2535 .may_swap = 1,
2536 .nr_to_reclaim = max_t(unsigned long, nr_pages,
2537 SWAP_CLUSTER_MAX),
2538 .gfp_mask = gfp_mask,
2539 .swappiness = vm_swappiness,
2540 .order = order,
2541 .isolate_pages = isolate_pages_global,
2542 };
2543 unsigned long slab_reclaimable;
2544
2545 disable_swap_token();
2546 cond_resched();
2547
2548
2549
2550
2551
2552 p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
2553 reclaim_state.reclaimed_slab = 0;
2554 p->reclaim_state = &reclaim_state;
2555
2556 if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
2557
2558
2559
2560
2561 priority = ZONE_RECLAIM_PRIORITY;
2562 do {
2563 note_zone_scanning_priority(zone, priority);
2564 shrink_zone(priority, zone, &sc);
2565 priority--;
2566 } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
2567 }
2568
2569 slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
2570 if (slab_reclaimable > zone->min_slab_pages) {
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581 while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
2582 zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
2583 slab_reclaimable - nr_pages)
2584 ;
2585
2586
2587
2588
2589
2590 sc.nr_reclaimed += slab_reclaimable -
2591 zone_page_state(zone, NR_SLAB_RECLAIMABLE);
2592 }
2593
2594 p->reclaim_state = NULL;
2595 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
2596 return sc.nr_reclaimed >= nr_pages;
2597}
2598
2599int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2600{
2601 int node_id;
2602 int ret;
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614 if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
2615 zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
2616 return ZONE_RECLAIM_FULL;
2617
2618 if (zone_is_all_unreclaimable(zone))
2619 return ZONE_RECLAIM_FULL;
2620
2621
2622
2623
2624 if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
2625 return ZONE_RECLAIM_NOSCAN;
2626
2627
2628
2629
2630
2631
2632
2633 node_id = zone_to_nid(zone);
2634 if (node_state(node_id, N_CPU) && node_id != numa_node_id())
2635 return ZONE_RECLAIM_NOSCAN;
2636
2637 if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
2638 return ZONE_RECLAIM_NOSCAN;
2639
2640 ret = __zone_reclaim(zone, gfp_mask, order);
2641 zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
2642
2643 if (!ret)
2644 count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
2645
2646 return ret;
2647}
2648#endif
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
/*
 * Test whether @page may be placed on an evictable LRU list.
 *
 * Returns 0 if the page's mapping is marked unevictable, if the page is
 * mlocked, or if @vma is given and is_mlocked_vma() reports the page as
 * belonging to a locked vma.  Returns 1 otherwise.  @vma may be NULL.
 */
int page_evictable(struct page *page, struct vm_area_struct *vma)
{
	if (mapping_unevictable(page_mapping(page)))
		return 0;

	if (PageMlocked(page))
		return 0;

	/* Only consulted when the page is not already marked mlocked. */
	if (vma && is_mlocked_vma(vma, page))
		return 0;

	return 1;
}
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
/*
 * Re-evaluate one page that is currently flagged unevictable and move it to
 * the LRU list it now belongs on.
 *
 * @page: page to check; must not be active (VM_BUG_ON otherwise).
 * @zone: zone the page belongs to.
 *
 * NOTE(review): both callers in this file hold zone->lru_lock around the
 * call; the non-atomic __inc/__dec counter updates suggest this is required.
 */
static void check_move_unevictable_page(struct page *page, struct zone *zone)
{
	VM_BUG_ON(PageActive(page));

retry:
	/* Clear the flag first, then decide afresh. */
	ClearPageUnevictable(page);
	if (page_evictable(page, NULL)) {
		/* Rescue: move to the page's base (inactive) LRU list. */
		enum lru_list l = page_lru_base_type(page);

		__dec_zone_state(zone, NR_UNEVICTABLE);
		list_move(&page->lru, &zone->lru[l].list);
		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
		__count_vm_event(UNEVICTABLE_PGRESCUED);
	} else {
		/*
		 * Still unevictable: restore the flag and rotate the page to
		 * the head of the unevictable list.
		 */
		SetPageUnevictable(page);
		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
		/*
		 * Check once more in case the page became evictable while
		 * the flag was being restored.
		 */
		if (page_evictable(page, NULL))
			goto retry;
	}
}
2712
2713
2714
2715
2716
2717
2718
2719
/*
 * Walk the page cache of @mapping and give every page that is on an LRU
 * list and flagged unevictable to check_move_unevictable_page().
 *
 * The walk covers page indices from 0 up to the file size rounded up to
 * whole pages.  The number of pages looked at is added to the
 * UNEVICTABLE_PGSCANNED event counter.
 */
void scan_mapping_unevictable_pages(struct address_space *mapping)
{
	pgoff_t next = 0;
	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
			 PAGE_CACHE_SHIFT;
	struct zone *zone;
	struct pagevec pvec;

	if (mapping->nrpages == 0)
		return;

	pagevec_init(&pvec, 0);
	while (next < end &&
		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		int i;
		int pg_scanned = 0;

		/* No lru_lock held at the start of each pagevec batch. */
		zone = NULL;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;
			struct zone *pagezone = page_zone(page);

			pg_scanned++;
			/* Resume the next lookup just past this page. */
			if (page_index > next)
				next = page_index;
			next++;

			/*
			 * Hold the lru_lock of the zone the current page is
			 * in; switch locks only when the zone changes.
			 */
			if (pagezone != zone) {
				if (zone)
					spin_unlock_irq(&zone->lru_lock);
				zone = pagezone;
				spin_lock_irq(&zone->lru_lock);
			}

			if (PageLRU(page) && PageUnevictable(page))
				check_move_unevictable_page(page, zone);
		}
		if (zone)
			spin_unlock_irq(&zone->lru_lock);
		pagevec_release(&pvec);

		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
	}

}
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL
2779static void scan_zone_unevictable_pages(struct zone *zone)
2780{
2781 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
2782 unsigned long scan;
2783 unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
2784
2785 while (nr_to_scan > 0) {
2786 unsigned long batch_size = min(nr_to_scan,
2787 SCAN_UNEVICTABLE_BATCH_SIZE);
2788
2789 spin_lock_irq(&zone->lru_lock);
2790 for (scan = 0; scan < batch_size; scan++) {
2791 struct page *page = lru_to_page(l_unevictable);
2792
2793 if (!trylock_page(page))
2794 continue;
2795
2796 prefetchw_prev_lru_page(page, l_unevictable, flags);
2797
2798 if (likely(PageLRU(page) && PageUnevictable(page)))
2799 check_move_unevictable_page(page, zone);
2800
2801 unlock_page(page);
2802 }
2803 spin_unlock_irq(&zone->lru_lock);
2804
2805 nr_to_scan -= batch_size;
2806 }
2807}
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821static void scan_all_zones_unevictable_pages(void)
2822{
2823 struct zone *zone;
2824
2825 for_each_zone(zone) {
2826 scan_zone_unevictable_pages(zone);
2827 }
2828}
2829
2830
2831
2832
2833
/*
 * Trigger variable for rescanning the unevictable lists; reset to 0 by
 * scan_unevictable_handler() on every access.  Presumably this is the
 * sysctl's table->data -- the table itself is not visible here.
 */
unsigned long scan_unevictable_pages;
2835
2836int scan_unevictable_handler(struct ctl_table *table, int write,
2837 void __user *buffer,
2838 size_t *length, loff_t *ppos)
2839{
2840 proc_doulongvec_minmax(table, write, buffer, length, ppos);
2841
2842 if (write && *(unsigned long *)table->data)
2843 scan_all_zones_unevictable_pages();
2844
2845 scan_unevictable_pages = 0;
2846 return 0;
2847}
2848
2849
2850
2851
2852
2853
2854static ssize_t read_scan_unevictable_node(struct sys_device *dev,
2855 struct sysdev_attribute *attr,
2856 char *buf)
2857{
2858 return sprintf(buf, "0\n");
2859}
2860
2861static ssize_t write_scan_unevictable_node(struct sys_device *dev,
2862 struct sysdev_attribute *attr,
2863 const char *buf, size_t count)
2864{
2865 struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
2866 struct zone *zone;
2867 unsigned long res;
2868 unsigned long req = strict_strtoul(buf, 10, &res);
2869
2870 if (!req)
2871 return 1;
2872
2873 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2874 if (!populated_zone(zone))
2875 continue;
2876 scan_zone_unevictable_pages(zone);
2877 }
2878 return 1;
2879}
2880
2881
/*
 * Per-node sysdev attribute "scan_unevictable_pages": readable by everyone
 * (S_IRUGO), writable by the owner (S_IWUSR).  It is referenced below as
 * attr_scan_unevictable_pages.
 */
static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
			read_scan_unevictable_node,
			write_scan_unevictable_node);
2885
2886int scan_unevictable_register_node(struct node *node)
2887{
2888 return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
2889}
2890
2891void scan_unevictable_unregister_node(struct node *node)
2892{
2893 sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
2894}
2895
2896