1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/kernel_stat.h>
17#include <linux/swap.h>
18#include <linux/swapctl.h>
19#include <linux/smp_lock.h>
20#include <linux/pagemap.h>
21#include <linux/init.h>
22#include <linux/highmem.h>
23#include <linux/file.h>
24#include <linux/writeback.h>
25#include <linux/suspend.h>
26#include <linux/buffer_head.h>
27
28#include <asm/pgalloc.h>
29#include <asm/tlbflush.h>
30#include <linux/swapops.h>
31
32
33
34
35
36
37
/*
 * Starting scan priority used by try_to_free_pages(), which lowers the
 * priority by one after each unsuccessful pass.  shrink_caches() divides the
 * inactive page count by the current priority to bound one scan, so the first
 * pass looks at no more than 1/6th of the inactive pages and every later pass
 * at a larger share.
 */
#define DEF_PRIORITY (6)
39
/*
 * Return nonzero when @page has exactly one reference left once the
 * reference accounted to PagePrivate() (if that flag is set) is discounted.
 * NOTE(review): presumably the discounted reference is the one held on behalf
 * of the page's private (buffer) data -- confirm.
 */
static inline int is_page_cache_freeable(struct page * page)
{
	int private_refs = PagePrivate(page) ? 1 : 0;

	return (page_count(page) - private_refs) == 1;
}
44
45
46static inline int page_mapping_inuse(struct page * page)
47{
48 struct address_space *mapping = page->mapping;
49
50
51 if (page->pte.chain)
52 return 1;
53
54
55 if (!mapping)
56 return 0;
57
58
59 if (!list_empty(&mapping->i_mmap) || !list_empty(&mapping->i_mmap_shared))
60 return 1;
61
62 return 0;
63}
64
65static int
66shrink_cache(int nr_pages, zone_t *classzone,
67 unsigned int gfp_mask, int priority, int max_scan)
68{
69 struct list_head * entry;
70 struct address_space *mapping;
71
72 spin_lock(&pagemap_lru_lock);
73 while (--max_scan >= 0 &&
74 (entry = inactive_list.prev) != &inactive_list) {
75 struct page *page;
76 int may_enter_fs;
77
78 if (need_resched()) {
79 spin_unlock(&pagemap_lru_lock);
80 __set_current_state(TASK_RUNNING);
81 schedule();
82 spin_lock(&pagemap_lru_lock);
83 continue;
84 }
85
86 page = list_entry(entry, struct page, lru);
87
88 if (unlikely(!PageLRU(page)))
89 BUG();
90 if (unlikely(PageActive(page)))
91 BUG();
92
93 list_del(entry);
94 list_add(entry, &inactive_list);
95 KERNEL_STAT_INC(pgscan);
96
97
98
99
100
101 if (unlikely(!page_count(page)))
102 continue;
103
104 if (!memclass(page_zone(page), classzone))
105 continue;
106
107
108
109
110
111 may_enter_fs = (gfp_mask & __GFP_FS) ||
112 (PageSwapCache(page) && (gfp_mask & __GFP_IO));
113
114
115
116
117 if (unlikely(PageWriteback(page))) {
118 if (may_enter_fs) {
119 page_cache_get(page);
120 spin_unlock(&pagemap_lru_lock);
121 wait_on_page_writeback(page);
122 page_cache_release(page);
123 spin_lock(&pagemap_lru_lock);
124 }
125 continue;
126 }
127
128 if (TestSetPageLocked(page))
129 continue;
130
131 if (PageWriteback(page)) {
132 unlock_page(page);
133 continue;
134 }
135
136
137
138
139
140 pte_chain_lock(page);
141 if (page_referenced(page) && page_mapping_inuse(page)) {
142 del_page_from_inactive_list(page);
143 add_page_to_active_list(page);
144 pte_chain_unlock(page);
145 unlock_page(page);
146 KERNEL_STAT_INC(pgactivate);
147 continue;
148 }
149
150
151
152
153
154
155
156 if (page->pte.chain && !page->mapping && !PagePrivate(page)) {
157 page_cache_get(page);
158 pte_chain_unlock(page);
159 spin_unlock(&pagemap_lru_lock);
160 if (!add_to_swap(page)) {
161 activate_page(page);
162 unlock_page(page);
163 page_cache_release(page);
164 spin_lock(&pagemap_lru_lock);
165 continue;
166 }
167 page_cache_release(page);
168 spin_lock(&pagemap_lru_lock);
169 pte_chain_lock(page);
170 }
171
172
173
174
175
176 if (page->pte.chain) {
177 switch (try_to_unmap(page)) {
178 case SWAP_ERROR:
179 case SWAP_FAIL:
180 goto page_active;
181 case SWAP_AGAIN:
182 pte_chain_unlock(page);
183 unlock_page(page);
184 continue;
185 case SWAP_SUCCESS:
186 ;
187 }
188 }
189 pte_chain_unlock(page);
190 mapping = page->mapping;
191
192 if (PageDirty(page) && is_page_cache_freeable(page) &&
193 page->mapping && may_enter_fs) {
194
195
196
197
198
199
200
201
202 int (*writeback)(struct page *, int *);
203 const int nr_pages = SWAP_CLUSTER_MAX;
204 int nr_to_write = nr_pages;
205
206 writeback = mapping->a_ops->vm_writeback;
207 if (writeback == NULL)
208 writeback = generic_vm_writeback;
209 page_cache_get(page);
210 spin_unlock(&pagemap_lru_lock);
211 (*writeback)(page, &nr_to_write);
212 max_scan -= (nr_pages - nr_to_write);
213 page_cache_release(page);
214 spin_lock(&pagemap_lru_lock);
215 continue;
216 }
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232 if (PagePrivate(page)) {
233 spin_unlock(&pagemap_lru_lock);
234
235
236 page_cache_get(page);
237
238 if (try_to_release_page(page, gfp_mask)) {
239 if (!mapping) {
240
241 unlock_page(page);
242 page_cache_release(page);
243
244 spin_lock(&pagemap_lru_lock);
245 if (--nr_pages)
246 continue;
247 break;
248 } else {
249
250
251
252
253
254 page_cache_release(page);
255
256 spin_lock(&pagemap_lru_lock);
257 }
258 } else {
259
260 unlock_page(page);
261 page_cache_release(page);
262
263 spin_lock(&pagemap_lru_lock);
264 continue;
265 }
266 }
267
268
269
270
271 if (mapping) {
272 write_lock(&mapping->page_lock);
273 if (is_page_cache_freeable(page))
274 goto page_freeable;
275 write_unlock(&mapping->page_lock);
276 }
277 unlock_page(page);
278 continue;
279page_freeable:
280
281
282
283
284 if (PageDirty(page)) {
285 write_unlock(&mapping->page_lock);
286 unlock_page(page);
287 continue;
288 }
289
290
291 if (likely(!PageSwapCache(page))) {
292 __remove_inode_page(page);
293 write_unlock(&mapping->page_lock);
294 } else {
295 swp_entry_t swap;
296 swap.val = page->index;
297 __delete_from_swap_cache(page);
298 write_unlock(&mapping->page_lock);
299 swap_free(swap);
300 }
301
302 __lru_cache_del(page);
303 unlock_page(page);
304
305
306 page_cache_release(page);
307 KERNEL_STAT_INC(pgsteal);
308 if (--nr_pages)
309 continue;
310 goto out;
311page_active:
312
313
314
315
316
317 del_page_from_inactive_list(page);
318 add_page_to_active_list(page);
319 pte_chain_unlock(page);
320 unlock_page(page);
321 KERNEL_STAT_INC(pgactivate);
322 }
323out: spin_unlock(&pagemap_lru_lock);
324 return nr_pages;
325}
326
327
328
329
330
331
332
333
334static void refill_inactive(int nr_pages)
335{
336 struct list_head * entry;
337
338 spin_lock(&pagemap_lru_lock);
339 entry = active_list.prev;
340 while (nr_pages-- && entry != &active_list) {
341 struct page * page;
342
343 page = list_entry(entry, struct page, lru);
344 entry = entry->prev;
345
346 KERNEL_STAT_INC(pgscan);
347
348 pte_chain_lock(page);
349 if (page->pte.chain && page_referenced(page)) {
350 list_del(&page->lru);
351 list_add(&page->lru, &active_list);
352 pte_chain_unlock(page);
353 continue;
354 }
355 del_page_from_active_list(page);
356 add_page_to_inactive_list(page);
357 pte_chain_unlock(page);
358 KERNEL_STAT_INC(pgdeactivate);
359 }
360 spin_unlock(&pagemap_lru_lock);
361}
362
363static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
364static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
365{
366 int chunk_size = nr_pages;
367 unsigned long ratio;
368 struct page_state ps;
369 int max_scan;
370
371 nr_pages -= kmem_cache_reap(gfp_mask);
372 if (nr_pages <= 0)
373 return 0;
374
375 nr_pages = chunk_size;
376
377
378
379
380 get_page_state(&ps);
381 ratio = (unsigned long)nr_pages * ps.nr_active /
382 ((ps.nr_inactive | 1) * 2);
383 refill_inactive(ratio);
384 max_scan = ps.nr_inactive / priority;
385 nr_pages = shrink_cache(nr_pages, classzone,
386 gfp_mask, priority, max_scan);
387 if (nr_pages <= 0)
388 return 0;
389
390 wakeup_bdflush();
391
392 shrink_dcache_memory(priority, gfp_mask);
393
394
395 shrink_icache_memory(1, gfp_mask);
396#ifdef CONFIG_QUOTA
397 shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
398#endif
399
400 return nr_pages;
401}
402
403int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
404{
405 int priority = DEF_PRIORITY;
406 int nr_pages = SWAP_CLUSTER_MAX;
407
408 KERNEL_STAT_INC(pageoutrun);
409
410 do {
411 nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
412 if (nr_pages <= 0)
413 return 1;
414 } while (--priority);
415
416
417
418
419
420 out_of_memory();
421 return 0;
422}
423
/* Wait queue the kswapd() thread puts itself on before it goes to sleep. */
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
425
426static int check_classzone_need_balance(zone_t * classzone)
427{
428 zone_t * first_classzone;
429
430 first_classzone = classzone->zone_pgdat->node_zones;
431 while (classzone >= first_classzone) {
432 if (classzone->free_pages > classzone->pages_high)
433 return 0;
434 classzone--;
435 }
436 return 1;
437}
438
439static int kswapd_balance_pgdat(pg_data_t * pgdat)
440{
441 int need_more_balance = 0, i;
442 zone_t * zone;
443
444 for (i = pgdat->nr_zones-1; i >= 0; i--) {
445 zone = pgdat->node_zones + i;
446 cond_resched();
447 if (!zone->need_balance)
448 continue;
449 if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
450 zone->need_balance = 0;
451 __set_current_state(TASK_INTERRUPTIBLE);
452 schedule_timeout(HZ);
453 continue;
454 }
455 if (check_classzone_need_balance(zone))
456 need_more_balance = 1;
457 else
458 zone->need_balance = 0;
459 }
460
461 return need_more_balance;
462}
463
464static void kswapd_balance(void)
465{
466 int need_more_balance;
467 pg_data_t * pgdat;
468
469 do {
470 need_more_balance = 0;
471 pgdat = pgdat_list;
472 do
473 need_more_balance |= kswapd_balance_pgdat(pgdat);
474 while ((pgdat = pgdat->node_next));
475 } while (need_more_balance);
476}
477
478static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
479{
480 zone_t * zone;
481 int i;
482
483 for (i = pgdat->nr_zones-1; i >= 0; i--) {
484 zone = pgdat->node_zones + i;
485 if (!zone->need_balance)
486 continue;
487 return 0;
488 }
489
490 return 1;
491}
492
493static int kswapd_can_sleep(void)
494{
495 pg_data_t * pgdat;
496
497 pgdat = pgdat_list;
498 do {
499 if (kswapd_can_sleep_pgdat(pgdat))
500 continue;
501 return 0;
502 } while ((pgdat = pgdat->node_next));
503
504 return 1;
505}
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520int kswapd(void *unused)
521{
522 struct task_struct *tsk = current;
523 DECLARE_WAITQUEUE(wait, tsk);
524
525 daemonize();
526 strcpy(tsk->comm, "kswapd");
527 sigfillset(&tsk->blocked);
528
529
530
531
532
533
534
535
536
537
538
539
540
541 tsk->flags |= PF_MEMALLOC;
542
543
544
545
546 for (;;) {
547 if (current->flags & PF_FREEZE)
548 refrigerator(PF_IOTHREAD);
549 __set_current_state(TASK_INTERRUPTIBLE);
550 add_wait_queue(&kswapd_wait, &wait);
551
552 mb();
553 if (kswapd_can_sleep())
554 schedule();
555
556 __set_current_state(TASK_RUNNING);
557 remove_wait_queue(&kswapd_wait, &wait);
558
559
560
561
562
563
564 kswapd_balance();
565 blk_run_queues();
566 }
567}
568
569static int __init kswapd_init(void)
570{
571 printk("Starting kswapd\n");
572 swap_setup();
573 kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
574 return 0;
575}
576
577module_init(kswapd_init)
578