1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/kernel_stat.h>
17#include <linux/swap.h>
18#include <linux/smp_lock.h>
19#include <linux/pagemap.h>
20#include <linux/init.h>
21#include <linux/highmem.h>
22#include <linux/file.h>
23#include <linux/writeback.h>
24#include <linux/suspend.h>
25#include <linux/buffer_head.h>
26
27#include <asm/pgalloc.h>
28#include <asm/tlbflush.h>
29#include <linux/swapops.h>
30
31
32
33
34
35
36
/*
 * Priority at which a reclaim attempt starts.  try_to_free_pages() counts
 * it down to 1, and shrink_caches() scans at most nr_inactive / priority
 * pages per pass, so lower values mean a larger scan.
 */
#define DEF_PRIORITY (6)

/*
 * Return non-zero when the page's reference count is exactly one, after
 * discounting one reference if PagePrivate() is set.
 *
 * NOTE(review): presumably the remaining reference is the page cache's own
 * and the discounted one belongs to the private (buffer) data - confirm.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	int expected_refs = 1;

	if (PagePrivate(page))
		expected_refs++;

	return page_count(page) == expected_refs;
}
43
44
45static inline int page_mapping_inuse(struct page * page)
46{
47 struct address_space *mapping = page->mapping;
48
49
50 if (page->pte.chain)
51 return 1;
52
53
54 if (!mapping)
55 return 0;
56
57
58 if (!list_empty(&mapping->i_mmap) || !list_empty(&mapping->i_mmap_shared))
59 return 1;
60
61 return 0;
62}
63
64static int
65shrink_cache(int nr_pages, zone_t *classzone,
66 unsigned int gfp_mask, int priority, int max_scan)
67{
68 struct list_head * entry;
69 struct address_space *mapping;
70
71 spin_lock(&pagemap_lru_lock);
72 while (--max_scan >= 0 &&
73 (entry = inactive_list.prev) != &inactive_list) {
74 struct page *page;
75 int may_enter_fs;
76
77 if (need_resched()) {
78 spin_unlock(&pagemap_lru_lock);
79 __set_current_state(TASK_RUNNING);
80 schedule();
81 spin_lock(&pagemap_lru_lock);
82 continue;
83 }
84
85 page = list_entry(entry, struct page, lru);
86
87 if (unlikely(!PageLRU(page)))
88 BUG();
89 if (unlikely(PageActive(page)))
90 BUG();
91
92 list_del(entry);
93 list_add(entry, &inactive_list);
94 KERNEL_STAT_INC(pgscan);
95
96
97
98
99
100 if (unlikely(!page_count(page)))
101 continue;
102
103 if (!memclass(page_zone(page), classzone))
104 continue;
105
106
107
108
109
110 may_enter_fs = (gfp_mask & __GFP_FS) ||
111 (PageSwapCache(page) && (gfp_mask & __GFP_IO));
112
113
114
115
116 if (unlikely(PageWriteback(page))) {
117 if (may_enter_fs) {
118 page_cache_get(page);
119 spin_unlock(&pagemap_lru_lock);
120 wait_on_page_writeback(page);
121 page_cache_release(page);
122 spin_lock(&pagemap_lru_lock);
123 }
124 continue;
125 }
126
127 if (TestSetPageLocked(page))
128 continue;
129
130 if (PageWriteback(page)) {
131 unlock_page(page);
132 continue;
133 }
134
135
136
137
138
139 pte_chain_lock(page);
140 if (page_referenced(page) && page_mapping_inuse(page)) {
141 del_page_from_inactive_list(page);
142 add_page_to_active_list(page);
143 pte_chain_unlock(page);
144 unlock_page(page);
145 KERNEL_STAT_INC(pgactivate);
146 continue;
147 }
148
149
150
151
152
153
154
155 if (page->pte.chain && !page->mapping && !PagePrivate(page)) {
156 page_cache_get(page);
157 pte_chain_unlock(page);
158 spin_unlock(&pagemap_lru_lock);
159 if (!add_to_swap(page)) {
160 activate_page(page);
161 unlock_page(page);
162 page_cache_release(page);
163 spin_lock(&pagemap_lru_lock);
164 continue;
165 }
166 page_cache_release(page);
167 spin_lock(&pagemap_lru_lock);
168 pte_chain_lock(page);
169 }
170
171
172
173
174
175 if (page->pte.chain) {
176 switch (try_to_unmap(page)) {
177 case SWAP_ERROR:
178 case SWAP_FAIL:
179 goto page_active;
180 case SWAP_AGAIN:
181 pte_chain_unlock(page);
182 unlock_page(page);
183 continue;
184 case SWAP_SUCCESS:
185 ;
186 }
187 }
188 pte_chain_unlock(page);
189 mapping = page->mapping;
190
191 if (PageDirty(page) && is_page_cache_freeable(page) &&
192 page->mapping && may_enter_fs) {
193
194
195
196
197
198
199
200
201 int (*writeback)(struct page *, int *);
202 const int cluster_size = SWAP_CLUSTER_MAX;
203 int nr_to_write = cluster_size;
204
205 writeback = mapping->a_ops->vm_writeback;
206 if (writeback == NULL)
207 writeback = generic_vm_writeback;
208 page_cache_get(page);
209 spin_unlock(&pagemap_lru_lock);
210 (*writeback)(page, &nr_to_write);
211 max_scan -= (cluster_size - nr_to_write);
212 page_cache_release(page);
213 spin_lock(&pagemap_lru_lock);
214 continue;
215 }
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231 if (PagePrivate(page)) {
232 spin_unlock(&pagemap_lru_lock);
233
234
235 page_cache_get(page);
236
237 if (try_to_release_page(page, gfp_mask)) {
238 if (!mapping) {
239
240 unlock_page(page);
241 page_cache_release(page);
242
243 spin_lock(&pagemap_lru_lock);
244 if (--nr_pages)
245 continue;
246 break;
247 } else {
248
249
250
251
252
253 page_cache_release(page);
254
255 spin_lock(&pagemap_lru_lock);
256 }
257 } else {
258
259 unlock_page(page);
260 page_cache_release(page);
261
262 spin_lock(&pagemap_lru_lock);
263 continue;
264 }
265 }
266
267
268
269
270 if (mapping) {
271 write_lock(&mapping->page_lock);
272 if (is_page_cache_freeable(page))
273 goto page_freeable;
274 write_unlock(&mapping->page_lock);
275 }
276 unlock_page(page);
277 continue;
278page_freeable:
279
280
281
282
283 if (PageDirty(page)) {
284 write_unlock(&mapping->page_lock);
285 unlock_page(page);
286 continue;
287 }
288
289
290 if (likely(!PageSwapCache(page))) {
291 __remove_inode_page(page);
292 write_unlock(&mapping->page_lock);
293 } else {
294 swp_entry_t swap;
295 swap.val = page->index;
296 __delete_from_swap_cache(page);
297 write_unlock(&mapping->page_lock);
298 swap_free(swap);
299 }
300
301 __lru_cache_del(page);
302 unlock_page(page);
303
304
305 page_cache_release(page);
306 KERNEL_STAT_INC(pgsteal);
307 if (--nr_pages)
308 continue;
309 goto out;
310page_active:
311
312
313
314
315
316 del_page_from_inactive_list(page);
317 add_page_to_active_list(page);
318 pte_chain_unlock(page);
319 unlock_page(page);
320 KERNEL_STAT_INC(pgactivate);
321 }
322out: spin_unlock(&pagemap_lru_lock);
323 return nr_pages;
324}
325
326
327
328
329
330
331
332
333static void refill_inactive(int nr_pages)
334{
335 struct list_head * entry;
336
337 spin_lock(&pagemap_lru_lock);
338 entry = active_list.prev;
339 while (nr_pages-- && entry != &active_list) {
340 struct page * page;
341
342 page = list_entry(entry, struct page, lru);
343 entry = entry->prev;
344
345 KERNEL_STAT_INC(pgscan);
346
347 pte_chain_lock(page);
348 if (page->pte.chain && page_referenced(page)) {
349 list_del(&page->lru);
350 list_add(&page->lru, &active_list);
351 pte_chain_unlock(page);
352 continue;
353 }
354 del_page_from_active_list(page);
355 add_page_to_inactive_list(page);
356 pte_chain_unlock(page);
357 KERNEL_STAT_INC(pgdeactivate);
358 }
359 spin_unlock(&pagemap_lru_lock);
360}
361
362static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
363static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
364{
365 int chunk_size = nr_pages;
366 unsigned long ratio;
367 struct page_state ps;
368 int max_scan;
369
370 nr_pages -= kmem_cache_reap(gfp_mask);
371 if (nr_pages <= 0)
372 return 0;
373
374 nr_pages = chunk_size;
375
376
377
378
379 get_page_state(&ps);
380 ratio = (unsigned long)nr_pages * ps.nr_active /
381 ((ps.nr_inactive | 1) * 2);
382 refill_inactive(ratio);
383 max_scan = ps.nr_inactive / priority;
384 nr_pages = shrink_cache(nr_pages, classzone,
385 gfp_mask, priority, max_scan);
386 if (nr_pages <= 0)
387 return 0;
388
389 wakeup_bdflush();
390
391 shrink_dcache_memory(priority, gfp_mask);
392
393
394 shrink_icache_memory(1, gfp_mask);
395#ifdef CONFIG_QUOTA
396 shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
397#endif
398
399 return nr_pages;
400}
401
402int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
403{
404 int priority = DEF_PRIORITY;
405 int nr_pages = SWAP_CLUSTER_MAX;
406
407 KERNEL_STAT_INC(pageoutrun);
408
409 do {
410 nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
411 if (nr_pages <= 0)
412 return 1;
413 } while (--priority);
414
415
416
417
418
419 out_of_memory();
420 return 0;
421}
422
423DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
424
425static int check_classzone_need_balance(zone_t * classzone)
426{
427 zone_t * first_classzone;
428
429 first_classzone = classzone->zone_pgdat->node_zones;
430 while (classzone >= first_classzone) {
431 if (classzone->free_pages > classzone->pages_high)
432 return 0;
433 classzone--;
434 }
435 return 1;
436}
437
438static int kswapd_balance_pgdat(pg_data_t * pgdat)
439{
440 int need_more_balance = 0, i;
441 zone_t * zone;
442
443 for (i = pgdat->nr_zones-1; i >= 0; i--) {
444 zone = pgdat->node_zones + i;
445 cond_resched();
446 if (!zone->need_balance)
447 continue;
448 if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
449 zone->need_balance = 0;
450 __set_current_state(TASK_INTERRUPTIBLE);
451 schedule_timeout(HZ);
452 continue;
453 }
454 if (check_classzone_need_balance(zone))
455 need_more_balance = 1;
456 else
457 zone->need_balance = 0;
458 }
459
460 return need_more_balance;
461}
462
463static void kswapd_balance(void)
464{
465 int need_more_balance;
466 pg_data_t * pgdat;
467
468 do {
469 need_more_balance = 0;
470 pgdat = pgdat_list;
471 do
472 need_more_balance |= kswapd_balance_pgdat(pgdat);
473 while ((pgdat = pgdat->pgdat_next));
474 } while (need_more_balance);
475}
476
477static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
478{
479 zone_t * zone;
480 int i;
481
482 for (i = pgdat->nr_zones-1; i >= 0; i--) {
483 zone = pgdat->node_zones + i;
484 if (!zone->need_balance)
485 continue;
486 return 0;
487 }
488
489 return 1;
490}
491
492static int kswapd_can_sleep(void)
493{
494 pg_data_t * pgdat;
495
496 pgdat = pgdat_list;
497 do {
498 if (kswapd_can_sleep_pgdat(pgdat))
499 continue;
500 return 0;
501 } while ((pgdat = pgdat->pgdat_next));
502
503 return 1;
504}
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519int kswapd(void *unused)
520{
521 struct task_struct *tsk = current;
522 DECLARE_WAITQUEUE(wait, tsk);
523
524 daemonize();
525 strcpy(tsk->comm, "kswapd");
526 sigfillset(&tsk->blocked);
527
528
529
530
531
532
533
534
535
536
537
538
539
540 tsk->flags |= PF_MEMALLOC;
541
542
543
544
545 for (;;) {
546 if (current->flags & PF_FREEZE)
547 refrigerator(PF_IOTHREAD);
548 __set_current_state(TASK_INTERRUPTIBLE);
549 add_wait_queue(&kswapd_wait, &wait);
550
551 mb();
552 if (kswapd_can_sleep())
553 schedule();
554
555 __set_current_state(TASK_RUNNING);
556 remove_wait_queue(&kswapd_wait, &wait);
557
558
559
560
561
562
563 kswapd_balance();
564 blk_run_queues();
565 }
566}
567
568static int __init kswapd_init(void)
569{
570 printk("Starting kswapd\n");
571 swap_setup();
572 kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
573 return 0;
574}
575
576module_init(kswapd_init)
577