1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/slab.h>
14#include <linux/kernel_stat.h>
15#include <linux/swap.h>
16#include <linux/swapctl.h>
17#include <linux/smp_lock.h>
18#include <linux/pagemap.h>
19#include <linux/init.h>
20
21#include <asm/pgtable.h>
22
23
24
25
26
27
28
29
30
31
32
33
34static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
35 unsigned long address, pte_t * page_table, int gfp_mask)
36{
37 pte_t pte;
38 unsigned long entry;
39 unsigned long page;
40 struct page * page_map;
41
42 pte = *page_table;
43 if (!pte_present(pte))
44 return 0;
45 page = pte_page(pte);
46 if (MAP_NR(page) >= max_mapnr)
47 return 0;
48 page_map = mem_map + MAP_NR(page);
49
50 if (pte_young(pte)) {
51
52
53
54
55 set_pte(page_table, pte_mkold(pte));
56 flush_tlb_page(vma, address);
57 set_bit(PG_referenced, &page_map->flags);
58 return 0;
59 }
60
61 if (PageReserved(page_map)
62 || PageLocked(page_map)
63 || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)))
64 return 0;
65
66
67
68
69
70
71
72
73
74 if (PageSwapCache(page_map)) {
75 entry = page_map->offset;
76 swap_duplicate(entry);
77 set_pte(page_table, __pte(entry));
78drop_pte:
79 vma->vm_mm->rss--;
80 flush_tlb_page(vma, address);
81 __free_page(page_map);
82 return 0;
83 }
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98 if (!pte_dirty(pte)) {
99 if (page_map->inode && pgcache_under_min())
100
101 return 0;
102 flush_cache_page(vma, address);
103 pte_clear(page_table);
104 goto drop_pte;
105 }
106
107
108
109
110
111
112 if (!(gfp_mask & __GFP_IO) || current->fs_locks)
113 return 0;
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134 flush_cache_page(vma, address);
135 if (vma->vm_ops && vma->vm_ops->swapout) {
136 pid_t pid = tsk->pid;
137 pte_clear(page_table);
138 flush_tlb_page(vma, address);
139 vma->vm_mm->rss--;
140
141 if (vma->vm_ops->swapout(vma, page_map))
142 kill_proc(pid, SIGBUS, 1);
143 __free_page(page_map);
144 return 1;
145 }
146
147
148
149
150
151
152
153 entry = get_swap_page();
154 if (!entry)
155 return 0;
156
157 vma->vm_mm->rss--;
158 tsk->nswap++;
159 set_pte(page_table, __pte(entry));
160 flush_tlb_page(vma, address);
161 swap_duplicate(entry);
162 add_to_swap_cache(page_map, entry);
163
164
165 set_bit(PG_locked, &page_map->flags);
166
167
168 rw_swap_page(WRITE, entry, (char *) page, 0);
169
170 __free_page(page_map);
171 return 1;
172}
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
189 pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
190{
191 pte_t * pte;
192 unsigned long pmd_end;
193
194 if (pmd_none(*dir))
195 return 0;
196 if (pmd_bad(*dir)) {
197 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
198 pmd_clear(dir);
199 return 0;
200 }
201
202 pte = pte_offset(dir, address);
203
204 pmd_end = (address + PMD_SIZE) & PMD_MASK;
205 if (end > pmd_end)
206 end = pmd_end;
207
208 do {
209 int result;
210 tsk->mm->swap_address = address + PAGE_SIZE;
211 result = try_to_swap_out(tsk, vma, address, pte, gfp_mask);
212 if (result)
213 return result;
214 if (current->need_resched)
215 return 2;
216 address += PAGE_SIZE;
217 pte++;
218 } while (address < end);
219 return 0;
220}
221
222static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
223 pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
224{
225 pmd_t * pmd;
226 unsigned long pgd_end;
227
228 if (pgd_none(*dir))
229 return 0;
230 if (pgd_bad(*dir)) {
231 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
232 pgd_clear(dir);
233 return 0;
234 }
235
236 pmd = pmd_offset(dir, address);
237
238 pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
239 if (end > pgd_end)
240 end = pgd_end;
241
242 do {
243 int result = swap_out_pmd(tsk, vma, pmd, address, end, gfp_mask);
244 if (result)
245 return result;
246 address = (address + PMD_SIZE) & PMD_MASK;
247 pmd++;
248 } while (address < end);
249 return 0;
250}
251
252static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
253 unsigned long address, int gfp_mask)
254{
255 pgd_t *pgdir;
256 unsigned long end;
257
258
259 if (vma->vm_flags & VM_LOCKED)
260 return 0;
261
262 pgdir = pgd_offset(tsk->mm, address);
263
264 end = vma->vm_end;
265 while (address < end) {
266 int result = swap_out_pgd(tsk, vma, pgdir, address, end, gfp_mask);
267 if (result)
268 return result;
269 address = (address + PGDIR_SIZE) & PGDIR_MASK;
270 pgdir++;
271 }
272 return 0;
273}
274
275static int swap_out_process(struct task_struct * p, int gfp_mask)
276{
277 unsigned long address;
278 struct vm_area_struct* vma;
279
280
281
282
283 address = p->mm->swap_address;
284
285
286
287
288 vma = find_vma(p->mm, address);
289 if (vma) {
290 if (address < vma->vm_start)
291 address = vma->vm_start;
292
293 for (;;) {
294 int result = swap_out_vma(p, vma, address, gfp_mask);
295 if (result)
296 return result;
297 vma = vma->vm_next;
298 if (!vma)
299 break;
300 address = vma->vm_start;
301 }
302 }
303
304
305 p->mm->swap_cnt = 0;
306 p->mm->swap_address = 0;
307 return 0;
308}
309
310
311
312
313
314
315static int swap_out(unsigned int priority, int gfp_mask)
316{
317 struct task_struct * p, * pbest;
318 int assign = 0, counter;
319 unsigned long max_cnt;
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335 counter = nr_tasks / priority;
336 if (counter < 1)
337 counter = 1;
338
339 for (; counter >= 0; counter--) {
340 max_cnt = 0;
341 pbest = NULL;
342 select:
343 read_lock(&tasklist_lock);
344 p = init_task.next_task;
345 for (; p != &init_task; p = p->next_task) {
346 if (!p->swappable)
347 continue;
348 if (p->mm->rss <= 0)
349 continue;
350
351 if (assign == 1)
352 p->mm->swap_cnt = p->mm->rss;
353 if (p->mm->swap_cnt > max_cnt) {
354 max_cnt = p->mm->swap_cnt;
355 pbest = p;
356 }
357 }
358 read_unlock(&tasklist_lock);
359 if (assign == 1)
360 assign = 2;
361 if (!pbest) {
362 if (!assign) {
363 assign = 1;
364 goto select;
365 }
366 goto out;
367 }
368
369 switch (swap_out_process(pbest, gfp_mask)) {
370 case 1:
371 return 1;
372 case 2:
373 current->state = TASK_RUNNING;
374 schedule();
375 }
376 }
377out:
378 return 0;
379}
380
381
382
383
384
385
386
387
388
389
390int try_to_free_pages(unsigned int gfp_mask)
391{
392 int priority;
393 int count = SWAP_CLUSTER_MAX;
394
395 lock_kernel();
396
397
398 kmem_cache_reap(gfp_mask);
399
400 priority = 5;
401 do {
402
403
404
405
406 shrink_dcache_memory(priority, gfp_mask);
407
408 while (shrink_mmap(priority, gfp_mask)) {
409 if (!--count)
410 goto done;
411 }
412
413
414 if (gfp_mask & __GFP_IO && !current->fs_locks) {
415 while (shm_swap(priority, gfp_mask)) {
416 if (!--count)
417 goto done;
418 }
419 }
420
421
422 while (swap_out(priority, gfp_mask)) {
423 if (!--count)
424 goto done;
425 }
426
427 } while (--priority > 0);
428done:
429 unlock_kernel();
430
431
432 return priority > 0;
433}
434
435
436
437
438
439
440
441void __init kswapd_setup(void)
442{
443 int i;
444 char *revision="$Revision: 1.5 $", *s, *e;
445
446 swap_setup();
447
448 if ((s = strchr(revision, ':')) &&
449 (e = strchr(s, '$')))
450 s++, i = e - s;
451 else
452 s = revision, i = -1;
453 printk ("Starting kswapd v%.*s\n", i, s);
454}
455
/*
 * Wait queue that kswapd() sleeps on between passes.
 * NOTE(review): presumably woken from outside this file - confirm callers.
 */
struct wait_queue *kswapd_wait;
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472int kswapd(void *unused)
473{
474 struct task_struct *tsk = current;
475
476 tsk->session = 1;
477 tsk->pgrp = 1;
478 strcpy(tsk->comm, "kswapd");
479 sigfillset(&tsk->blocked);
480
481
482
483
484
485
486
487
488
489
490
491
492
493 tsk->flags |= PF_MEMALLOC;
494
495 while (1) {
496
497
498
499
500
501
502
503
504 interruptible_sleep_on(&kswapd_wait);
505
506 while (nr_free_pages < freepages.high)
507 {
508 if (try_to_free_pages(GFP_KSWAPD))
509 {
510 if (tsk->need_resched)
511 schedule();
512 continue;
513 }
514 tsk->state = TASK_INTERRUPTIBLE;
515 schedule_timeout(10*HZ);
516 }
517 }
518}
519