1
2
3
4
5
6
7
8#include <linux/malloc.h>
9#include <linux/smp_lock.h>
10#include <linux/kernel_stat.h>
11#include <linux/swap.h>
12#include <linux/swapctl.h>
13#include <linux/blkdev.h>
14#include <linux/vmalloc.h>
15#include <linux/pagemap.h>
16#include <linux/shm.h>
17
18#include <asm/pgtable.h>
19
20unsigned int nr_swapfiles = 0;
21
22struct swap_list_t swap_list = {-1, -1};
23
24struct swap_info_struct swap_info[MAX_SWAPFILES];
25
26#define SWAPFILE_CLUSTER 256
27
28static inline int scan_swap_map(struct swap_info_struct *si)
29{
30 unsigned long offset;
31
32
33
34
35
36
37
38
39 if (si->cluster_nr) {
40 while (si->cluster_next <= si->highest_bit) {
41 offset = si->cluster_next++;
42 if (si->swap_map[offset])
43 continue;
44 if (test_bit(offset, si->swap_lockmap))
45 continue;
46 si->cluster_nr--;
47 goto got_page;
48 }
49 }
50 si->cluster_nr = SWAPFILE_CLUSTER;
51 for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
52 if (si->swap_map[offset])
53 continue;
54 if (test_bit(offset, si->swap_lockmap))
55 continue;
56 si->lowest_bit = offset;
57got_page:
58 si->swap_map[offset] = 1;
59 nr_swap_pages--;
60 if (offset == si->highest_bit)
61 si->highest_bit--;
62 si->cluster_next = offset;
63 return offset;
64 }
65 return 0;
66}
67
68unsigned long get_swap_page(void)
69{
70 struct swap_info_struct * p;
71 unsigned long offset, entry;
72 int type, wrapped = 0;
73
74 type = swap_list.next;
75 if (type < 0)
76 return 0;
77 if (nr_swap_pages == 0)
78 return 0;
79
80 while (1) {
81 p = &swap_info[type];
82 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
83 offset = scan_swap_map(p);
84 if (offset) {
85 entry = SWP_ENTRY(type,offset);
86 type = swap_info[type].next;
87 if (type < 0 ||
88 p->prio != swap_info[type].prio)
89 {
90 swap_list.next = swap_list.head;
91 }
92 else
93 {
94 swap_list.next = type;
95 }
96 return entry;
97 }
98 }
99 type = p->next;
100 if (!wrapped) {
101 if (type < 0 || p->prio != swap_info[type].prio) {
102 type = swap_list.head;
103 wrapped = 1;
104 }
105 } else if (type < 0) {
106 return 0;
107 }
108 }
109}
110
111
112void swap_free(unsigned long entry)
113{
114 struct swap_info_struct * p;
115 unsigned long offset, type;
116
117 if (!entry)
118 goto out;
119
120 type = SWP_TYPE(entry);
121 if (type & SHM_SWP_TYPE)
122 goto out;
123 if (type >= nr_swapfiles)
124 goto bad_nofile;
125 p = & swap_info[type];
126 if (!(p->flags & SWP_USED))
127 goto bad_device;
128 if (p->prio > swap_info[swap_list.next].prio)
129 swap_list.next = swap_list.head;
130 offset = SWP_OFFSET(entry);
131 if (offset >= p->max)
132 goto bad_offset;
133 if (!p->swap_map[offset])
134 goto bad_free;
135 if (p->swap_map[offset] < SWAP_MAP_MAX) {
136 if (!--p->swap_map[offset])
137 {
138 if (offset < p->lowest_bit)
139 p->lowest_bit = offset;
140 if (offset > p->highest_bit)
141 p->highest_bit = offset;
142 nr_swap_pages++;
143 }
144 }
145#ifdef DEBUG_SWAP
146 printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
147 entry, p->swap_map[offset]);
148#endif
149out:
150 return;
151
152bad_nofile:
153 printk("swap_free: Trying to free nonexistent swap-page\n");
154 goto out;
155bad_device:
156 printk("swap_free: Trying to free swap from unused swap-device\n");
157 goto out;
158bad_offset:
159 printk("swap_free: offset exceeds max\n");
160 goto out;
161bad_free:
162 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
163 goto out;
164}
165
166
167
168
169
170
171
172
173
174
175static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
176 pte_t *dir, unsigned long entry, unsigned long page)
177{
178 pte_t pte = *dir;
179
180 if (pte_none(pte))
181 return;
182 if (pte_present(pte)) {
183
184
185
186 if (pte_page(pte) != page)
187 return;
188
189 set_pte(dir, pte_mkdirty(pte));
190 return;
191 }
192 if (pte_val(pte) != entry)
193 return;
194 set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
195 swap_free(entry);
196 atomic_inc(&mem_map[MAP_NR(page)].count);
197 ++vma->vm_mm->rss;
198}
199
200static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
201 unsigned long address, unsigned long size, unsigned long offset,
202 unsigned long entry, unsigned long page)
203{
204 pte_t * pte;
205 unsigned long end;
206
207 if (pmd_none(*dir))
208 return;
209 if (pmd_bad(*dir)) {
210 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
211 pmd_clear(dir);
212 return;
213 }
214 pte = pte_offset(dir, address);
215 offset += address & PMD_MASK;
216 address &= ~PMD_MASK;
217 end = address + size;
218 if (end > PMD_SIZE)
219 end = PMD_SIZE;
220 do {
221 unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
222 address += PAGE_SIZE;
223 pte++;
224 } while (address < end);
225}
226
227static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
228 unsigned long address, unsigned long size,
229 unsigned long entry, unsigned long page)
230{
231 pmd_t * pmd;
232 unsigned long offset, end;
233
234 if (pgd_none(*dir))
235 return;
236 if (pgd_bad(*dir)) {
237 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
238 pgd_clear(dir);
239 return;
240 }
241 pmd = pmd_offset(dir, address);
242 offset = address & PGDIR_MASK;
243 address &= ~PGDIR_MASK;
244 end = address + size;
245 if (end > PGDIR_SIZE)
246 end = PGDIR_SIZE;
247 do {
248 unuse_pmd(vma, pmd, address, end - address, offset, entry,
249 page);
250 address = (address + PMD_SIZE) & PMD_MASK;
251 pmd++;
252 } while (address < end);
253}
254
255static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
256 unsigned long entry, unsigned long page)
257{
258 unsigned long start = vma->vm_start, end = vma->vm_end;
259
260 while (start < end) {
261 unuse_pgd(vma, pgdir, start, end - start, entry, page);
262 start = (start + PGDIR_SIZE) & PGDIR_MASK;
263 pgdir++;
264 }
265}
266
267static void unuse_process(struct mm_struct * mm, unsigned long entry,
268 unsigned long page)
269{
270 struct vm_area_struct* vma;
271
272
273
274
275 if (!mm || mm == &init_mm)
276 return;
277 for (vma = mm->mmap; vma; vma = vma->vm_next) {
278 pgd_t * pgd = pgd_offset(mm, vma->vm_start);
279 unuse_vma(vma, pgd, entry, page);
280 }
281 return;
282}
283
284
285
286
287
288
289static int try_to_unuse(unsigned int type)
290{
291 struct swap_info_struct * si = &swap_info[type];
292 struct task_struct *p;
293 struct page *page_map;
294 unsigned long entry, page;
295 int i;
296
297 while (1) {
298
299
300
301 for (i = 1; i < si->max ; i++) {
302 if (si->swap_map[i] > 0 && si->swap_map[i] != SWAP_MAP_BAD) {
303 goto found_entry;
304 }
305 }
306 break;
307
308 found_entry:
309 entry = SWP_ENTRY(type, i);
310
311
312
313
314 page_map = read_swap_cache(entry);
315 if (!page_map) {
316
317
318
319 if (si->swap_map[i] == 0)
320 continue;
321 return -ENOMEM;
322 }
323 page = page_address(page_map);
324 read_lock(&tasklist_lock);
325 for_each_task(p)
326 unuse_process(p->mm, entry, page);
327 read_unlock(&tasklist_lock);
328 shm_unuse(entry, page);
329
330
331 if (PageSwapCache(page_map))
332 delete_from_swap_cache(page_map);
333 __free_page(page_map);
334
335
336
337 if (si->swap_map[i] != 0) {
338 if (si->swap_map[i] != SWAP_MAP_MAX)
339 printk(KERN_ERR
340 "try_to_unuse: entry %08lx count=%d\n",
341 entry, si->swap_map[i]);
342 si->swap_map[i] = 0;
343 nr_swap_pages++;
344 }
345 }
346 return 0;
347}
348
349asmlinkage int sys_swapoff(const char * specialfile)
350{
351 struct swap_info_struct * p = NULL;
352 struct dentry * dentry;
353 struct file filp;
354 int i, type, prev;
355 int err = -EPERM;
356
357 lock_kernel();
358 if (!capable(CAP_SYS_ADMIN))
359 goto out;
360
361 dentry = namei(specialfile);
362 err = PTR_ERR(dentry);
363 if (IS_ERR(dentry))
364 goto out;
365
366 prev = -1;
367 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
368 p = swap_info + type;
369 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
370 if (p->swap_file == dentry)
371 break;
372 if (S_ISBLK(dentry->d_inode->i_mode) &&
373 p->swap_device == dentry->d_inode->i_rdev)
374 break;
375 }
376 prev = type;
377 }
378 err = -EINVAL;
379 if (type < 0)
380 goto out_dput;
381
382 if (prev < 0) {
383 swap_list.head = p->next;
384 } else {
385 swap_info[prev].next = p->next;
386 }
387 if (type == swap_list.next) {
388
389 swap_list.next = swap_list.head;
390 }
391 p->flags = SWP_USED;
392 err = try_to_unuse(type);
393 if (err) {
394
395 for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
396 if (p->prio >= swap_info[i].prio)
397 break;
398 p->next = i;
399 if (prev < 0)
400 swap_list.head = swap_list.next = p - swap_info;
401 else
402 swap_info[prev].next = p - swap_info;
403 p->flags = SWP_WRITEOK;
404 goto out_dput;
405 }
406 if(p->swap_device){
407 memset(&filp, 0, sizeof(filp));
408 filp.f_dentry = dentry;
409 filp.f_mode = 3;
410
411 if( !blkdev_open(dentry->d_inode, &filp) &&
412 filp.f_op && filp.f_op->release){
413 filp.f_op->release(dentry->d_inode,&filp);
414 filp.f_op->release(dentry->d_inode,&filp);
415 }
416 }
417 dput(dentry);
418
419 dentry = p->swap_file;
420 p->swap_file = NULL;
421 nr_swap_pages -= p->pages;
422 p->swap_device = 0;
423 vfree(p->swap_map);
424 p->swap_map = NULL;
425 vfree(p->swap_lockmap);
426 p->swap_lockmap = NULL;
427 p->flags = 0;
428 err = 0;
429
430out_dput:
431 dput(dentry);
432out:
433 unlock_kernel();
434 return err;
435}
436
437int get_swaparea_info(char *buf)
438{
439 char * page = (char *) __get_free_page(GFP_KERNEL);
440 struct swap_info_struct *ptr = swap_info;
441 int i, j, len = 0, usedswap;
442
443 if (!page)
444 return -ENOMEM;
445
446 len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
447 for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
448 if ((ptr->flags & SWP_USED) && ptr->swap_map) {
449 char * path = d_path(ptr->swap_file, page, PAGE_SIZE);
450
451 len += sprintf(buf + len, "%-31s ", path);
452
453 if (!ptr->swap_device)
454 len += sprintf(buf + len, "file\t\t");
455 else
456 len += sprintf(buf + len, "partition\t");
457
458 usedswap = 0;
459 for (j = 0; j < ptr->max; ++j)
460 switch (ptr->swap_map[j]) {
461 case SWAP_MAP_BAD:
462 case 0:
463 continue;
464 default:
465 usedswap++;
466 }
467 len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
468 usedswap << (PAGE_SHIFT - 10), ptr->prio);
469 }
470 }
471 free_page((unsigned long) page);
472 return len;
473}
474
475
476
477
478
479
480asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
481{
482 struct swap_info_struct * p;
483 struct dentry * swap_dentry;
484 unsigned int type;
485 int i, j, prev;
486 int error = -EPERM;
487 struct file filp;
488 static int least_priority = 0;
489 union swap_header *swap_header = 0;
490 int swap_header_version;
491 int lock_map_size = PAGE_SIZE;
492 int nr_good_pages = 0;
493 unsigned long tmp_lock_map = 0;
494 int swapfilesize;
495
496 lock_kernel();
497 if (!capable(CAP_SYS_ADMIN))
498 goto out;
499 memset(&filp, 0, sizeof(filp));
500 p = swap_info;
501 for (type = 0 ; type < nr_swapfiles ; type++,p++)
502 if (!(p->flags & SWP_USED))
503 break;
504 if (type >= MAX_SWAPFILES)
505 goto out;
506 if (type >= nr_swapfiles)
507 nr_swapfiles = type+1;
508 p->flags = SWP_USED;
509 p->swap_file = NULL;
510 p->swap_device = 0;
511 p->swap_map = NULL;
512 p->swap_lockmap = NULL;
513 p->lowest_bit = 0;
514 p->highest_bit = 0;
515 p->cluster_nr = 0;
516 p->max = 1;
517 p->next = -1;
518 if (swap_flags & SWAP_FLAG_PREFER) {
519 p->prio =
520 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
521 } else {
522 p->prio = --least_priority;
523 }
524 swap_dentry = namei(specialfile);
525 error = PTR_ERR(swap_dentry);
526 if (IS_ERR(swap_dentry))
527 goto bad_swap_2;
528
529 p->swap_file = swap_dentry;
530 error = -EINVAL;
531
532 if (S_ISBLK(swap_dentry->d_inode->i_mode)) {
533 kdev_t dev = swap_dentry->d_inode->i_rdev;
534
535 p->swap_device = dev;
536 set_blocksize(dev, PAGE_SIZE);
537
538 filp.f_dentry = swap_dentry;
539 filp.f_mode = 3;
540 error = blkdev_open(swap_dentry->d_inode, &filp);
541 if (error)
542 goto bad_swap_2;
543 set_blocksize(dev, PAGE_SIZE);
544 error = -ENODEV;
545 if (!dev || (blk_size[MAJOR(dev)] &&
546 !blk_size[MAJOR(dev)][MINOR(dev)]))
547 goto bad_swap;
548 error = -EBUSY;
549 for (i = 0 ; i < nr_swapfiles ; i++) {
550 if (i == type)
551 continue;
552 if (dev == swap_info[i].swap_device)
553 goto bad_swap;
554 }
555 swapfilesize = 0;
556 if (blk_size[MAJOR(dev)])
557 swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
558 >> (PAGE_SHIFT - 10);
559 } else if (S_ISREG(swap_dentry->d_inode->i_mode)) {
560 error = -EBUSY;
561 for (i = 0 ; i < nr_swapfiles ; i++) {
562 if (i == type || !swap_info[i].swap_file)
563 continue;
564 if (swap_dentry->d_inode == swap_info[i].swap_file->d_inode)
565 goto bad_swap;
566 }
567 swapfilesize = swap_dentry->d_inode->i_size >> PAGE_SHIFT;
568 } else
569 goto bad_swap;
570
571 swap_header = (void *) __get_free_page(GFP_USER);
572 if (!swap_header) {
573 printk("Unable to start swapping: out of memory :-)\n");
574 error = -ENOMEM;
575 goto bad_swap;
576 }
577
578 p->swap_lockmap = (char *) &tmp_lock_map;
579 rw_swap_page_nocache(READ, SWP_ENTRY(type,0), (char *) swap_header);
580 p->swap_lockmap = NULL;
581
582 if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
583 swap_header_version = 1;
584 else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
585 swap_header_version = 2;
586 else {
587 printk("Unable to find swap-space signature\n");
588 error = -EINVAL;
589 goto bad_swap;
590 }
591
592 switch (swap_header_version) {
593 case 1:
594 memset(((char *) swap_header)+PAGE_SIZE-10,0,10);
595 j = 0;
596 p->lowest_bit = 0;
597 p->highest_bit = 0;
598 for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
599 if (test_bit(i,(char *) swap_header)) {
600 if (!p->lowest_bit)
601 p->lowest_bit = i;
602 p->highest_bit = i;
603 p->max = i+1;
604 j++;
605 }
606 }
607 nr_good_pages = j;
608 p->swap_map = vmalloc(p->max * sizeof(short));
609 if (!p->swap_map) {
610 error = -ENOMEM;
611 goto bad_swap;
612 }
613 for (i = 1 ; i < p->max ; i++) {
614 if (test_bit(i,(char *) swap_header))
615 p->swap_map[i] = 0;
616 else
617 p->swap_map[i] = SWAP_MAP_BAD;
618 }
619 break;
620
621 case 2:
622
623
624 if (swap_header->info.version != 1) {
625 printk(KERN_WARNING
626 "Unable to handle swap header version %d\n",
627 swap_header->info.version);
628 error = -EINVAL;
629 goto bad_swap;
630 }
631
632 p->lowest_bit = 1;
633 p->highest_bit = swap_header->info.last_page - 1;
634 p->max = swap_header->info.last_page;
635
636 error = -EINVAL;
637 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
638 goto bad_swap;
639 if (p->max >= SWP_OFFSET(SWP_ENTRY(0,~0UL)))
640 goto bad_swap;
641
642
643 if (!(p->swap_map = vmalloc (p->max * sizeof(short)))) {
644 error = -ENOMEM;
645 goto bad_swap;
646 }
647
648 error = 0;
649 memset(p->swap_map, 0, p->max * sizeof(short));
650 for (i=0; i<swap_header->info.nr_badpages; i++) {
651 int page = swap_header->info.badpages[i];
652 if (page <= 0 || page >= swap_header->info.last_page)
653 error = -EINVAL;
654 else
655 p->swap_map[page] = SWAP_MAP_BAD;
656 }
657 nr_good_pages = swap_header->info.last_page -
658 swap_header->info.nr_badpages - 1;
659 lock_map_size = (p->max + 7) / 8;
660 if (error)
661 goto bad_swap;
662 }
663
664 if (swapfilesize && p->max > swapfilesize) {
665 printk(KERN_WARNING
666 "Swap area shorter than signature indicates\n");
667 error = -EINVAL;
668 goto bad_swap;
669 }
670 if (!nr_good_pages) {
671 printk(KERN_WARNING "Empty swap-file\n");
672 error = -EINVAL;
673 goto bad_swap;
674 }
675 p->swap_map[0] = SWAP_MAP_BAD;
676 if (!(p->swap_lockmap = vmalloc (lock_map_size))) {
677 error = -ENOMEM;
678 goto bad_swap;
679 }
680 memset(p->swap_lockmap,0,lock_map_size);
681 p->flags = SWP_WRITEOK;
682 p->pages = nr_good_pages;
683 nr_swap_pages += nr_good_pages;
684 printk(KERN_INFO "Adding Swap: %dk swap-space (priority %d)\n",
685 nr_good_pages<<(PAGE_SHIFT-10), p->prio);
686
687
688 prev = -1;
689 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
690 if (p->prio >= swap_info[i].prio) {
691 break;
692 }
693 prev = i;
694 }
695 p->next = i;
696 if (prev < 0) {
697 swap_list.head = swap_list.next = p - swap_info;
698 } else {
699 swap_info[prev].next = p - swap_info;
700 }
701 error = 0;
702 goto out;
703bad_swap:
704 if(filp.f_op && filp.f_op->release)
705 filp.f_op->release(filp.f_dentry->d_inode,&filp);
706bad_swap_2:
707 if (p->swap_lockmap)
708 vfree(p->swap_lockmap);
709 if (p->swap_map)
710 vfree(p->swap_map);
711 dput(p->swap_file);
712 p->swap_device = 0;
713 p->swap_file = NULL;
714 p->swap_map = NULL;
715 p->swap_lockmap = NULL;
716 p->flags = 0;
717 if (!(swap_flags & SWAP_FLAG_PREFER))
718 ++least_priority;
719out:
720 if (swap_header)
721 free_page((long) swap_header);
722 unlock_kernel();
723 return error;
724}
725
726void si_swapinfo(struct sysinfo *val)
727{
728 unsigned int i, j;
729
730 val->freeswap = val->totalswap = 0;
731 for (i = 0; i < nr_swapfiles; i++) {
732 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
733 continue;
734 for (j = 0; j < swap_info[i].max; ++j)
735 switch (swap_info[i].swap_map[j]) {
736 case SWAP_MAP_BAD:
737 continue;
738 case 0:
739 ++val->freeswap;
740 default:
741 ++val->totalswap;
742 }
743 }
744 val->freeswap <<= PAGE_SHIFT;
745 val->totalswap <<= PAGE_SHIFT;
746 return;
747}
748