1
2
3
4
5
6
7
8#include <linux/config.h>
9#include <linux/mm.h>
10#include <linux/hugetlb.h>
11#include <linux/mman.h>
12#include <linux/slab.h>
13#include <linux/kernel_stat.h>
14#include <linux/swap.h>
15#include <linux/vmalloc.h>
16#include <linux/pagemap.h>
17#include <linux/namei.h>
18#include <linux/shm.h>
19#include <linux/blkdev.h>
20#include <linux/writeback.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/rmap.h>
26#include <linux/security.h>
27#include <linux/acct.h>
28#include <linux/backing-dev.h>
29#include <linux/syscalls.h>
30
31#include <asm/pgtable.h>
32#include <asm/tlbflush.h>
33#include <linux/swapops.h>
34
/*
 * Global spinlock for swap bookkeeping.  It is not taken by name in this
 * file; presumably swap_list_lock()/swap_list_unlock() wrap it -- confirm
 * against the header that defines them.
 */
DEFINE_SPINLOCK(swaplock);
/* Number of swap_info[] slots in use so far; raised by sys_swapon(). */
unsigned int nr_swapfiles;
/* Sum of p->pages over enabled swap areas (see sys_swapon/sys_swapoff). */
long total_swap_pages;
/* Number of swap_duplicate() overflows seen; limits its warning to 5. */
static int swap_overflow;

EXPORT_SYMBOL(total_swap_pages);

/* Message fragments shared by the swap_free/swap_dup diagnostics below. */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/*
 * Priority-ordered list of swap areas, linked through swap_info[].next.
 * Both members (.head and .next are the ones used in this file) start at
 * -1, which the code below treats as "no swap area".
 */
struct swap_list_t swap_list = {-1, -1};

/* One descriptor per possible swap area, indexed by swap type. */
struct swap_info_struct swap_info[MAX_SWAPFILES];

/*
 * Held around activation in sys_swapon(), final teardown in sys_swapoff()
 * and for the duration of a /proc/swaps walk (swap_start..swap_stop).
 */
static DECLARE_MUTEX(swapon_sem);

/*
 * Taken for reading by swap_unplug_io_fn() while it dereferences
 * swap_info[].bdev; sys_swapoff() takes and immediately drops it for
 * writing so that any unplug still in flight finishes before the area
 * is torn down.
 */
static DECLARE_RWSEM(swap_unplug_sem);

/* Number of slots scan_swap_map() tries to hand out contiguously. */
#define SWAPFILE_CLUSTER 256
61
62void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
63{
64 swp_entry_t entry;
65
66 down_read(&swap_unplug_sem);
67 entry.val = page->private;
68 if (PageSwapCache(page)) {
69 struct block_device *bdev = swap_info[swp_type(entry)].bdev;
70 struct backing_dev_info *bdi;
71
72
73
74
75
76
77
78
79
80 WARN_ON(page_count(page) <= 1);
81
82 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
83 bdi->unplug_io_fn(bdi, page);
84 }
85 up_read(&swap_unplug_sem);
86}
87
/*
 * Find a free slot in si->swap_map, mark it in use (count 1) and return
 * its offset.  Returns 0 when no free slot exists.
 *
 * The search has three stages:
 *   1. while the current cluster budget (si->cluster_nr) lasts, keep
 *      allocating sequentially from si->cluster_next;
 *   2. otherwise look for a run of SWAPFILE_CLUSTER free slots, starting
 *      at si->lowest_bit, and begin a new cluster there;
 *   3. otherwise take the first free slot between lowest_bit and
 *      highest_bit.
 *
 * The only caller in this file, get_swap_page(), invokes this under
 * swap_device_lock(si).
 */
static inline int scan_swap_map(struct swap_info_struct *si)
{
	unsigned long offset;

	/* Stage 1: continue the current cluster if there is budget left. */
	if (si->cluster_nr) {
		while (si->cluster_next <= si->highest_bit) {
			offset = si->cluster_next++;
			if (si->swap_map[offset])
				continue;
			si->cluster_nr--;
			goto got_page;
		}
	}
	/* Budget used up (or cluster ran off the end): start a new one. */
	si->cluster_nr = SWAPFILE_CLUSTER;

	/* Stage 2: look for SWAPFILE_CLUSTER consecutive free slots. */
	offset = si->lowest_bit;
 check_next_cluster:
	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
	{
		unsigned long nr;
		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
			if (si->swap_map[nr])
			{
				/* Slot nr is busy: retry just past it. */
				offset = nr+1;
				goto check_next_cluster;
			}
		/* The whole run starting at offset is free; use its first slot. */
		goto got_page;
	}

	/* Stage 3: plain first-free scan. */
	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
		if (si->swap_map[offset])
			continue;
		si->lowest_bit = offset+1;
	got_page:
		/* Shrink the [lowest_bit, highest_bit] window if we hit an edge. */
		if (offset == si->lowest_bit)
			si->lowest_bit++;
		if (offset == si->highest_bit)
			si->highest_bit--;
		/* Window became empty: store the "nothing free" markers. */
		if (si->lowest_bit > si->highest_bit) {
			si->lowest_bit = si->max;
			si->highest_bit = 0;
		}
		si->swap_map[offset] = 1;
		si->inuse_pages++;
		nr_swap_pages--;
		si->cluster_next = offset+1;
		return offset;
	}
	/* Nothing free at all. */
	si->lowest_bit = si->max;
	si->highest_bit = 0;
	return 0;
}
151
152swp_entry_t get_swap_page(void)
153{
154 struct swap_info_struct * p;
155 unsigned long offset;
156 swp_entry_t entry;
157 int type, wrapped = 0;
158
159 entry.val = 0;
160 swap_list_lock();
161 type = swap_list.next;
162 if (type < 0)
163 goto out;
164 if (nr_swap_pages <= 0)
165 goto out;
166
167 while (1) {
168 p = &swap_info[type];
169 if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
170 swap_device_lock(p);
171 offset = scan_swap_map(p);
172 swap_device_unlock(p);
173 if (offset) {
174 entry = swp_entry(type,offset);
175 type = swap_info[type].next;
176 if (type < 0 ||
177 p->prio != swap_info[type].prio) {
178 swap_list.next = swap_list.head;
179 } else {
180 swap_list.next = type;
181 }
182 goto out;
183 }
184 }
185 type = p->next;
186 if (!wrapped) {
187 if (type < 0 || p->prio != swap_info[type].prio) {
188 type = swap_list.head;
189 wrapped = 1;
190 }
191 } else
192 if (type < 0)
193 goto out;
194 }
195out:
196 swap_list_unlock();
197 return entry;
198}
199
200static struct swap_info_struct * swap_info_get(swp_entry_t entry)
201{
202 struct swap_info_struct * p;
203 unsigned long offset, type;
204
205 if (!entry.val)
206 goto out;
207 type = swp_type(entry);
208 if (type >= nr_swapfiles)
209 goto bad_nofile;
210 p = & swap_info[type];
211 if (!(p->flags & SWP_USED))
212 goto bad_device;
213 offset = swp_offset(entry);
214 if (offset >= p->max)
215 goto bad_offset;
216 if (!p->swap_map[offset])
217 goto bad_free;
218 swap_list_lock();
219 if (p->prio > swap_info[swap_list.next].prio)
220 swap_list.next = type;
221 swap_device_lock(p);
222 return p;
223
224bad_free:
225 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
226 goto out;
227bad_offset:
228 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
229 goto out;
230bad_device:
231 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
232 goto out;
233bad_nofile:
234 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
235out:
236 return NULL;
237}
238
/*
 * Counterpart of swap_info_get(): drop the device lock of @p and then the
 * swap list lock, i.e. in the reverse of the order swap_info_get() took
 * them.
 */
static void swap_info_put(struct swap_info_struct * p)
{
	swap_device_unlock(p);
	swap_list_unlock();
}
244
245static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
246{
247 int count = p->swap_map[offset];
248
249 if (count < SWAP_MAP_MAX) {
250 count--;
251 p->swap_map[offset] = count;
252 if (!count) {
253 if (offset < p->lowest_bit)
254 p->lowest_bit = offset;
255 if (offset > p->highest_bit)
256 p->highest_bit = offset;
257 nr_swap_pages++;
258 p->inuse_pages--;
259 }
260 }
261 return count;
262}
263
264
265
266
267
268void swap_free(swp_entry_t entry)
269{
270 struct swap_info_struct * p;
271
272 p = swap_info_get(entry);
273 if (p) {
274 swap_entry_free(p, swp_offset(entry));
275 swap_info_put(p);
276 }
277}
278
279
280
281
282
283static int exclusive_swap_page(struct page *page)
284{
285 int retval = 0;
286 struct swap_info_struct * p;
287 swp_entry_t entry;
288
289 entry.val = page->private;
290 p = swap_info_get(entry);
291 if (p) {
292
293 if (p->swap_map[swp_offset(entry)] == 1) {
294
295 spin_lock_irq(&swapper_space.tree_lock);
296 if (page_count(page) == 2)
297 retval = 1;
298 spin_unlock_irq(&swapper_space.tree_lock);
299 }
300 swap_info_put(p);
301 }
302 return retval;
303}
304
305
306
307
308
309
310
311
312
/*
 * Decide, from the reference count, whether the caller is the only user
 * of @page.  The page must be locked (BUG otherwise).
 *
 *   count 1: yes, unless the page is reserved;
 *   count 2: only for a swap-cache page that exclusive_swap_page()
 *            accepts;
 *   count 3: treated like count 2, but only when PagePrivate is set;
 *   other:   no.
 *
 * Returns 1 for yes, 0 for no.
 */
int can_share_swap_page(struct page *page)
{
	int count;

	if (!PageLocked(page))
		BUG();

	count = page_count(page);
	if (count == 1) {
		if (PageReserved(page))
			return 0;
		return 1;
	}
	if (count != 2 && count != 3)
		return 0;
	if (count == 3 && !PagePrivate(page))
		return 0;
	if (!PageSwapCache(page))
		return 0;
	return exclusive_swap_page(page);
}
336
337
338
339
340
341int remove_exclusive_swap_page(struct page *page)
342{
343 int retval;
344 struct swap_info_struct * p;
345 swp_entry_t entry;
346
347 BUG_ON(PagePrivate(page));
348 BUG_ON(!PageLocked(page));
349
350 if (!PageSwapCache(page))
351 return 0;
352 if (PageWriteback(page))
353 return 0;
354 if (page_count(page) != 2)
355 return 0;
356
357 entry.val = page->private;
358 p = swap_info_get(entry);
359 if (!p)
360 return 0;
361
362
363 retval = 0;
364 if (p->swap_map[swp_offset(entry)] == 1) {
365
366 spin_lock_irq(&swapper_space.tree_lock);
367 if ((page_count(page) == 2) && !PageWriteback(page)) {
368 __delete_from_swap_cache(page);
369 SetPageDirty(page);
370 retval = 1;
371 }
372 spin_unlock_irq(&swapper_space.tree_lock);
373 }
374 swap_info_put(p);
375
376 if (retval) {
377 swap_free(entry);
378 page_cache_release(page);
379 }
380
381 return retval;
382}
383
384
385
386
387
388void free_swap_and_cache(swp_entry_t entry)
389{
390 struct swap_info_struct * p;
391 struct page *page = NULL;
392
393 p = swap_info_get(entry);
394 if (p) {
395 if (swap_entry_free(p, swp_offset(entry)) == 1) {
396 spin_lock_irq(&swapper_space.tree_lock);
397 page = radix_tree_lookup(&swapper_space.page_tree,
398 entry.val);
399 if (page && TestSetPageLocked(page))
400 page = NULL;
401 spin_unlock_irq(&swapper_space.tree_lock);
402 }
403 swap_info_put(p);
404 }
405 if (page) {
406 int one_user;
407
408 BUG_ON(PagePrivate(page));
409 page_cache_get(page);
410 one_user = (page_count(page) == 2);
411
412 if (!PageWriteback(page) && (one_user || vm_swap_full())) {
413 delete_from_swap_cache(page);
414 SetPageDirty(page);
415 }
416 unlock_page(page);
417 page_cache_release(page);
418 }
419}
420
421
422
423
424
425
426
427
428
429
430
431static void
432unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
433 swp_entry_t entry, struct page *page)
434{
435 vma->vm_mm->rss++;
436 get_page(page);
437 set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
438 page_add_anon_rmap(page, vma, address);
439 swap_free(entry);
440 acct_update_integrals();
441 update_mem_hiwater();
442}
443
444
445static unsigned long unuse_pmd(struct vm_area_struct *vma, pmd_t *dir,
446 unsigned long address, unsigned long end,
447 swp_entry_t entry, struct page *page)
448{
449 pte_t *pte;
450 pte_t swp_pte = swp_entry_to_pte(entry);
451
452 if (pmd_none(*dir))
453 return 0;
454 if (pmd_bad(*dir)) {
455 pmd_ERROR(*dir);
456 pmd_clear(dir);
457 return 0;
458 }
459 pte = pte_offset_map(dir, address);
460 do {
461
462
463
464
465 if (unlikely(pte_same(*pte, swp_pte))) {
466 unuse_pte(vma, address, pte, entry, page);
467 pte_unmap(pte);
468
469
470
471
472
473 activate_page(page);
474
475
476 return 1 + address;
477 }
478 address += PAGE_SIZE;
479 pte++;
480 } while (address < end);
481 pte_unmap(pte - 1);
482 return 0;
483}
484
485
486static unsigned long unuse_pud(struct vm_area_struct *vma, pud_t *pud,
487 unsigned long address, unsigned long end,
488 swp_entry_t entry, struct page *page)
489{
490 pmd_t *pmd;
491 unsigned long next;
492 unsigned long foundaddr;
493
494 if (pud_none(*pud))
495 return 0;
496 if (pud_bad(*pud)) {
497 pud_ERROR(*pud);
498 pud_clear(pud);
499 return 0;
500 }
501 pmd = pmd_offset(pud, address);
502 do {
503 next = (address + PMD_SIZE) & PMD_MASK;
504 if (next > end || !next)
505 next = end;
506 foundaddr = unuse_pmd(vma, pmd, address, next, entry, page);
507 if (foundaddr)
508 return foundaddr;
509 address = next;
510 pmd++;
511 } while (address < end);
512 return 0;
513}
514
515
516static unsigned long unuse_pgd(struct vm_area_struct *vma, pgd_t *pgd,
517 unsigned long address, unsigned long end,
518 swp_entry_t entry, struct page *page)
519{
520 pud_t *pud;
521 unsigned long next;
522 unsigned long foundaddr;
523
524 if (pgd_none(*pgd))
525 return 0;
526 if (pgd_bad(*pgd)) {
527 pgd_ERROR(*pgd);
528 pgd_clear(pgd);
529 return 0;
530 }
531 pud = pud_offset(pgd, address);
532 do {
533 next = (address + PUD_SIZE) & PUD_MASK;
534 if (next > end || !next)
535 next = end;
536 foundaddr = unuse_pud(vma, pud, address, next, entry, page);
537 if (foundaddr)
538 return foundaddr;
539 address = next;
540 pud++;
541 } while (address < end);
542 return 0;
543}
544
545
546static unsigned long unuse_vma(struct vm_area_struct *vma,
547 swp_entry_t entry, struct page *page)
548{
549 pgd_t *pgd;
550 unsigned long address, next, end;
551 unsigned long foundaddr;
552
553 if (page->mapping) {
554 address = page_address_in_vma(page, vma);
555 if (address == -EFAULT)
556 return 0;
557 else
558 end = address + PAGE_SIZE;
559 } else {
560 address = vma->vm_start;
561 end = vma->vm_end;
562 }
563 pgd = pgd_offset(vma->vm_mm, address);
564 do {
565 next = (address + PGDIR_SIZE) & PGDIR_MASK;
566 if (next > end || !next)
567 next = end;
568 foundaddr = unuse_pgd(vma, pgd, address, next, entry, page);
569 if (foundaddr)
570 return foundaddr;
571 address = next;
572 pgd++;
573 } while (address < end);
574 return 0;
575}
576
577static int unuse_process(struct mm_struct * mm,
578 swp_entry_t entry, struct page* page)
579{
580 struct vm_area_struct* vma;
581 unsigned long foundaddr = 0;
582
583
584
585
586 if (!down_read_trylock(&mm->mmap_sem)) {
587
588
589
590
591 unlock_page(page);
592 down_read(&mm->mmap_sem);
593 lock_page(page);
594 }
595 spin_lock(&mm->page_table_lock);
596 for (vma = mm->mmap; vma; vma = vma->vm_next) {
597 if (vma->anon_vma) {
598 foundaddr = unuse_vma(vma, entry, page);
599 if (foundaddr)
600 break;
601 }
602 }
603 spin_unlock(&mm->page_table_lock);
604 up_read(&mm->mmap_sem);
605
606
607
608
609 return 0;
610}
611
612
613
614
615
/*
 * Return the index of the next swap_map slot after @prev whose count is
 * non-zero and not SWAP_MAP_BAD, searching circularly: first from
 * prev + 1 up to si->max, then from slot 1 up to @prev itself.  Slot 0
 * is never examined.  Returns 0 when no such slot exists.
 *
 * The swap_map is read without taking any lock here.
 */
static int find_next_to_unuse(struct swap_info_struct *si, int prev)
{
	int max = si->max;
	int i = prev;
	int count;

	for (;;) {
		if (++i >= max) {
			if (!prev) {
				/*
				 * Either the scan started at the beginning
				 * or it has already wrapped once: done.
				 */
				i = 0;
				break;
			}
			/*
			 * Ran off the top: wrap around and scan slots
			 * 1..prev; prev = 0 records that we wrapped.
			 */
			max = prev + 1;
			prev = 0;
			i = 1;
		}
		count = si->swap_map[i];
		if (count && count != SWAP_MAP_BAD)
			break;
	}
	return i;
}
648
649
650
651
652
653
/*
 * Bring every in-use slot of swap area @type back into memory and drop
 * the references to it, so that the area can be released by
 * sys_swapoff().
 *
 * For each slot reported by find_next_to_unuse() the page is read into
 * the swap cache, then the mms on the mmlist (and, via init_mm, shmem)
 * are searched for references, which are replaced by mappings of the
 * page.  Finally the page is removed from the swap cache.
 *
 * Returns 0 on success, -EINTR if a signal is pending, -ENOMEM if a page
 * could not be read in while its slot is still in use, or a non-zero
 * value returned by unuse_process().
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * start_mm is the mm searched first for each slot.  It starts out
	 * as init_mm (which stands for "try shmem" below) and is later
	 * replaced by an mm in which a reference was actually found.  We
	 * always hold one mm_users reference on it.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/* i == 0 starts at the beginning; a return of 0 means "none left". */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* Get the page for this slot into the swap cache. */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry, NULL, 0);
		if (!page) {
			/*
			 * No page.  If the slot's count has dropped to zero
			 * in the meantime there is nothing left to do for
			 * it; otherwise report the failure as -ENOMEM.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * If ours is the only remaining reference on start_mm,
		 * drop it and go back to starting from init_mm.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for the page to be unlocked and for writeback to
		 * finish, lock it, and wait for writeback once more now
		 * that we hold the lock.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * A count above 1 means someone besides the swap cache
		 * still refers to the slot.  Try start_mm first.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_process(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			/*
			 * References remain: walk the mmlist starting after
			 * start_mm.  set_start_mm is true while we are still
			 * looking for a better start_mm, i.e. when start_mm
			 * itself did not lower the count.
			 */
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			/* One extra reference each for the two aliases. */
			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				/*
				 * If our increment took mm_users from 0 to 1
				 * the mm had no users left: undo and skip it.
				 */
				if (atomic_inc_return(&mm->mm_users) == 1) {
					atomic_dec(&mm->mm_users);
					continue;
				}
				/*
				 * mm is pinned by our reference, so the list
				 * lock can be dropped while we work on it.
				 * The previous mm is no longer needed.
				 */
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (swcount <= 1)
					;	/* nothing left to find */
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_process(mm, entry, page);
				/*
				 * This mm lowered the count: remember it as
				 * the start_mm for the following slots.
				 */
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * A count stuck at SWAP_MAP_MAX (see swap_duplicate) is
		 * forced down to 1 under the device lock; the overflow is
		 * reported once at the end.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_device_lock(si);
			*swap_map = 1;
			swap_device_unlock(si);
			reset_overflow = 1;
		}

		/*
		 * References we could not find remain and the cached page
		 * is dirty: start writing it to swap (WB_SYNC_NONE), then
		 * relock the page and wait for that writeback.
		 * NOTE(review): the relock implies swap_writepage() unlocks
		 * the page -- confirm against its definition.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}
		/*
		 * If the page is still in the swap cache, either take an
		 * extra swap reference (when shmem_unuse() returned
		 * non-zero) or remove the page from the swap cache.
		 */
		if (PageSwapCache(page)) {
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/* Mark the page dirty, then drop our lock and reference. */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/* Give other tasks a chance to run between slots. */
		cond_resched();
	}

	mmput(start_mm);
	if (reset_overflow) {
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
		swap_overflow = 0;
	}
	return retval;
}
880
881
882
883
884
885
886
887static void drain_mmlist(void)
888{
889 struct list_head *p, *next;
890 unsigned int i;
891
892 for (i = 0; i < nr_swapfiles; i++)
893 if (swap_info[i].inuse_pages)
894 return;
895 spin_lock(&mmlist_lock);
896 list_for_each_safe(p, next, &init_mm.mmlist)
897 list_del_init(p);
898 spin_unlock(&mmlist_lock);
899}
900
901
902
903
904
905sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
906{
907 struct swap_extent *se = sis->curr_swap_extent;
908 struct swap_extent *start_se = se;
909
910 for ( ; ; ) {
911 struct list_head *lh;
912
913 if (se->start_page <= offset &&
914 offset < (se->start_page + se->nr_pages)) {
915 return se->start_block + (offset - se->start_page);
916 }
917 lh = se->list.prev;
918 if (lh == &sis->extent_list)
919 lh = lh->prev;
920 se = list_entry(lh, struct swap_extent, list);
921 sis->curr_swap_extent = se;
922 BUG_ON(se == start_se);
923 }
924}
925
926
927
928
929static void destroy_swap_extents(struct swap_info_struct *sis)
930{
931 while (!list_empty(&sis->extent_list)) {
932 struct swap_extent *se;
933
934 se = list_entry(sis->extent_list.next,
935 struct swap_extent, list);
936 list_del(&se->list);
937 kfree(se);
938 }
939 sis->nr_extents = 0;
940}
941
942
943
944
945
946
947
948
949static int
950add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
951 unsigned long nr_pages, sector_t start_block)
952{
953 struct swap_extent *se;
954 struct swap_extent *new_se;
955 struct list_head *lh;
956
957 lh = sis->extent_list.next;
958 while (lh != &sis->extent_list) {
959 se = list_entry(lh, struct swap_extent, list);
960 if (se->start_block + se->nr_pages == start_block &&
961 se->start_page + se->nr_pages == start_page) {
962
963 se->nr_pages += nr_pages;
964 return 0;
965 }
966 lh = lh->next;
967 }
968
969
970
971
972 new_se = kmalloc(sizeof(*se), GFP_KERNEL);
973 if (new_se == NULL)
974 return -ENOMEM;
975 new_se->start_page = start_page;
976 new_se->nr_pages = nr_pages;
977 new_se->start_block = start_block;
978
979 lh = sis->extent_list.prev;
980 while (lh != &sis->extent_list) {
981 se = list_entry(lh, struct swap_extent, list);
982 if (se->start_block > start_block)
983 break;
984 lh = lh->prev;
985 }
986 list_add_tail(&new_se->list, lh);
987 sis->nr_extents++;
988 return 0;
989}
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
/*
 * Build the page -> block extent list for swap area @sis.
 *
 * A block device gets a single extent mapping pages [0, sis->max) to
 * blocks starting at 0.
 *
 * For any other file the blocks are probed with bmap().  A page is only
 * usable when its first block number is a multiple of blocks_per_page
 * and the remaining blocks_per_page - 1 blocks follow it consecutively;
 * otherwise the probe position moves on by one block and tries again.
 * Every usable page is added as a one-page extent (add_swap_extent()
 * merges neighbours).  Afterwards sis->max and sis->highest_bit are cut
 * down to the number of usable pages found.
 *
 * Returns 0 on success, -EINVAL if bmap() returns 0 for a block
 * ("swapfile has holes") or if no usable page was found, or the error
 * returned by add_swap_extent().
 */
static int setup_swap_extents(struct swap_info_struct *sis)
{
	struct inode *inode;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	int ret;

	inode = sis->swap_file->f_mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Block device: one extent covers everything. */
		ret = add_swap_extent(sis, 0, sis->max, 0);
		goto done;
	}

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Probe the file block by block, collecting page-sized runs of
	 * blocks that are aligned and contiguous on the device.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/* The run must start on a page-sized block boundary. */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* The other blocks of the page must follow consecutively. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Not contiguous: try one block further on. */
				probe_block++;
				goto reprobe;
			}
		}

		/*
		 * Usable page found; the block number is stored in units
		 * of pages, hence the shift.
		 */
		ret = add_swap_extent(sis, page_no, 1,
				first_block >> (PAGE_SHIFT - blkbits));
		if (ret)
			goto out;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = 0;
	if (page_no == 0)
		ret = -EINVAL;
	/* Only the pages actually mapped above are usable. */
	sis->max = page_no;
	sis->highest_bit = page_no - 1;
done:
	/* Start lookups in map_swap_page() at the list's last entry. */
	sis->curr_swap_extent = list_entry(sis->extent_list.prev,
					struct swap_extent, list);
	goto out;
bad_bmap:
	printk(KERN_ERR "swapon: swapfile has holes\n");
	ret = -EINVAL;
out:
	return ret;
}
1106
#if 0
/*
 * Compiled out by the surrounding #if 0.
 *
 * Reports, via bdi_write_congested(), whether the backing device behind
 * a locked page is write-congested.  For a swap-cache page the device is
 * the block device of the swap area named by page->private; for any
 * other page it is the backing_dev_info of page->mapping.
 */
#include <linux/backing-dev.h>
int page_queue_congested(struct page *page)
{
	struct backing_dev_info *bdi;

	BUG_ON(!PageLocked(page));

	if (PageSwapCache(page)) {
		swp_entry_t entry = { .val = page->private };
		struct swap_info_struct *sis;

		sis = get_swap_info_struct(swp_type(entry));
		bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
	} else
		bdi = page->mapping->backing_dev_info;
	return bdi_write_congested(bdi);
}
#endif
1126
/*
 * swapoff system call: stop using the swap area backed by @specialfile.
 *
 * Outline:
 *   1. open the file and find the active swap area sharing its mapping;
 *   2. ask security_vm_enough_memory() about p->pages and un-account
 *      them again on success, failing with -ENOMEM otherwise;
 *   3. unlink the area from the priority list and clear SWP_WRITEOK;
 *   4. pull all of its pages back in with try_to_unuse();
 *   5. on failure put the area back in the list; on success release its
 *      swap map, extents and file.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL if the file
 * is not an active swap area, -ENOMEM from step 2, or an error from
 * getname(), filp_open() or try_to_unuse().
 */
asmlinkage long sys_swapoff(const char __user * specialfile)
{
	struct swap_info_struct * p = NULL;
	unsigned short *swap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	char * pathname;
	int i, type, prev;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	pathname = getname(specialfile);
	err = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;

	victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
	putname(pathname);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/*
	 * Find the active area that uses the same address_space, keeping
	 * track of its predecessor in the list for the unlink below.
	 */
	mapping = victim->f_mapping;
	prev = -1;
	swap_list_lock();
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
			if (p->swap_file->f_mapping == mapping)
				break;
		}
		prev = type;
	}
	if (type < 0) {
		err = -EINVAL;
		swap_list_unlock();
		goto out_dput;
	}
	/*
	 * A zero return from security_vm_enough_memory() is the success
	 * case here; the pages it was asked about are un-accounted again
	 * right away.
	 */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		swap_list_unlock();
		goto out_dput;
	}
	/* Unlink the area from the priority list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* The allocation cursor pointed at us: reset it to the head. */
		swap_list.next = swap_list.head;
	}
	nr_swap_pages -= p->pages;
	total_swap_pages -= p->pages;
	/* No more allocations from this area (SWP_ACTIVE no longer holds). */
	p->flags &= ~SWP_WRITEOK;
	swap_list_unlock();
	current->flags |= PF_SWAPOFF;
	err = try_to_unuse(type);
	current->flags &= ~PF_SWAPOFF;

	/*
	 * Taking the semaphore for writing waits until every reader in
	 * swap_unplug_io_fn() has left.
	 */
	down_write(&swap_unplug_sem);
	up_write(&swap_unplug_sem);

	if (err) {
		/* Failed: put the area back at its priority position. */
		swap_list_lock();
		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
			if (p->prio >= swap_info[i].prio)
				break;
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		total_swap_pages += p->pages;
		p->flags |= SWP_WRITEOK;
		swap_list_unlock();
		goto out_dput;
	}
	/*
	 * Success: detach everything from the descriptor under all three
	 * locks and free it afterwards, outside the spinlocks.
	 */
	down(&swapon_sem);
	swap_list_lock();
	drain_mmlist();
	swap_device_lock(p);
	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	p->flags = 0;
	destroy_swap_extents(p);
	swap_device_unlock(p);
	swap_list_unlock();
	up(&swapon_sem);
	vfree(swap_map);
	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Undo the block size change and claim made by sys_swapon(). */
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	} else {
		/* Regular file: clear the flag sys_swapon() set on the inode. */
		down(&inode->i_sem);
		inode->i_flags &= ~S_SWAPFILE;
		up(&inode->i_sem);
	}
	/* Close the reference that sys_swapon() kept open. */
	filp_close(swap_file, NULL);
	err = 0;

out_dput:
	/* Close our own lookup reference. */
	filp_close(victim, NULL);
out:
	return err;
}
1246
1247#ifdef CONFIG_PROC_FS
1248
1249static void *swap_start(struct seq_file *swap, loff_t *pos)
1250{
1251 struct swap_info_struct *ptr = swap_info;
1252 int i;
1253 loff_t l = *pos;
1254
1255 down(&swapon_sem);
1256
1257 for (i = 0; i < nr_swapfiles; i++, ptr++) {
1258 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1259 continue;
1260 if (!l--)
1261 return ptr;
1262 }
1263
1264 return NULL;
1265}
1266
1267static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
1268{
1269 struct swap_info_struct *ptr = v;
1270 struct swap_info_struct *endptr = swap_info + nr_swapfiles;
1271
1272 for (++ptr; ptr < endptr; ptr++) {
1273 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1274 continue;
1275 ++*pos;
1276 return ptr;
1277 }
1278
1279 return NULL;
1280}
1281
/* seq_file ->stop for /proc/swaps: release the swapon_sem taken in swap_start(). */
static void swap_stop(struct seq_file *swap, void *v)
{
	up(&swapon_sem);
}
1286
1287static int swap_show(struct seq_file *swap, void *v)
1288{
1289 struct swap_info_struct *ptr = v;
1290 struct file *file;
1291 int len;
1292
1293 if (v == swap_info)
1294 seq_puts(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
1295
1296 file = ptr->swap_file;
1297 len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\");
1298 seq_printf(swap, "%*s%s\t%d\t%ld\t%d\n",
1299 len < 40 ? 40 - len : 1, " ",
1300 S_ISBLK(file->f_dentry->d_inode->i_mode) ?
1301 "partition" : "file\t",
1302 ptr->pages << (PAGE_SHIFT - 10),
1303 ptr->inuse_pages << (PAGE_SHIFT - 10),
1304 ptr->prio);
1305 return 0;
1306}
1307
/* Iterator callbacks used by the /proc/swaps seq_file (see swaps_open()). */
static struct seq_operations swaps_op = {
	.start = swap_start,
	.next = swap_next,
	.stop = swap_stop,
	.show = swap_show
};
1314
/* ->open for /proc/swaps: attach the swaps_op iterator to the file. */
static int swaps_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &swaps_op);
}
1319
/*
 * File operations installed on the "swaps" proc entry: our own open plus
 * the generic seq_file read/llseek/release helpers.
 */
static struct file_operations proc_swaps_operations = {
	.open = swaps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
1326
1327static int __init procswaps_init(void)
1328{
1329 struct proc_dir_entry *entry;
1330
1331 entry = create_proc_entry("swaps", 0, NULL);
1332 if (entry)
1333 entry->proc_fops = &proc_swaps_operations;
1334 return 0;
1335}
1336__initcall(procswaps_init);
1337#endif
1338
1339
1340
1341
1342
1343
1344asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1345{
1346 struct swap_info_struct * p;
1347 char *name = NULL;
1348 struct block_device *bdev = NULL;
1349 struct file *swap_file = NULL;
1350 struct address_space *mapping;
1351 unsigned int type;
1352 int i, prev;
1353 int error;
1354 static int least_priority;
1355 union swap_header *swap_header = NULL;
1356 int swap_header_version;
1357 int nr_good_pages = 0;
1358 unsigned long maxpages = 1;
1359 int swapfilesize;
1360 unsigned short *swap_map;
1361 struct page *page = NULL;
1362 struct inode *inode = NULL;
1363 int did_down = 0;
1364
1365 if (!capable(CAP_SYS_ADMIN))
1366 return -EPERM;
1367 swap_list_lock();
1368 p = swap_info;
1369 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1370 if (!(p->flags & SWP_USED))
1371 break;
1372 error = -EPERM;
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385 if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
1386 swap_list_unlock();
1387 goto out;
1388 }
1389 if (type >= nr_swapfiles)
1390 nr_swapfiles = type+1;
1391 INIT_LIST_HEAD(&p->extent_list);
1392 p->flags = SWP_USED;
1393 p->nr_extents = 0;
1394 p->swap_file = NULL;
1395 p->old_block_size = 0;
1396 p->swap_map = NULL;
1397 p->lowest_bit = 0;
1398 p->highest_bit = 0;
1399 p->cluster_nr = 0;
1400 p->inuse_pages = 0;
1401 spin_lock_init(&p->sdev_lock);
1402 p->next = -1;
1403 if (swap_flags & SWAP_FLAG_PREFER) {
1404 p->prio =
1405 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1406 } else {
1407 p->prio = --least_priority;
1408 }
1409 swap_list_unlock();
1410 name = getname(specialfile);
1411 error = PTR_ERR(name);
1412 if (IS_ERR(name)) {
1413 name = NULL;
1414 goto bad_swap_2;
1415 }
1416 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1417 error = PTR_ERR(swap_file);
1418 if (IS_ERR(swap_file)) {
1419 swap_file = NULL;
1420 goto bad_swap_2;
1421 }
1422
1423 p->swap_file = swap_file;
1424 mapping = swap_file->f_mapping;
1425 inode = mapping->host;
1426
1427 error = -EBUSY;
1428 for (i = 0; i < nr_swapfiles; i++) {
1429 struct swap_info_struct *q = &swap_info[i];
1430
1431 if (i == type || !q->swap_file)
1432 continue;
1433 if (mapping == q->swap_file->f_mapping)
1434 goto bad_swap;
1435 }
1436
1437 error = -EINVAL;
1438 if (S_ISBLK(inode->i_mode)) {
1439 bdev = I_BDEV(inode);
1440 error = bd_claim(bdev, sys_swapon);
1441 if (error < 0) {
1442 bdev = NULL;
1443 goto bad_swap;
1444 }
1445 p->old_block_size = block_size(bdev);
1446 error = set_blocksize(bdev, PAGE_SIZE);
1447 if (error < 0)
1448 goto bad_swap;
1449 p->bdev = bdev;
1450 } else if (S_ISREG(inode->i_mode)) {
1451 p->bdev = inode->i_sb->s_bdev;
1452 down(&inode->i_sem);
1453 did_down = 1;
1454 if (IS_SWAPFILE(inode)) {
1455 error = -EBUSY;
1456 goto bad_swap;
1457 }
1458 } else {
1459 goto bad_swap;
1460 }
1461
1462 swapfilesize = i_size_read(inode) >> PAGE_SHIFT;
1463
1464
1465
1466
1467 if (!mapping->a_ops->readpage) {
1468 error = -EINVAL;
1469 goto bad_swap;
1470 }
1471 page = read_cache_page(mapping, 0,
1472 (filler_t *)mapping->a_ops->readpage, swap_file);
1473 if (IS_ERR(page)) {
1474 error = PTR_ERR(page);
1475 goto bad_swap;
1476 }
1477 wait_on_page_locked(page);
1478 if (!PageUptodate(page))
1479 goto bad_swap;
1480 kmap(page);
1481 swap_header = page_address(page);
1482
1483 if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
1484 swap_header_version = 1;
1485 else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
1486 swap_header_version = 2;
1487 else {
1488 printk("Unable to find swap-space signature\n");
1489 error = -EINVAL;
1490 goto bad_swap;
1491 }
1492
1493 switch (swap_header_version) {
1494 case 1:
1495 printk(KERN_ERR "version 0 swap is no longer supported. "
1496 "Use mkswap -v1 %s\n", name);
1497 error = -EINVAL;
1498 goto bad_swap;
1499 case 2:
1500
1501
1502 if (swap_header->info.version != 1) {
1503 printk(KERN_WARNING
1504 "Unable to handle swap header version %d\n",
1505 swap_header->info.version);
1506 error = -EINVAL;
1507 goto bad_swap;
1508 }
1509
1510 p->lowest_bit = 1;
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525 maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
1526 if (maxpages > swap_header->info.last_page)
1527 maxpages = swap_header->info.last_page;
1528 p->highest_bit = maxpages - 1;
1529
1530 error = -EINVAL;
1531 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1532 goto bad_swap;
1533
1534
1535 if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
1536 error = -ENOMEM;
1537 goto bad_swap;
1538 }
1539
1540 error = 0;
1541 memset(p->swap_map, 0, maxpages * sizeof(short));
1542 for (i=0; i<swap_header->info.nr_badpages; i++) {
1543 int page = swap_header->info.badpages[i];
1544 if (page <= 0 || page >= swap_header->info.last_page)
1545 error = -EINVAL;
1546 else
1547 p->swap_map[page] = SWAP_MAP_BAD;
1548 }
1549 nr_good_pages = swap_header->info.last_page -
1550 swap_header->info.nr_badpages -
1551 1 ;
1552 if (error)
1553 goto bad_swap;
1554 }
1555
1556 if (swapfilesize && maxpages > swapfilesize) {
1557 printk(KERN_WARNING
1558 "Swap area shorter than signature indicates\n");
1559 error = -EINVAL;
1560 goto bad_swap;
1561 }
1562 if (!nr_good_pages) {
1563 printk(KERN_WARNING "Empty swap-file\n");
1564 error = -EINVAL;
1565 goto bad_swap;
1566 }
1567 p->swap_map[0] = SWAP_MAP_BAD;
1568 p->max = maxpages;
1569 p->pages = nr_good_pages;
1570
1571 error = setup_swap_extents(p);
1572 if (error)
1573 goto bad_swap;
1574
1575 down(&swapon_sem);
1576 swap_list_lock();
1577 swap_device_lock(p);
1578 p->flags = SWP_ACTIVE;
1579 nr_swap_pages += nr_good_pages;
1580 total_swap_pages += nr_good_pages;
1581 printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n",
1582 nr_good_pages<<(PAGE_SHIFT-10), name,
1583 p->prio, p->nr_extents);
1584
1585
1586 prev = -1;
1587 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1588 if (p->prio >= swap_info[i].prio) {
1589 break;
1590 }
1591 prev = i;
1592 }
1593 p->next = i;
1594 if (prev < 0) {
1595 swap_list.head = swap_list.next = p - swap_info;
1596 } else {
1597 swap_info[prev].next = p - swap_info;
1598 }
1599 swap_device_unlock(p);
1600 swap_list_unlock();
1601 up(&swapon_sem);
1602 error = 0;
1603 goto out;
1604bad_swap:
1605 if (bdev) {
1606 set_blocksize(bdev, p->old_block_size);
1607 bd_release(bdev);
1608 }
1609bad_swap_2:
1610 swap_list_lock();
1611 swap_map = p->swap_map;
1612 p->swap_file = NULL;
1613 p->swap_map = NULL;
1614 p->flags = 0;
1615 if (!(swap_flags & SWAP_FLAG_PREFER))
1616 ++least_priority;
1617 swap_list_unlock();
1618 destroy_swap_extents(p);
1619 vfree(swap_map);
1620 if (swap_file)
1621 filp_close(swap_file, NULL);
1622out:
1623 if (page && !IS_ERR(page)) {
1624 kunmap(page);
1625 page_cache_release(page);
1626 }
1627 if (name)
1628 putname(name);
1629 if (did_down) {
1630 if (!error)
1631 inode->i_flags |= S_SWAPFILE;
1632 up(&inode->i_sem);
1633 }
1634 return error;
1635}
1636
1637void si_swapinfo(struct sysinfo *val)
1638{
1639 unsigned int i;
1640 unsigned long nr_to_be_unused = 0;
1641
1642 swap_list_lock();
1643 for (i = 0; i < nr_swapfiles; i++) {
1644 if (!(swap_info[i].flags & SWP_USED) ||
1645 (swap_info[i].flags & SWP_WRITEOK))
1646 continue;
1647 nr_to_be_unused += swap_info[i].inuse_pages;
1648 }
1649 val->freeswap = nr_swap_pages + nr_to_be_unused;
1650 val->totalswap = total_swap_pages + nr_to_be_unused;
1651 swap_list_unlock();
1652}
1653
1654
1655
1656
1657
1658
1659
1660int swap_duplicate(swp_entry_t entry)
1661{
1662 struct swap_info_struct * p;
1663 unsigned long offset, type;
1664 int result = 0;
1665
1666 type = swp_type(entry);
1667 if (type >= nr_swapfiles)
1668 goto bad_file;
1669 p = type + swap_info;
1670 offset = swp_offset(entry);
1671
1672 swap_device_lock(p);
1673 if (offset < p->max && p->swap_map[offset]) {
1674 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
1675 p->swap_map[offset]++;
1676 result = 1;
1677 } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
1678 if (swap_overflow++ < 5)
1679 printk(KERN_WARNING "swap_dup: swap entry overflow\n");
1680 p->swap_map[offset] = SWAP_MAP_MAX;
1681 result = 1;
1682 }
1683 }
1684 swap_device_unlock(p);
1685out:
1686 return result;
1687
1688bad_file:
1689 printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
1690 goto out;
1691}
1692
1693struct swap_info_struct *
1694get_swap_info_struct(unsigned type)
1695{
1696 return &swap_info[type];
1697}
1698
1699
1700
1701
1702
1703int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1704{
1705 int ret = 0, i = 1 << page_cluster;
1706 unsigned long toff;
1707 struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
1708
1709 if (!page_cluster)
1710 return 0;
1711 toff = (swp_offset(entry) >> page_cluster) << page_cluster;
1712 if (!toff)
1713 toff++, i--;
1714 *offset = toff;
1715
1716 swap_device_lock(swapdev);
1717 do {
1718
1719 if (toff >= swapdev->max)
1720 break;
1721
1722 if (!swapdev->swap_map[toff])
1723 break;
1724 if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
1725 break;
1726 toff++;
1727 ret++;
1728 } while (--i);
1729 swap_device_unlock(swapdev);
1730 return ret;
1731}
1732