1
2
3
4
5
6
7
8#include <linux/mm.h>
9#include <linux/hugetlb.h>
10#include <linux/mman.h>
11#include <linux/slab.h>
12#include <linux/kernel_stat.h>
13#include <linux/swap.h>
14#include <linux/vmalloc.h>
15#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/shm.h>
18#include <linux/blkdev.h>
19#include <linux/writeback.h>
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/rmap.h>
25#include <linux/security.h>
26#include <linux/backing-dev.h>
27#include <linux/mutex.h>
28#include <linux/capability.h>
29#include <linux/syscalls.h>
30
31#include <asm/pgtable.h>
32#include <asm/tlbflush.h>
33#include <linux/swapops.h>
34
/*
 * swap_lock is held in this file around updates to swap_list, the
 * per-area swap_info[] fields (flags, swap_map counts, bit ranges) and
 * the global swap page counters.
 */
DEFINE_SPINLOCK(swap_lock);
/* One more than the highest swap_info[] index handed out by sys_swapon(). */
unsigned int nr_swapfiles;
/* sys_swapoff() subtracts an area's ->pages here and restores it on failure. */
long total_swap_pages;
/*
 * Cleared by try_to_unuse() after it resets a saturated swap_map count;
 * where it is set is not visible in this part of the file.
 */
static int swap_overflow;

/* Message fragments used by the diagnostics in swap_info_get(). */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/* Both members start at -1; list walks in this file stop at a negative index. */
struct swap_list_t swap_list = {-1, -1};

/* Table of swap areas, indexed by swap type. */
static struct swap_info_struct swap_info[MAX_SWAPFILES];

/* Held across /proc/swaps iteration and by sys_swapoff() during teardown. */
static DEFINE_MUTEX(swapon_mutex);

/*
 * swap_unplug_io_fn() holds this for reading while it dereferences
 * swap_info[].bdev.  sys_swapoff() takes and immediately releases it for
 * writing after try_to_unuse() succeeds, which waits for any such reader
 * to finish before the area is torn down.
 */
static DECLARE_RWSEM(swap_unplug_sem);
57
58void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
59{
60 swp_entry_t entry;
61
62 down_read(&swap_unplug_sem);
63 entry.val = page_private(page);
64 if (PageSwapCache(page)) {
65 struct block_device *bdev = swap_info[swp_type(entry)].bdev;
66 struct backing_dev_info *bdi;
67
68
69
70
71
72
73
74
75
76 WARN_ON(page_count(page) <= 1);
77
78 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
79 blk_run_backing_dev(bdi, page);
80 }
81 up_read(&swap_unplug_sem);
82}
83
/* Number of consecutive free slots searched for when starting a new cluster. */
#define SWAPFILE_CLUSTER 256
/* Number of slots examined without swap_lock before calling cond_resched(). */
#define LATENCY_LIMIT 256

/*
 * scan_swap_map - claim one free slot in swap area @si.
 *
 * Entered and left with swap_lock held; the lock is dropped while the
 * swap_map is scanned and retaken before any field is updated.
 *
 * Returns the offset of the claimed slot (its swap_map count is set to 1
 * and inuse_pages is bumped), or 0 if the area is not SWP_WRITEOK or has
 * no usable range (highest_bit == 0).
 */
static inline unsigned long scan_swap_map(struct swap_info_struct *si)
{
	unsigned long offset, last_in_cluster;
	int latency_ration = LATENCY_LIMIT;

	/*
	 * Count this scanner in si->flags; it is subtracted again on every
	 * exit path.  sys_swapoff() waits while flags >= SWP_SCANNING.
	 */
	si->flags += SWP_SCANNING;
	if (unlikely(!si->cluster_nr)) {
		/* Current cluster used up: try to find a fresh one. */
		si->cluster_nr = SWAPFILE_CLUSTER - 1;
		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
			goto lowest;
		spin_unlock(&swap_lock);

		offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Look for SWAPFILE_CLUSTER consecutive free slots. */
		for (; last_in_cluster <= si->highest_bit; offset++) {
			if (si->swap_map[offset])
				/* Slot in use: the run must restart after it. */
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&swap_lock);
				si->cluster_next = offset-SWAPFILE_CLUSTER+1;
				goto cluster;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}
		/* No free cluster found: fall back to the lowest free slot. */
		spin_lock(&swap_lock);
		goto lowest;
	}

	si->cluster_nr--;
cluster:
	offset = si->cluster_next;
	if (offset > si->highest_bit)
lowest:		offset = si->lowest_bit;
	/* Re-validate under swap_lock: the area may have changed while unlocked. */
checks:	if (!(si->flags & SWP_WRITEOK))
		goto no_page;
	if (!si->highest_bit)
		goto no_page;
	if (!si->swap_map[offset]) {
		/* Free slot: shrink the free range if we took one of its ends. */
		if (offset == si->lowest_bit)
			si->lowest_bit++;
		if (offset == si->highest_bit)
			si->highest_bit--;
		si->inuse_pages++;
		if (si->inuse_pages == si->pages) {
			/* Area is now full: make the free range empty. */
			si->lowest_bit = si->max;
			si->highest_bit = 0;
		}
		si->swap_map[offset] = 1;
		si->cluster_next = offset + 1;
		si->flags -= SWP_SCANNING;
		return offset;
	}

	/* Candidate slot is taken: scan forward without the lock. */
	spin_unlock(&swap_lock);
	while (++offset <= si->highest_bit) {
		if (!si->swap_map[offset]) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	/* Ran past highest_bit: restart from lowest_bit. */
	spin_lock(&swap_lock);
	goto lowest;

no_page:
	si->flags -= SWP_SCANNING;
	return 0;
}
173
/*
 * get_swap_page - allocate a swap entry from any writable swap area.
 *
 * Walks the swap_list starting at swap_list.next and asks scan_swap_map()
 * for a slot in each candidate area.  nr_swap_pages is decremented up
 * front and restored if no slot is found.
 *
 * Returns the new entry, or an entry whose value is 0 on failure.
 */
swp_entry_t get_swap_page(void)
{
	struct swap_info_struct *si;
	pgoff_t offset;
	int type, next;
	int wrapped = 0;

	spin_lock(&swap_lock);
	if (nr_swap_pages <= 0)
		goto noswap;
	nr_swap_pages--;

	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
		si = swap_info + type;
		next = si->next;
		/*
		 * At the end of the list, or (on the first pass only) when the
		 * next area has a different priority, restart from the head.
		 * The loop gives up after the second such restart.
		 */
		if (next < 0 ||
		    (!wrapped && si->prio != swap_info[next].prio)) {
			next = swap_list.head;
			wrapped++;
		}

		if (!si->highest_bit)
			continue;
		if (!(si->flags & SWP_WRITEOK))
			continue;

		/* Record where the next allocation should start looking. */
		swap_list.next = next;
		offset = scan_swap_map(si);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
		}
		/* scan_swap_map() dropped swap_lock; reload the cursor. */
		next = swap_list.next;
	}

	nr_swap_pages++;
noswap:
	spin_unlock(&swap_lock);
	return (swp_entry_t) {0};
}
214
215swp_entry_t get_swap_page_of_type(int type)
216{
217 struct swap_info_struct *si;
218 pgoff_t offset;
219
220 spin_lock(&swap_lock);
221 si = swap_info + type;
222 if (si->flags & SWP_WRITEOK) {
223 nr_swap_pages--;
224 offset = scan_swap_map(si);
225 if (offset) {
226 spin_unlock(&swap_lock);
227 return swp_entry(type, offset);
228 }
229 nr_swap_pages++;
230 }
231 spin_unlock(&swap_lock);
232 return (swp_entry_t) {0};
233}
234
235static struct swap_info_struct * swap_info_get(swp_entry_t entry)
236{
237 struct swap_info_struct * p;
238 unsigned long offset, type;
239
240 if (!entry.val)
241 goto out;
242 type = swp_type(entry);
243 if (type >= nr_swapfiles)
244 goto bad_nofile;
245 p = & swap_info[type];
246 if (!(p->flags & SWP_USED))
247 goto bad_device;
248 offset = swp_offset(entry);
249 if (offset >= p->max)
250 goto bad_offset;
251 if (!p->swap_map[offset])
252 goto bad_free;
253 spin_lock(&swap_lock);
254 return p;
255
256bad_free:
257 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
258 goto out;
259bad_offset:
260 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
261 goto out;
262bad_device:
263 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
264 goto out;
265bad_nofile:
266 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
267out:
268 return NULL;
269}
270
271static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
272{
273 int count = p->swap_map[offset];
274
275 if (count < SWAP_MAP_MAX) {
276 count--;
277 p->swap_map[offset] = count;
278 if (!count) {
279 if (offset < p->lowest_bit)
280 p->lowest_bit = offset;
281 if (offset > p->highest_bit)
282 p->highest_bit = offset;
283 if (p->prio > swap_info[swap_list.next].prio)
284 swap_list.next = p - swap_info;
285 nr_swap_pages++;
286 p->inuse_pages--;
287 }
288 }
289 return count;
290}
291
292
293
294
295
296void swap_free(swp_entry_t entry)
297{
298 struct swap_info_struct * p;
299
300 p = swap_info_get(entry);
301 if (p) {
302 swap_entry_free(p, swp_offset(entry));
303 spin_unlock(&swap_lock);
304 }
305}
306
307
308
309
310static inline int page_swapcount(struct page *page)
311{
312 int count = 0;
313 struct swap_info_struct *p;
314 swp_entry_t entry;
315
316 entry.val = page_private(page);
317 p = swap_info_get(entry);
318 if (p) {
319
320 count = p->swap_map[swp_offset(entry)] - 1;
321 spin_unlock(&swap_lock);
322 }
323 return count;
324}
325
326
327
328
329
/*
 * Return non-zero when @page has exactly one user, counting page table
 * mappings and, for a swap-cache page with at most one mapping, the
 * references reported by page_swapcount().
 *
 * The page must be locked (enforced with BUG_ON).
 */
int can_share_swap_page(struct page *page)
{
	int mapped;

	BUG_ON(!PageLocked(page));

	mapped = page_mapcount(page);
	if (mapped > 1 || !PageSwapCache(page))
		return mapped == 1;

	return mapped + page_swapcount(page) == 1;
}
340
341
342
343
344
/*
 * remove_exclusive_swap_page - drop a page from the swap cache, and free
 * its swap entry, when nothing else refers to either.
 *
 * The page must be locked and must not have PagePrivate set (both are
 * enforced with BUG_ON).
 *
 * Returns 1 if the page was removed from the swap cache, 0 otherwise.
 */
int remove_exclusive_swap_page(struct page *page)
{
	int retval;
	struct swap_info_struct * p;
	swp_entry_t entry;

	BUG_ON(PagePrivate(page));
	BUG_ON(!PageLocked(page));

	if (!PageSwapCache(page))
		return 0;
	if (PageWriteback(page))
		return 0;
	/* Exactly two references are required to proceed. */
	if (page_count(page) != 2)
		return 0;

	entry.val = page_private(page);
	p = swap_info_get(entry);
	if (!p)
		return 0;

	/* swap_lock is now held; proceed only if the slot has a single user. */
	retval = 0;
	if (p->swap_map[swp_offset(entry)] == 1) {
		/* Recheck count and writeback state under the swap cache tree lock. */
		write_lock_irq(&swapper_space.tree_lock);
		if ((page_count(page) == 2) && !PageWriteback(page)) {
			__delete_from_swap_cache(page);
			SetPageDirty(page);
			retval = 1;
		}
		write_unlock_irq(&swapper_space.tree_lock);
	}
	spin_unlock(&swap_lock);

	if (retval) {
		/* Done after unlocking: swap_free() takes swap_lock itself. */
		swap_free(entry);
		page_cache_release(page);
	}

	return retval;
}
387
388
389
390
391
/*
 * free_swap_and_cache - drop one reference on @entry and, if that leaves
 * a single reference, try to remove the page from the swap cache too.
 *
 * Migration entries are ignored.
 */
void free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct * p;
	struct page *page = NULL;

	if (is_migration_entry(entry))
		return;

	p = swap_info_get(entry);
	if (p) {
		if (swap_entry_free(p, swp_offset(entry)) == 1) {
			/* One reference left: look the page up in the swap cache. */
			page = find_get_page(&swapper_space, entry.val);
			/* Only a trylock here; give up if the page is already locked. */
			if (page && unlikely(TestSetPageLocked(page))) {
				page_cache_release(page);
				page = NULL;
			}
		}
		spin_unlock(&swap_lock);
	}
	if (page) {
		int one_user;

		BUG_ON(PagePrivate(page));
		one_user = (page_count(page) == 2);
		/*
		 * Remove from the swap cache when the page count is exactly 2,
		 * or when vm_swap_full() says so.
		 */
		if (PageSwapCache(page) && !PageWriteback(page) &&
					(one_user || vm_swap_full())) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
		unlock_page(page);
		page_cache_release(page);
	}
}
427
428#ifdef CONFIG_SOFTWARE_SUSPEND
429
430
431
432
433
434
435
436
437int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
438{
439 struct block_device *bdev = NULL;
440 int i;
441
442 if (device)
443 bdev = bdget(device);
444
445 spin_lock(&swap_lock);
446 for (i = 0; i < nr_swapfiles; i++) {
447 struct swap_info_struct *sis = swap_info + i;
448
449 if (!(sis->flags & SWP_WRITEOK))
450 continue;
451
452 if (!bdev) {
453 if (bdev_p)
454 *bdev_p = sis->bdev;
455
456 spin_unlock(&swap_lock);
457 return i;
458 }
459 if (bdev == sis->bdev) {
460 struct swap_extent *se;
461
462 se = list_entry(sis->extent_list.next,
463 struct swap_extent, list);
464 if (se->start_block == offset) {
465 if (bdev_p)
466 *bdev_p = sis->bdev;
467
468 spin_unlock(&swap_lock);
469 bdput(bdev);
470 return i;
471 }
472 }
473 }
474 spin_unlock(&swap_lock);
475 if (bdev)
476 bdput(bdev);
477
478 return -ENODEV;
479}
480
481
482
483
484
485
486
487unsigned int count_swap_pages(int type, int free)
488{
489 unsigned int n = 0;
490
491 if (type < nr_swapfiles) {
492 spin_lock(&swap_lock);
493 if (swap_info[type].flags & SWP_WRITEOK) {
494 n = swap_info[type].pages;
495 if (free)
496 n -= swap_info[type].inuse_pages;
497 }
498 spin_unlock(&swap_lock);
499 }
500 return n;
501}
502#endif
503
504
505
506
507
508
509static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
510 unsigned long addr, swp_entry_t entry, struct page *page)
511{
512 inc_mm_counter(vma->vm_mm, anon_rss);
513 get_page(page);
514 set_pte_at(vma->vm_mm, addr, pte,
515 pte_mkold(mk_pte(page, vma->vm_page_prot)));
516 page_add_anon_rmap(page, vma, addr);
517 swap_free(entry);
518
519
520
521
522 activate_page(page);
523}
524
/*
 * Scan the ptes covering [@addr, @end) under @pmd for one equal to the
 * swap pte of @entry and, if found, replace it with @page via unuse_pte().
 *
 * Returns 1 if a pte was replaced, 0 otherwise.
 */
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	spinlock_t *ptl;
	int found = 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		if (unlikely(pte_same(*pte, swp_pte))) {
			/*
			 * The post-increment stands in for the loop's own
			 * pte++, which the break skips, so that "pte - 1"
			 * below is valid on both exit paths.
			 */
			unuse_pte(vma, pte++, addr, entry, page);
			found = 1;
			break;
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
	return found;
}
549
550static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
551 unsigned long addr, unsigned long end,
552 swp_entry_t entry, struct page *page)
553{
554 pmd_t *pmd;
555 unsigned long next;
556
557 pmd = pmd_offset(pud, addr);
558 do {
559 next = pmd_addr_end(addr, end);
560 if (pmd_none_or_clear_bad(pmd))
561 continue;
562 if (unuse_pte_range(vma, pmd, addr, next, entry, page))
563 return 1;
564 } while (pmd++, addr = next, addr != end);
565 return 0;
566}
567
568static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
569 unsigned long addr, unsigned long end,
570 swp_entry_t entry, struct page *page)
571{
572 pud_t *pud;
573 unsigned long next;
574
575 pud = pud_offset(pgd, addr);
576 do {
577 next = pud_addr_end(addr, end);
578 if (pud_none_or_clear_bad(pud))
579 continue;
580 if (unuse_pmd_range(vma, pud, addr, next, entry, page))
581 return 1;
582 } while (pud++, addr = next, addr != end);
583 return 0;
584}
585
586static int unuse_vma(struct vm_area_struct *vma,
587 swp_entry_t entry, struct page *page)
588{
589 pgd_t *pgd;
590 unsigned long addr, end, next;
591
592 if (page->mapping) {
593 addr = page_address_in_vma(page, vma);
594 if (addr == -EFAULT)
595 return 0;
596 else
597 end = addr + PAGE_SIZE;
598 } else {
599 addr = vma->vm_start;
600 end = vma->vm_end;
601 }
602
603 pgd = pgd_offset(vma->vm_mm, addr);
604 do {
605 next = pgd_addr_end(addr, end);
606 if (pgd_none_or_clear_bad(pgd))
607 continue;
608 if (unuse_pud_range(vma, pgd, addr, next, entry, page))
609 return 1;
610 } while (pgd++, addr = next, addr != end);
611 return 0;
612}
613
614static int unuse_mm(struct mm_struct *mm,
615 swp_entry_t entry, struct page *page)
616{
617 struct vm_area_struct *vma;
618
619 if (!down_read_trylock(&mm->mmap_sem)) {
620
621
622
623
624 activate_page(page);
625 unlock_page(page);
626 down_read(&mm->mmap_sem);
627 lock_page(page);
628 }
629 for (vma = mm->mmap; vma; vma = vma->vm_next) {
630 if (vma->anon_vma && unuse_vma(vma, entry, page))
631 break;
632 }
633 up_read(&mm->mmap_sem);
634
635
636
637
638 return 0;
639}
640
641
642
643
644
/*
 * find_next_to_unuse - find the next in-use slot of @si after @prev.
 *
 * Scans swap_map from @prev + 1 up to si->max, then wraps around and
 * scans from 1 up to and including @prev.  Slot 0 is never examined.
 * Slots whose count is 0 or SWAP_MAP_BAD are skipped.
 *
 * Returns the offset of the slot found, or 0 if there is none.
 */
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
					unsigned int prev)
{
	unsigned int max = si->max;
	unsigned int i = prev;
	int count;

	for (;;) {
		if (++i >= max) {
			if (!prev) {
				/* Started from 0, or already wrapped: nothing left. */
				i = 0;
				break;
			}
			/* Wrap once: rescan 1..prev, with prev = 0 marking the wrap. */
			max = prev + 1;
			prev = 0;
			i = 1;
		}
		count = si->swap_map[i];
		if (count && count != SWAP_MAP_BAD)
			break;
	}
	return i;
}
678
679
680
681
682
683
/*
 * try_to_unuse - bring every in-use slot of swap area @type back into
 * memory and remove the references to it.
 *
 * For each in-use slot the page is read into the swap cache, then the
 * mms on the mmlist (and shmem, via init_mm) are searched for references
 * to the entry, which are replaced by the page.
 *
 * Returns 0 on success, -EINTR if a signal is pending, -ENOMEM if a page
 * could not be read in, or whatever non-zero value unuse_mm() returned.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * start_mm is where the search for users of each entry begins.  It
	 * starts as init_mm, with a reference held on mm_users.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/* Loop until find_next_to_unuse() reports no in-use slot (returns 0). */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* Get the page for this slot into the swap cache. */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry, NULL, 0);
		if (!page) {
			/*
			 * No page: if the slot's count has dropped to zero in
			 * the meantime there is nothing left to do for it;
			 * otherwise report -ENOMEM.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/* If we hold the only reference to start_mm, fall back to init_mm. */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/* Wait for pending I/O on the page, then lock it. */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * If the count shows users beyond the single remaining
		 * reference, search start_mm first (shmem when start_mm is
		 * init_mm).
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_mm(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			/* Still referenced: walk the rest of the mmlist. */
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				/* Skip mms whose user count has already hit zero. */
				if (!atomic_inc_not_zero(&mm->mm_users))
					continue;
				/*
				 * The reference on mm is held until the next
				 * iteration (as prev_mm) while mmlist_lock is
				 * dropped.
				 */
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (swcount <= 1)
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_mm(mm, entry, page);
				/*
				 * Remember the first mm that lowered the count
				 * as the starting point for the next slot.
				 */
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * A count stuck at SWAP_MAP_MAX is forced back to 1 here, and
		 * the reset is reported once after the loop.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
			*swap_map = 1;
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}

		/*
		 * If references remain and the swap-cache page is dirty, write
		 * it out before it is removed from the swap cache below.  The
		 * page is locked again afterwards (presumably swap_writepage()
		 * unlocks it -- confirm against its definition).
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}
		if (PageSwapCache(page)) {
			/*
			 * When shmem_unuse() returned non-zero, take another
			 * reference on the entry instead of removing the page
			 * from the swap cache.
			 */
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/* Mark the page dirty before letting go of it. */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/* Give other tasks a chance to run between slots. */
		cond_resched();
	}

	mmput(start_mm);
	if (reset_overflow) {
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
		swap_overflow = 0;
	}
	return retval;
}
908
909
910
911
912
913
914
915static void drain_mmlist(void)
916{
917 struct list_head *p, *next;
918 unsigned int i;
919
920 for (i = 0; i < nr_swapfiles; i++)
921 if (swap_info[i].inuse_pages)
922 return;
923 spin_lock(&mmlist_lock);
924 list_for_each_safe(p, next, &init_mm.mmlist)
925 list_del_init(p);
926 spin_unlock(&mmlist_lock);
927}
928
929
930
931
932
933sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
934{
935 struct swap_extent *se = sis->curr_swap_extent;
936 struct swap_extent *start_se = se;
937
938 for ( ; ; ) {
939 struct list_head *lh;
940
941 if (se->start_page <= offset &&
942 offset < (se->start_page + se->nr_pages)) {
943 return se->start_block + (offset - se->start_page);
944 }
945 lh = se->list.next;
946 if (lh == &sis->extent_list)
947 lh = lh->next;
948 se = list_entry(lh, struct swap_extent, list);
949 sis->curr_swap_extent = se;
950 BUG_ON(se == start_se);
951 }
952}
953
954#ifdef CONFIG_SOFTWARE_SUSPEND
955
956
957
958
959sector_t swapdev_block(int swap_type, pgoff_t offset)
960{
961 struct swap_info_struct *sis;
962
963 if (swap_type >= nr_swapfiles)
964 return 0;
965
966 sis = swap_info + swap_type;
967 return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;
968}
969#endif
970
971
972
973
974static void destroy_swap_extents(struct swap_info_struct *sis)
975{
976 while (!list_empty(&sis->extent_list)) {
977 struct swap_extent *se;
978
979 se = list_entry(sis->extent_list.next,
980 struct swap_extent, list);
981 list_del(&se->list);
982 kfree(se);
983 }
984}
985
986
987
988
989
990
991
992static int
993add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
994 unsigned long nr_pages, sector_t start_block)
995{
996 struct swap_extent *se;
997 struct swap_extent *new_se;
998 struct list_head *lh;
999
1000 lh = sis->extent_list.prev;
1001 if (lh != &sis->extent_list) {
1002 se = list_entry(lh, struct swap_extent, list);
1003 BUG_ON(se->start_page + se->nr_pages != start_page);
1004 if (se->start_block + se->nr_pages == start_block) {
1005
1006 se->nr_pages += nr_pages;
1007 return 0;
1008 }
1009 }
1010
1011
1012
1013
1014 new_se = kmalloc(sizeof(*se), GFP_KERNEL);
1015 if (new_se == NULL)
1016 return -ENOMEM;
1017 new_se->start_page = start_page;
1018 new_se->nr_pages = nr_pages;
1019 new_se->start_block = start_block;
1020
1021 list_add_tail(&new_se->list, &sis->extent_list);
1022 return 1;
1023}
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
/*
 * setup_swap_extents - build the page-to-block extent list for @sis.
 *
 * For a block device a single extent covering pages [0, sis->max) at
 * block 0 is added and *@span is set to sis->pages.
 *
 * For anything else the file is probed with bmap().  Only runs of blocks
 * that are page-aligned and contiguous for a whole page are used; a block
 * that bmap() maps to 0 is treated as a hole and fails the setup.
 * sis->max, sis->pages and sis->highest_bit are then set from the number
 * of usable pages found, and *@span is set to the block range spanned.
 *
 * Returns the number of extents allocated on the file path, the return
 * value of add_swap_extent() on the block-device path, or a negative
 * errno (-EINVAL for holes, -ENOMEM from add_swap_extent()).
 */
static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
{
	struct inode *inode;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	inode = sis->swap_file->f_mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		ret = add_swap_extent(sis, 0, sis->max, 0);
		*span = sis->pages;
		goto done;
	}

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/* Walk the file one page's worth of blocks at a time. */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/* The on-disk block must be page-aligned; otherwise slide forward. */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* The remaining blocks of the page must follow contiguously. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguous: try again one block further on. */
				probe_block++;
				goto reprobe;
			}
		}

		/* Convert to page-sized units; page 0 is left out of the span. */
		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/* Record this page; add_swap_extent() merges contiguous runs. */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* avoids page_no - 1 wrapping below */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
done:
	/* Start lookups in map_swap_page() from the last extent on the list. */
	sis->curr_swap_extent = list_entry(sis->extent_list.prev,
					struct swap_extent, list);
	goto out;
bad_bmap:
	printk(KERN_ERR "swapon: swapfile has holes\n");
	ret = -EINVAL;
out:
	return ret;
}
1154
/*
 * Compiled out with "#if 0".  Kept for reference only: it reports whether
 * the backing device of a locked page is write-congested, looking the
 * device up through the swap area for swap-cache pages and through
 * page->mapping otherwise.
 */
#if 0
#include <linux/backing-dev.h>
int page_queue_congested(struct page *page)
{
	struct backing_dev_info *bdi;

	BUG_ON(!PageLocked(page));

	if (PageSwapCache(page)) {
		swp_entry_t entry = { .val = page_private(page) };
		struct swap_info_struct *sis;

		sis = get_swap_info_struct(swp_type(entry));
		bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
	} else
		bdi = page->mapping->backing_dev_info;
	return bdi_write_congested(bdi);
}
#endif
1174
/*
 * sys_swapoff - disable swapping on the file or device named by
 * @specialfile.
 *
 * Requires CAP_SYS_ADMIN.  The area is unlinked from the swap list and
 * made non-writable, its contents are brought back in by try_to_unuse(),
 * and the area is then torn down.  If try_to_unuse() fails the area is
 * re-inserted into the list and made writable again.
 *
 * Returns 0 on success or a negative errno: -EPERM without the
 * capability, -EINVAL if the path is not an active swap area, -ENOMEM if
 * security_vm_enough_memory() refuses, or the error from getname(),
 * filp_open() or try_to_unuse().
 */
asmlinkage long sys_swapoff(const char __user * specialfile)
{
	struct swap_info_struct * p = NULL;
	unsigned short *swap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	char * pathname;
	int i, type, prev;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	pathname = getname(specialfile);
	err = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;

	victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
	putname(pathname);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/* Find the active swap area backed by the same address_space. */
	mapping = victim->f_mapping;
	prev = -1;
	spin_lock(&swap_lock);
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
			if (p->swap_file->f_mapping == mapping)
				break;
		}
		prev = type;
	}
	if (type < 0) {
		err = -EINVAL;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/*
	 * Proceed only if security_vm_enough_memory() returns 0 for the
	 * area's page count; on success the accounting is undone at once.
	 */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/* Unlink the area from the swap list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* The allocation cursor pointed at this area: reset it. */
		swap_list.next = swap_list.head;
	}
	nr_swap_pages -= p->pages;
	total_swap_pages -= p->pages;
	p->flags &= ~SWP_WRITEOK;
	spin_unlock(&swap_lock);

	current->flags |= PF_SWAPOFF;
	err = try_to_unuse(type);
	current->flags &= ~PF_SWAPOFF;

	if (err) {
		/* Failed: re-insert the area in priority order and re-enable it. */
		spin_lock(&swap_lock);
		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
			if (p->prio >= swap_info[i].prio)
				break;
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		total_swap_pages += p->pages;
		p->flags |= SWP_WRITEOK;
		spin_unlock(&swap_lock);
		goto out_dput;
	}

	/* Wait for any swap_unplug_io_fn() reader still using this area. */
	down_write(&swap_unplug_sem);
	up_write(&swap_unplug_sem);

	destroy_swap_extents(p);
	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	drain_mmlist();

	/*
	 * Make the area look full (highest_bit == 0 stops scan_swap_map())
	 * and wait until no scanner is counted in p->flags.
	 */
	p->highest_bit = 0;
	while (p->flags >= SWP_SCANNING) {
		spin_unlock(&swap_lock);
		schedule_timeout_uninterruptible(1);
		spin_lock(&swap_lock);
	}

	/* Detach the area's resources under the locks; free them afterwards. */
	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	p->flags = 0;
	spin_unlock(&swap_lock);
	mutex_unlock(&swapon_mutex);
	vfree(swap_map);
	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Block device: restore the saved block size and release the claim. */
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	} else {
		/* Regular file: clear the S_SWAPFILE inode flag. */
		mutex_lock(&inode->i_mutex);
		inode->i_flags &= ~S_SWAPFILE;
		mutex_unlock(&inode->i_mutex);
	}
	filp_close(swap_file, NULL);
	err = 0;

out_dput:
	filp_close(victim, NULL);
out:
	return err;
}
1303
1304#ifdef CONFIG_PROC_FS
1305
1306static void *swap_start(struct seq_file *swap, loff_t *pos)
1307{
1308 struct swap_info_struct *ptr = swap_info;
1309 int i;
1310 loff_t l = *pos;
1311
1312 mutex_lock(&swapon_mutex);
1313
1314 if (!l)
1315 return SEQ_START_TOKEN;
1316
1317 for (i = 0; i < nr_swapfiles; i++, ptr++) {
1318 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1319 continue;
1320 if (!--l)
1321 return ptr;
1322 }
1323
1324 return NULL;
1325}
1326
1327static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
1328{
1329 struct swap_info_struct *ptr;
1330 struct swap_info_struct *endptr = swap_info + nr_swapfiles;
1331
1332 if (v == SEQ_START_TOKEN)
1333 ptr = swap_info;
1334 else {
1335 ptr = v;
1336 ptr++;
1337 }
1338
1339 for (; ptr < endptr; ptr++) {
1340 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1341 continue;
1342 ++*pos;
1343 return ptr;
1344 }
1345
1346 return NULL;
1347}
1348
/*
 * seq_file ->stop for /proc/swaps: release the swapon_mutex taken in
 * swap_start().
 */
static void swap_stop(struct seq_file *swap, void *v)
{
	mutex_unlock(&swapon_mutex);
}
1353
1354static int swap_show(struct seq_file *swap, void *v)
1355{
1356 struct swap_info_struct *ptr = v;
1357 struct file *file;
1358 int len;
1359
1360 if (ptr == SEQ_START_TOKEN) {
1361 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
1362 return 0;
1363 }
1364
1365 file = ptr->swap_file;
1366 len = seq_path(swap, file->f_path.mnt, file->f_path.dentry, " \t\n\\");
1367 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
1368 len < 40 ? 40 - len : 1, " ",
1369 S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
1370 "partition" : "file\t",
1371 ptr->pages << (PAGE_SHIFT - 10),
1372 ptr->inuse_pages << (PAGE_SHIFT - 10),
1373 ptr->prio);
1374 return 0;
1375}
1376
/* seq_file iterator callbacks for /proc/swaps. */
static const struct seq_operations swaps_op = {
	.start =	swap_start,
	.next =		swap_next,
	.stop =		swap_stop,
	.show =		swap_show
};
1383
/* ->open for /proc/swaps: attach the swaps_op iterator to @file. */
static int swaps_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &swaps_op);
}
1388
/* File operations for /proc/swaps; everything except open is the generic seq_file helper. */
static const struct file_operations proc_swaps_operations = {
	.open		= swaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1395
1396static int __init procswaps_init(void)
1397{
1398 struct proc_dir_entry *entry;
1399
1400 entry = create_proc_entry("swaps", 0, NULL);
1401 if (entry)
1402 entry->proc_fops = &proc_swaps_operations;
1403 return 0;
1404}
1405__initcall(procswaps_init);
1406#endif
1407
1408
1409
1410
1411
1412
1413asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1414{
1415 struct swap_info_struct * p;
1416 char *name = NULL;
1417 struct block_device *bdev = NULL;
1418 struct file *swap_file = NULL;
1419 struct address_space *mapping;
1420 unsigned int type;
1421 int i, prev;
1422 int error;
1423 static int least_priority;
1424 union swap_header *swap_header = NULL;
1425 int swap_header_version;
1426 unsigned int nr_good_pages = 0;
1427 int nr_extents = 0;
1428 sector_t span;
1429 unsigned long maxpages = 1;
1430 int swapfilesize;
1431 unsigned short *swap_map;
1432 struct page *page = NULL;
1433 struct inode *inode = NULL;
1434 int did_down = 0;
1435
1436 if (!capable(CAP_SYS_ADMIN))
1437 return -EPERM;
1438 spin_lock(&swap_lock);
1439 p = swap_info;
1440 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1441 if (!(p->flags & SWP_USED))
1442 break;
1443 error = -EPERM;
1444 if (type >= MAX_SWAPFILES) {
1445 spin_unlock(&swap_lock);
1446 goto out;
1447 }
1448 if (type >= nr_swapfiles)
1449 nr_swapfiles = type+1;
1450 INIT_LIST_HEAD(&p->extent_list);
1451 p->flags = SWP_USED;
1452 p->swap_file = NULL;
1453 p->old_block_size = 0;
1454 p->swap_map = NULL;
1455 p->lowest_bit = 0;
1456 p->highest_bit = 0;
1457 p->cluster_nr = 0;
1458 p->inuse_pages = 0;
1459 p->next = -1;
1460 if (swap_flags & SWAP_FLAG_PREFER) {
1461 p->prio =
1462 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1463 } else {
1464 p->prio = --least_priority;
1465 }
1466 spin_unlock(&swap_lock);
1467 name = getname(specialfile);
1468 error = PTR_ERR(name);
1469 if (IS_ERR(name)) {
1470 name = NULL;
1471 goto bad_swap_2;
1472 }
1473 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1474 error = PTR_ERR(swap_file);
1475 if (IS_ERR(swap_file)) {
1476 swap_file = NULL;
1477 goto bad_swap_2;
1478 }
1479
1480 p->swap_file = swap_file;
1481 mapping = swap_file->f_mapping;
1482 inode = mapping->host;
1483
1484 error = -EBUSY;
1485 for (i = 0; i < nr_swapfiles; i++) {
1486 struct swap_info_struct *q = &swap_info[i];
1487
1488 if (i == type || !q->swap_file)
1489 continue;
1490 if (mapping == q->swap_file->f_mapping)
1491 goto bad_swap;
1492 }
1493
1494 error = -EINVAL;
1495 if (S_ISBLK(inode->i_mode)) {
1496 bdev = I_BDEV(inode);
1497 error = bd_claim(bdev, sys_swapon);
1498 if (error < 0) {
1499 bdev = NULL;
1500 error = -EINVAL;
1501 goto bad_swap;
1502 }
1503 p->old_block_size = block_size(bdev);
1504 error = set_blocksize(bdev, PAGE_SIZE);
1505 if (error < 0)
1506 goto bad_swap;
1507 p->bdev = bdev;
1508 } else if (S_ISREG(inode->i_mode)) {
1509 p->bdev = inode->i_sb->s_bdev;
1510 mutex_lock(&inode->i_mutex);
1511 did_down = 1;
1512 if (IS_SWAPFILE(inode)) {
1513 error = -EBUSY;
1514 goto bad_swap;
1515 }
1516 } else {
1517 goto bad_swap;
1518 }
1519
1520 swapfilesize = i_size_read(inode) >> PAGE_SHIFT;
1521
1522
1523
1524
1525 if (!mapping->a_ops->readpage) {
1526 error = -EINVAL;
1527 goto bad_swap;
1528 }
1529 page = read_mapping_page(mapping, 0, swap_file);
1530 if (IS_ERR(page)) {
1531 error = PTR_ERR(page);
1532 goto bad_swap;
1533 }
1534 kmap(page);
1535 swap_header = page_address(page);
1536
1537 if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
1538 swap_header_version = 1;
1539 else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
1540 swap_header_version = 2;
1541 else {
1542 printk(KERN_ERR "Unable to find swap-space signature\n");
1543 error = -EINVAL;
1544 goto bad_swap;
1545 }
1546
1547 switch (swap_header_version) {
1548 case 1:
1549 printk(KERN_ERR "version 0 swap is no longer supported. "
1550 "Use mkswap -v1 %s\n", name);
1551 error = -EINVAL;
1552 goto bad_swap;
1553 case 2:
1554
1555
1556 if (swap_header->info.version != 1) {
1557 printk(KERN_WARNING
1558 "Unable to handle swap header version %d\n",
1559 swap_header->info.version);
1560 error = -EINVAL;
1561 goto bad_swap;
1562 }
1563
1564 p->lowest_bit = 1;
1565 p->cluster_next = 1;
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581 maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
1582 if (maxpages > swap_header->info.last_page)
1583 maxpages = swap_header->info.last_page;
1584 p->highest_bit = maxpages - 1;
1585
1586 error = -EINVAL;
1587 if (!maxpages)
1588 goto bad_swap;
1589 if (swapfilesize && maxpages > swapfilesize) {
1590 printk(KERN_WARNING
1591 "Swap area shorter than signature indicates\n");
1592 goto bad_swap;
1593 }
1594 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
1595 goto bad_swap;
1596 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1597 goto bad_swap;
1598
1599
1600 if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
1601 error = -ENOMEM;
1602 goto bad_swap;
1603 }
1604
1605 error = 0;
1606 memset(p->swap_map, 0, maxpages * sizeof(short));
1607 for (i = 0; i < swap_header->info.nr_badpages; i++) {
1608 int page_nr = swap_header->info.badpages[i];
1609 if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
1610 error = -EINVAL;
1611 else
1612 p->swap_map[page_nr] = SWAP_MAP_BAD;
1613 }
1614 nr_good_pages = swap_header->info.last_page -
1615 swap_header->info.nr_badpages -
1616 1 ;
1617 if (error)
1618 goto bad_swap;
1619 }
1620
1621 if (nr_good_pages) {
1622 p->swap_map[0] = SWAP_MAP_BAD;
1623 p->max = maxpages;
1624 p->pages = nr_good_pages;
1625 nr_extents = setup_swap_extents(p, &span);
1626 if (nr_extents < 0) {
1627 error = nr_extents;
1628 goto bad_swap;
1629 }
1630 nr_good_pages = p->pages;
1631 }
1632 if (!nr_good_pages) {
1633 printk(KERN_WARNING "Empty swap-file\n");
1634 error = -EINVAL;
1635 goto bad_swap;
1636 }
1637
1638 mutex_lock(&swapon_mutex);
1639 spin_lock(&swap_lock);
1640 p->flags = SWP_ACTIVE;
1641 nr_swap_pages += nr_good_pages;
1642 total_swap_pages += nr_good_pages;
1643
1644 printk(KERN_INFO "Adding %uk swap on %s. "
1645 "Priority:%d extents:%d across:%lluk\n",
1646 nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
1647 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10));
1648
1649
1650 prev = -1;
1651 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1652 if (p->prio >= swap_info[i].prio) {
1653 break;
1654 }
1655 prev = i;
1656 }
1657 p->next = i;
1658 if (prev < 0) {
1659 swap_list.head = swap_list.next = p - swap_info;
1660 } else {
1661 swap_info[prev].next = p - swap_info;
1662 }
1663 spin_unlock(&swap_lock);
1664 mutex_unlock(&swapon_mutex);
1665 error = 0;
1666 goto out;
1667bad_swap:
1668 if (bdev) {
1669 set_blocksize(bdev, p->old_block_size);
1670 bd_release(bdev);
1671 }
1672 destroy_swap_extents(p);
1673bad_swap_2:
1674 spin_lock(&swap_lock);
1675 swap_map = p->swap_map;
1676 p->swap_file = NULL;
1677 p->swap_map = NULL;
1678 p->flags = 0;
1679 if (!(swap_flags & SWAP_FLAG_PREFER))
1680 ++least_priority;
1681 spin_unlock(&swap_lock);
1682 vfree(swap_map);
1683 if (swap_file)
1684 filp_close(swap_file, NULL);
1685out:
1686 if (page && !IS_ERR(page)) {
1687 kunmap(page);
1688 page_cache_release(page);
1689 }
1690 if (name)
1691 putname(name);
1692 if (did_down) {
1693 if (!error)
1694 inode->i_flags |= S_SWAPFILE;
1695 mutex_unlock(&inode->i_mutex);
1696 }
1697 return error;
1698}
1699
1700void si_swapinfo(struct sysinfo *val)
1701{
1702 unsigned int i;
1703 unsigned long nr_to_be_unused = 0;
1704
1705 spin_lock(&swap_lock);
1706 for (i = 0; i < nr_swapfiles; i++) {
1707 if (!(swap_info[i].flags & SWP_USED) ||
1708 (swap_info[i].flags & SWP_WRITEOK))
1709 continue;
1710 nr_to_be_unused += swap_info[i].inuse_pages;
1711 }
1712 val->freeswap = nr_swap_pages + nr_to_be_unused;
1713 val->totalswap = total_swap_pages + nr_to_be_unused;
1714 spin_unlock(&swap_lock);
1715}
1716
1717
1718
1719
1720
1721
1722
1723int swap_duplicate(swp_entry_t entry)
1724{
1725 struct swap_info_struct * p;
1726 unsigned long offset, type;
1727 int result = 0;
1728
1729 if (is_migration_entry(entry))
1730 return 1;
1731
1732 type = swp_type(entry);
1733 if (type >= nr_swapfiles)
1734 goto bad_file;
1735 p = type + swap_info;
1736 offset = swp_offset(entry);
1737
1738 spin_lock(&swap_lock);
1739 if (offset < p->max && p->swap_map[offset]) {
1740 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
1741 p->swap_map[offset]++;
1742 result = 1;
1743 } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
1744 if (swap_overflow++ < 5)
1745 printk(KERN_WARNING "swap_dup: swap entry overflow\n");
1746 p->swap_map[offset] = SWAP_MAP_MAX;
1747 result = 1;
1748 }
1749 }
1750 spin_unlock(&swap_lock);
1751out:
1752 return result;
1753
1754bad_file:
1755 printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
1756 goto out;
1757}
1758
1759struct swap_info_struct *
1760get_swap_info_struct(unsigned type)
1761{
1762 return &swap_info[type];
1763}
1764
1765
1766
1767
1768
1769int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1770{
1771 int our_page_cluster = page_cluster;
1772 int ret = 0, i = 1 << our_page_cluster;
1773 unsigned long toff;
1774 struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
1775
1776 if (!our_page_cluster)
1777 return 0;
1778 toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
1779 if (!toff)
1780 toff++, i--;
1781 *offset = toff;
1782
1783 spin_lock(&swap_lock);
1784 do {
1785
1786 if (toff >= swapdev->max)
1787 break;
1788
1789 if (!swapdev->swap_map[toff])
1790 break;
1791 if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
1792 break;
1793 toff++;
1794 ret++;
1795 } while (--i);
1796 spin_unlock(&swap_lock);
1797 return ret;
1798}
1799