1
2
3
4
5
6
7
8#include <linux/mm.h>
9#include <linux/hugetlb.h>
10#include <linux/mman.h>
11#include <linux/slab.h>
12#include <linux/kernel_stat.h>
13#include <linux/swap.h>
14#include <linux/vmalloc.h>
15#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/shm.h>
18#include <linux/blkdev.h>
19#include <linux/random.h>
20#include <linux/writeback.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/rmap.h>
26#include <linux/security.h>
27#include <linux/backing-dev.h>
28#include <linux/mutex.h>
29#include <linux/capability.h>
30#include <linux/syscalls.h>
31#include <linux/memcontrol.h>
32
33#include <asm/pgtable.h>
34#include <asm/tlbflush.h>
35#include <linux/swapops.h>
36#include <linux/page_cgroup.h>
37
38static DEFINE_SPINLOCK(swap_lock);
39static unsigned int nr_swapfiles;
40long nr_swap_pages;
41long total_swap_pages;
42static int swap_overflow;
43static int least_priority;
44
45static const char Bad_file[] = "Bad swap file entry ";
46static const char Unused_file[] = "Unused swap file entry ";
47static const char Bad_offset[] = "Bad swap offset entry ";
48static const char Unused_offset[] = "Unused swap offset entry ";
49
50static struct swap_list_t swap_list = {-1, -1};
51
52static struct swap_info_struct swap_info[MAX_SWAPFILES];
53
54static DEFINE_MUTEX(swapon_mutex);
55
56
57
58
59
60
61static DECLARE_RWSEM(swap_unplug_sem);
62
63void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
64{
65 swp_entry_t entry;
66
67 down_read(&swap_unplug_sem);
68 entry.val = page_private(page);
69 if (PageSwapCache(page)) {
70 struct block_device *bdev = swap_info[swp_type(entry)].bdev;
71 struct backing_dev_info *bdi;
72
73
74
75
76
77
78
79
80
81 WARN_ON(page_count(page) <= 1);
82
83 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
84 blk_run_backing_dev(bdi, page);
85 }
86 up_read(&swap_unplug_sem);
87}
88
89
90
91
92
93static int discard_swap(struct swap_info_struct *si)
94{
95 struct swap_extent *se;
96 int err = 0;
97
98 list_for_each_entry(se, &si->extent_list, list) {
99 sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
100 sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
101
102 if (se->start_page == 0) {
103
104 start_block += 1 << (PAGE_SHIFT - 9);
105 nr_blocks -= 1 << (PAGE_SHIFT - 9);
106 if (!nr_blocks)
107 continue;
108 }
109
110 err = blkdev_issue_discard(si->bdev, start_block,
111 nr_blocks, GFP_KERNEL);
112 if (err)
113 break;
114
115 cond_resched();
116 }
117 return err;
118}
119
120
121
122
123
124static void discard_swap_cluster(struct swap_info_struct *si,
125 pgoff_t start_page, pgoff_t nr_pages)
126{
127 struct swap_extent *se = si->curr_swap_extent;
128 int found_extent = 0;
129
130 while (nr_pages) {
131 struct list_head *lh;
132
133 if (se->start_page <= start_page &&
134 start_page < se->start_page + se->nr_pages) {
135 pgoff_t offset = start_page - se->start_page;
136 sector_t start_block = se->start_block + offset;
137 sector_t nr_blocks = se->nr_pages - offset;
138
139 if (nr_blocks > nr_pages)
140 nr_blocks = nr_pages;
141 start_page += nr_blocks;
142 nr_pages -= nr_blocks;
143
144 if (!found_extent++)
145 si->curr_swap_extent = se;
146
147 start_block <<= PAGE_SHIFT - 9;
148 nr_blocks <<= PAGE_SHIFT - 9;
149 if (blkdev_issue_discard(si->bdev, start_block,
150 nr_blocks, GFP_NOIO))
151 break;
152 }
153
154 lh = se->list.next;
155 if (lh == &si->extent_list)
156 lh = lh->next;
157 se = list_entry(lh, struct swap_extent, list);
158 }
159}
160
/*
 * Action callback handed to wait_on_bit() by scan_swap_map(): yield the CPU
 * once and report success.  @word is not examined.
 */
static int wait_for_discard(void *word)
{
	schedule();

	return 0;
}
166
167#define SWAPFILE_CLUSTER 256
168#define LATENCY_LIMIT 256
169
170static inline unsigned long scan_swap_map(struct swap_info_struct *si)
171{
172 unsigned long offset;
173 unsigned long scan_base;
174 unsigned long last_in_cluster = 0;
175 int latency_ration = LATENCY_LIMIT;
176 int found_free_cluster = 0;
177
178
179
180
181
182
183
184
185
186
187
188
189 si->flags += SWP_SCANNING;
190 scan_base = offset = si->cluster_next;
191
192 if (unlikely(!si->cluster_nr--)) {
193 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
194 si->cluster_nr = SWAPFILE_CLUSTER - 1;
195 goto checks;
196 }
197 if (si->flags & SWP_DISCARDABLE) {
198
199
200
201
202
203
204
205 if (si->lowest_alloc)
206 goto checks;
207 si->lowest_alloc = si->max;
208 si->highest_alloc = 0;
209 }
210 spin_unlock(&swap_lock);
211
212
213
214
215
216
217
218
219
220 if (!(si->flags & SWP_SOLIDSTATE))
221 scan_base = offset = si->lowest_bit;
222 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
223
224
225 for (; last_in_cluster <= si->highest_bit; offset++) {
226 if (si->swap_map[offset])
227 last_in_cluster = offset + SWAPFILE_CLUSTER;
228 else if (offset == last_in_cluster) {
229 spin_lock(&swap_lock);
230 offset -= SWAPFILE_CLUSTER - 1;
231 si->cluster_next = offset;
232 si->cluster_nr = SWAPFILE_CLUSTER - 1;
233 found_free_cluster = 1;
234 goto checks;
235 }
236 if (unlikely(--latency_ration < 0)) {
237 cond_resched();
238 latency_ration = LATENCY_LIMIT;
239 }
240 }
241
242 offset = si->lowest_bit;
243 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
244
245
246 for (; last_in_cluster < scan_base; offset++) {
247 if (si->swap_map[offset])
248 last_in_cluster = offset + SWAPFILE_CLUSTER;
249 else if (offset == last_in_cluster) {
250 spin_lock(&swap_lock);
251 offset -= SWAPFILE_CLUSTER - 1;
252 si->cluster_next = offset;
253 si->cluster_nr = SWAPFILE_CLUSTER - 1;
254 found_free_cluster = 1;
255 goto checks;
256 }
257 if (unlikely(--latency_ration < 0)) {
258 cond_resched();
259 latency_ration = LATENCY_LIMIT;
260 }
261 }
262
263 offset = scan_base;
264 spin_lock(&swap_lock);
265 si->cluster_nr = SWAPFILE_CLUSTER - 1;
266 si->lowest_alloc = 0;
267 }
268
269checks:
270 if (!(si->flags & SWP_WRITEOK))
271 goto no_page;
272 if (!si->highest_bit)
273 goto no_page;
274 if (offset > si->highest_bit)
275 scan_base = offset = si->lowest_bit;
276 if (si->swap_map[offset])
277 goto scan;
278
279 if (offset == si->lowest_bit)
280 si->lowest_bit++;
281 if (offset == si->highest_bit)
282 si->highest_bit--;
283 si->inuse_pages++;
284 if (si->inuse_pages == si->pages) {
285 si->lowest_bit = si->max;
286 si->highest_bit = 0;
287 }
288 si->swap_map[offset] = 1;
289 si->cluster_next = offset + 1;
290 si->flags -= SWP_SCANNING;
291
292 if (si->lowest_alloc) {
293
294
295
296
297 if (found_free_cluster) {
298
299
300
301
302
303
304
305 if (offset < si->highest_alloc &&
306 si->lowest_alloc <= last_in_cluster)
307 last_in_cluster = si->lowest_alloc - 1;
308 si->flags |= SWP_DISCARDING;
309 spin_unlock(&swap_lock);
310
311 if (offset < last_in_cluster)
312 discard_swap_cluster(si, offset,
313 last_in_cluster - offset + 1);
314
315 spin_lock(&swap_lock);
316 si->lowest_alloc = 0;
317 si->flags &= ~SWP_DISCARDING;
318
319 smp_mb();
320 wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));
321
322 } else if (si->flags & SWP_DISCARDING) {
323
324
325
326
327
328
329 spin_unlock(&swap_lock);
330 wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
331 wait_for_discard, TASK_UNINTERRUPTIBLE);
332 spin_lock(&swap_lock);
333 } else {
334
335
336
337
338
339 if (offset < si->lowest_alloc)
340 si->lowest_alloc = offset;
341 if (offset > si->highest_alloc)
342 si->highest_alloc = offset;
343 }
344 }
345 return offset;
346
347scan:
348 spin_unlock(&swap_lock);
349 while (++offset <= si->highest_bit) {
350 if (!si->swap_map[offset]) {
351 spin_lock(&swap_lock);
352 goto checks;
353 }
354 if (unlikely(--latency_ration < 0)) {
355 cond_resched();
356 latency_ration = LATENCY_LIMIT;
357 }
358 }
359 offset = si->lowest_bit;
360 while (++offset < scan_base) {
361 if (!si->swap_map[offset]) {
362 spin_lock(&swap_lock);
363 goto checks;
364 }
365 if (unlikely(--latency_ration < 0)) {
366 cond_resched();
367 latency_ration = LATENCY_LIMIT;
368 }
369 }
370 spin_lock(&swap_lock);
371
372no_page:
373 si->flags -= SWP_SCANNING;
374 return 0;
375}
376
377swp_entry_t get_swap_page(void)
378{
379 struct swap_info_struct *si;
380 pgoff_t offset;
381 int type, next;
382 int wrapped = 0;
383
384 spin_lock(&swap_lock);
385 if (nr_swap_pages <= 0)
386 goto noswap;
387 nr_swap_pages--;
388
389 for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
390 si = swap_info + type;
391 next = si->next;
392 if (next < 0 ||
393 (!wrapped && si->prio != swap_info[next].prio)) {
394 next = swap_list.head;
395 wrapped++;
396 }
397
398 if (!si->highest_bit)
399 continue;
400 if (!(si->flags & SWP_WRITEOK))
401 continue;
402
403 swap_list.next = next;
404 offset = scan_swap_map(si);
405 if (offset) {
406 spin_unlock(&swap_lock);
407 return swp_entry(type, offset);
408 }
409 next = swap_list.next;
410 }
411
412 nr_swap_pages++;
413noswap:
414 spin_unlock(&swap_lock);
415 return (swp_entry_t) {0};
416}
417
418swp_entry_t get_swap_page_of_type(int type)
419{
420 struct swap_info_struct *si;
421 pgoff_t offset;
422
423 spin_lock(&swap_lock);
424 si = swap_info + type;
425 if (si->flags & SWP_WRITEOK) {
426 nr_swap_pages--;
427 offset = scan_swap_map(si);
428 if (offset) {
429 spin_unlock(&swap_lock);
430 return swp_entry(type, offset);
431 }
432 nr_swap_pages++;
433 }
434 spin_unlock(&swap_lock);
435 return (swp_entry_t) {0};
436}
437
438static struct swap_info_struct * swap_info_get(swp_entry_t entry)
439{
440 struct swap_info_struct * p;
441 unsigned long offset, type;
442
443 if (!entry.val)
444 goto out;
445 type = swp_type(entry);
446 if (type >= nr_swapfiles)
447 goto bad_nofile;
448 p = & swap_info[type];
449 if (!(p->flags & SWP_USED))
450 goto bad_device;
451 offset = swp_offset(entry);
452 if (offset >= p->max)
453 goto bad_offset;
454 if (!p->swap_map[offset])
455 goto bad_free;
456 spin_lock(&swap_lock);
457 return p;
458
459bad_free:
460 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
461 goto out;
462bad_offset:
463 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
464 goto out;
465bad_device:
466 printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
467 goto out;
468bad_nofile:
469 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
470out:
471 return NULL;
472}
473
474static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
475{
476 unsigned long offset = swp_offset(ent);
477 int count = p->swap_map[offset];
478
479 if (count < SWAP_MAP_MAX) {
480 count--;
481 p->swap_map[offset] = count;
482 if (!count) {
483 if (offset < p->lowest_bit)
484 p->lowest_bit = offset;
485 if (offset > p->highest_bit)
486 p->highest_bit = offset;
487 if (p->prio > swap_info[swap_list.next].prio)
488 swap_list.next = p - swap_info;
489 nr_swap_pages++;
490 p->inuse_pages--;
491 mem_cgroup_uncharge_swap(ent);
492 }
493 }
494 return count;
495}
496
497
498
499
500
501void swap_free(swp_entry_t entry)
502{
503 struct swap_info_struct * p;
504
505 p = swap_info_get(entry);
506 if (p) {
507 swap_entry_free(p, entry);
508 spin_unlock(&swap_lock);
509 }
510}
511
512
513
514
515static inline int page_swapcount(struct page *page)
516{
517 int count = 0;
518 struct swap_info_struct *p;
519 swp_entry_t entry;
520
521 entry.val = page_private(page);
522 p = swap_info_get(entry);
523 if (p) {
524
525 count = p->swap_map[swp_offset(entry)] - 1;
526 spin_unlock(&swap_lock);
527 }
528 return count;
529}
530
531
532
533
534
535
536
/*
 * Decide whether locked page @page has exactly one user, counting both its
 * mappings and (for a swap-cache page) its swap references.
 *
 * When it has, and it is not under writeback, the page is also removed from
 * the swap cache and marked dirty.  Returns non-zero iff the total is one.
 */
int reuse_swap_page(struct page *page)
{
	int users;

	VM_BUG_ON(!PageLocked(page));

	users = page_mapcount(page);
	if (users > 1 || !PageSwapCache(page))
		return users == 1;

	users += page_swapcount(page);
	if (users == 1 && !PageWriteback(page)) {
		delete_from_swap_cache(page);
		SetPageDirty(page);
	}
	return users == 1;
}
552
553
554
555
556
/*
 * Drop locked page @page from the swap cache if it is in the swap cache, not
 * under writeback, and page_swapcount() reports no remaining references.
 *
 * Returns 1 if the page was removed (it is then marked dirty), else 0.
 */
int try_to_free_swap(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	if (!PageSwapCache(page) || PageWriteback(page) ||
	    page_swapcount(page))
		return 0;

	delete_from_swap_cache(page);
	SetPageDirty(page);
	return 1;
}
572
573
574
575
576
577int free_swap_and_cache(swp_entry_t entry)
578{
579 struct swap_info_struct *p;
580 struct page *page = NULL;
581
582 if (is_migration_entry(entry))
583 return 1;
584
585 p = swap_info_get(entry);
586 if (p) {
587 if (swap_entry_free(p, entry) == 1) {
588 page = find_get_page(&swapper_space, entry.val);
589 if (page && !trylock_page(page)) {
590 page_cache_release(page);
591 page = NULL;
592 }
593 }
594 spin_unlock(&swap_lock);
595 }
596 if (page) {
597
598
599
600
601 if (PageSwapCache(page) && !PageWriteback(page) &&
602 (!page_mapped(page) || vm_swap_full())) {
603 delete_from_swap_cache(page);
604 SetPageDirty(page);
605 }
606 unlock_page(page);
607 page_cache_release(page);
608 }
609 return p != NULL;
610}
611
612#ifdef CONFIG_HIBERNATION
613
614
615
616
617
618
619
620
621int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
622{
623 struct block_device *bdev = NULL;
624 int i;
625
626 if (device)
627 bdev = bdget(device);
628
629 spin_lock(&swap_lock);
630 for (i = 0; i < nr_swapfiles; i++) {
631 struct swap_info_struct *sis = swap_info + i;
632
633 if (!(sis->flags & SWP_WRITEOK))
634 continue;
635
636 if (!bdev) {
637 if (bdev_p)
638 *bdev_p = bdget(sis->bdev->bd_dev);
639
640 spin_unlock(&swap_lock);
641 return i;
642 }
643 if (bdev == sis->bdev) {
644 struct swap_extent *se;
645
646 se = list_entry(sis->extent_list.next,
647 struct swap_extent, list);
648 if (se->start_block == offset) {
649 if (bdev_p)
650 *bdev_p = bdget(sis->bdev->bd_dev);
651
652 spin_unlock(&swap_lock);
653 bdput(bdev);
654 return i;
655 }
656 }
657 }
658 spin_unlock(&swap_lock);
659 if (bdev)
660 bdput(bdev);
661
662 return -ENODEV;
663}
664
665
666
667
668
669
670
671unsigned int count_swap_pages(int type, int free)
672{
673 unsigned int n = 0;
674
675 if (type < nr_swapfiles) {
676 spin_lock(&swap_lock);
677 if (swap_info[type].flags & SWP_WRITEOK) {
678 n = swap_info[type].pages;
679 if (free)
680 n -= swap_info[type].inuse_pages;
681 }
682 spin_unlock(&swap_lock);
683 }
684 return n;
685}
686#endif
687
688
689
690
691
692
693static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
694 unsigned long addr, swp_entry_t entry, struct page *page)
695{
696 struct mem_cgroup *ptr = NULL;
697 spinlock_t *ptl;
698 pte_t *pte;
699 int ret = 1;
700
701 if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
702 ret = -ENOMEM;
703 goto out_nolock;
704 }
705
706 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
707 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
708 if (ret > 0)
709 mem_cgroup_cancel_charge_swapin(ptr);
710 ret = 0;
711 goto out;
712 }
713
714 inc_mm_counter(vma->vm_mm, anon_rss);
715 get_page(page);
716 set_pte_at(vma->vm_mm, addr, pte,
717 pte_mkold(mk_pte(page, vma->vm_page_prot)));
718 page_add_anon_rmap(page, vma, addr);
719 mem_cgroup_commit_charge_swapin(page, ptr);
720 swap_free(entry);
721
722
723
724
725 activate_page(page);
726out:
727 pte_unmap_unlock(pte, ptl);
728out_nolock:
729 return ret;
730}
731
732static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
733 unsigned long addr, unsigned long end,
734 swp_entry_t entry, struct page *page)
735{
736 pte_t swp_pte = swp_entry_to_pte(entry);
737 pte_t *pte;
738 int ret = 0;
739
740
741
742
743
744
745
746
747
748
749 pte = pte_offset_map(pmd, addr);
750 do {
751
752
753
754
755 if (unlikely(pte_same(*pte, swp_pte))) {
756 pte_unmap(pte);
757 ret = unuse_pte(vma, pmd, addr, entry, page);
758 if (ret)
759 goto out;
760 pte = pte_offset_map(pmd, addr);
761 }
762 } while (pte++, addr += PAGE_SIZE, addr != end);
763 pte_unmap(pte - 1);
764out:
765 return ret;
766}
767
768static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
769 unsigned long addr, unsigned long end,
770 swp_entry_t entry, struct page *page)
771{
772 pmd_t *pmd;
773 unsigned long next;
774 int ret;
775
776 pmd = pmd_offset(pud, addr);
777 do {
778 next = pmd_addr_end(addr, end);
779 if (pmd_none_or_clear_bad(pmd))
780 continue;
781 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
782 if (ret)
783 return ret;
784 } while (pmd++, addr = next, addr != end);
785 return 0;
786}
787
788static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
789 unsigned long addr, unsigned long end,
790 swp_entry_t entry, struct page *page)
791{
792 pud_t *pud;
793 unsigned long next;
794 int ret;
795
796 pud = pud_offset(pgd, addr);
797 do {
798 next = pud_addr_end(addr, end);
799 if (pud_none_or_clear_bad(pud))
800 continue;
801 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
802 if (ret)
803 return ret;
804 } while (pud++, addr = next, addr != end);
805 return 0;
806}
807
808static int unuse_vma(struct vm_area_struct *vma,
809 swp_entry_t entry, struct page *page)
810{
811 pgd_t *pgd;
812 unsigned long addr, end, next;
813 int ret;
814
815 if (page->mapping) {
816 addr = page_address_in_vma(page, vma);
817 if (addr == -EFAULT)
818 return 0;
819 else
820 end = addr + PAGE_SIZE;
821 } else {
822 addr = vma->vm_start;
823 end = vma->vm_end;
824 }
825
826 pgd = pgd_offset(vma->vm_mm, addr);
827 do {
828 next = pgd_addr_end(addr, end);
829 if (pgd_none_or_clear_bad(pgd))
830 continue;
831 ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
832 if (ret)
833 return ret;
834 } while (pgd++, addr = next, addr != end);
835 return 0;
836}
837
838static int unuse_mm(struct mm_struct *mm,
839 swp_entry_t entry, struct page *page)
840{
841 struct vm_area_struct *vma;
842 int ret = 0;
843
844 if (!down_read_trylock(&mm->mmap_sem)) {
845
846
847
848
849 activate_page(page);
850 unlock_page(page);
851 down_read(&mm->mmap_sem);
852 lock_page(page);
853 }
854 for (vma = mm->mmap; vma; vma = vma->vm_next) {
855 if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
856 break;
857 }
858 up_read(&mm->mmap_sem);
859 return (ret < 0)? ret: 0;
860}
861
862
863
864
865
866static unsigned int find_next_to_unuse(struct swap_info_struct *si,
867 unsigned int prev)
868{
869 unsigned int max = si->max;
870 unsigned int i = prev;
871 int count;
872
873
874
875
876
877
878
879 for (;;) {
880 if (++i >= max) {
881 if (!prev) {
882 i = 0;
883 break;
884 }
885
886
887
888
889 max = prev + 1;
890 prev = 0;
891 i = 1;
892 }
893 count = si->swap_map[i];
894 if (count && count != SWAP_MAP_BAD)
895 break;
896 }
897 return i;
898}
899
900
901
902
903
904
905static int try_to_unuse(unsigned int type)
906{
907 struct swap_info_struct * si = &swap_info[type];
908 struct mm_struct *start_mm;
909 unsigned short *swap_map;
910 unsigned short swcount;
911 struct page *page;
912 swp_entry_t entry;
913 unsigned int i = 0;
914 int retval = 0;
915 int reset_overflow = 0;
916 int shmem;
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933 start_mm = &init_mm;
934 atomic_inc(&init_mm.mm_users);
935
936
937
938
939
940
941 while ((i = find_next_to_unuse(si, i)) != 0) {
942 if (signal_pending(current)) {
943 retval = -EINTR;
944 break;
945 }
946
947
948
949
950
951
952 swap_map = &si->swap_map[i];
953 entry = swp_entry(type, i);
954 page = read_swap_cache_async(entry,
955 GFP_HIGHUSER_MOVABLE, NULL, 0);
956 if (!page) {
957
958
959
960
961
962
963 if (!*swap_map)
964 continue;
965 retval = -ENOMEM;
966 break;
967 }
968
969
970
971
972 if (atomic_read(&start_mm->mm_users) == 1) {
973 mmput(start_mm);
974 start_mm = &init_mm;
975 atomic_inc(&init_mm.mm_users);
976 }
977
978
979
980
981
982
983
984
985
986 wait_on_page_locked(page);
987 wait_on_page_writeback(page);
988 lock_page(page);
989 wait_on_page_writeback(page);
990
991
992
993
994
995
996 shmem = 0;
997 swcount = *swap_map;
998 if (swcount > 1) {
999 if (start_mm == &init_mm)
1000 shmem = shmem_unuse(entry, page);
1001 else
1002 retval = unuse_mm(start_mm, entry, page);
1003 }
1004 if (*swap_map > 1) {
1005 int set_start_mm = (*swap_map >= swcount);
1006 struct list_head *p = &start_mm->mmlist;
1007 struct mm_struct *new_start_mm = start_mm;
1008 struct mm_struct *prev_mm = start_mm;
1009 struct mm_struct *mm;
1010
1011 atomic_inc(&new_start_mm->mm_users);
1012 atomic_inc(&prev_mm->mm_users);
1013 spin_lock(&mmlist_lock);
1014 while (*swap_map > 1 && !retval && !shmem &&
1015 (p = p->next) != &start_mm->mmlist) {
1016 mm = list_entry(p, struct mm_struct, mmlist);
1017 if (!atomic_inc_not_zero(&mm->mm_users))
1018 continue;
1019 spin_unlock(&mmlist_lock);
1020 mmput(prev_mm);
1021 prev_mm = mm;
1022
1023 cond_resched();
1024
1025 swcount = *swap_map;
1026 if (swcount <= 1)
1027 ;
1028 else if (mm == &init_mm) {
1029 set_start_mm = 1;
1030 shmem = shmem_unuse(entry, page);
1031 } else
1032 retval = unuse_mm(mm, entry, page);
1033 if (set_start_mm && *swap_map < swcount) {
1034 mmput(new_start_mm);
1035 atomic_inc(&mm->mm_users);
1036 new_start_mm = mm;
1037 set_start_mm = 0;
1038 }
1039 spin_lock(&mmlist_lock);
1040 }
1041 spin_unlock(&mmlist_lock);
1042 mmput(prev_mm);
1043 mmput(start_mm);
1044 start_mm = new_start_mm;
1045 }
1046 if (shmem) {
1047
1048 if (shmem > 0)
1049 continue;
1050 retval = shmem;
1051 break;
1052 }
1053 if (retval) {
1054 unlock_page(page);
1055 page_cache_release(page);
1056 break;
1057 }
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072 if (*swap_map == SWAP_MAP_MAX) {
1073 spin_lock(&swap_lock);
1074 *swap_map = 1;
1075 spin_unlock(&swap_lock);
1076 reset_overflow = 1;
1077 }
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
1093 struct writeback_control wbc = {
1094 .sync_mode = WB_SYNC_NONE,
1095 };
1096
1097 swap_writepage(page, &wbc);
1098 lock_page(page);
1099 wait_on_page_writeback(page);
1100 }
1101
1102
1103
1104
1105
1106
1107
1108
1109 if (PageSwapCache(page) &&
1110 likely(page_private(page) == entry.val))
1111 delete_from_swap_cache(page);
1112
1113
1114
1115
1116
1117
1118 SetPageDirty(page);
1119 unlock_page(page);
1120 page_cache_release(page);
1121
1122
1123
1124
1125
1126 cond_resched();
1127 }
1128
1129 mmput(start_mm);
1130 if (reset_overflow) {
1131 printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
1132 swap_overflow = 0;
1133 }
1134 return retval;
1135}
1136
1137
1138
1139
1140
1141
1142
1143static void drain_mmlist(void)
1144{
1145 struct list_head *p, *next;
1146 unsigned int i;
1147
1148 for (i = 0; i < nr_swapfiles; i++)
1149 if (swap_info[i].inuse_pages)
1150 return;
1151 spin_lock(&mmlist_lock);
1152 list_for_each_safe(p, next, &init_mm.mmlist)
1153 list_del_init(p);
1154 spin_unlock(&mmlist_lock);
1155}
1156
1157
1158
1159
1160
1161sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
1162{
1163 struct swap_extent *se = sis->curr_swap_extent;
1164 struct swap_extent *start_se = se;
1165
1166 for ( ; ; ) {
1167 struct list_head *lh;
1168
1169 if (se->start_page <= offset &&
1170 offset < (se->start_page + se->nr_pages)) {
1171 return se->start_block + (offset - se->start_page);
1172 }
1173 lh = se->list.next;
1174 if (lh == &sis->extent_list)
1175 lh = lh->next;
1176 se = list_entry(lh, struct swap_extent, list);
1177 sis->curr_swap_extent = se;
1178 BUG_ON(se == start_se);
1179 }
1180}
1181
1182#ifdef CONFIG_HIBERNATION
1183
1184
1185
1186
1187sector_t swapdev_block(int swap_type, pgoff_t offset)
1188{
1189 struct swap_info_struct *sis;
1190
1191 if (swap_type >= nr_swapfiles)
1192 return 0;
1193
1194 sis = swap_info + swap_type;
1195 return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;
1196}
1197#endif
1198
1199
1200
1201
1202static void destroy_swap_extents(struct swap_info_struct *sis)
1203{
1204 while (!list_empty(&sis->extent_list)) {
1205 struct swap_extent *se;
1206
1207 se = list_entry(sis->extent_list.next,
1208 struct swap_extent, list);
1209 list_del(&se->list);
1210 kfree(se);
1211 }
1212}
1213
1214
1215
1216
1217
1218
1219
1220static int
1221add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
1222 unsigned long nr_pages, sector_t start_block)
1223{
1224 struct swap_extent *se;
1225 struct swap_extent *new_se;
1226 struct list_head *lh;
1227
1228 lh = sis->extent_list.prev;
1229 if (lh != &sis->extent_list) {
1230 se = list_entry(lh, struct swap_extent, list);
1231 BUG_ON(se->start_page + se->nr_pages != start_page);
1232 if (se->start_block + se->nr_pages == start_block) {
1233
1234 se->nr_pages += nr_pages;
1235 return 0;
1236 }
1237 }
1238
1239
1240
1241
1242 new_se = kmalloc(sizeof(*se), GFP_KERNEL);
1243 if (new_se == NULL)
1244 return -ENOMEM;
1245 new_se->start_page = start_page;
1246 new_se->nr_pages = nr_pages;
1247 new_se->start_block = start_block;
1248
1249 list_add_tail(&new_se->list, &sis->extent_list);
1250 return 1;
1251}
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
1285{
1286 struct inode *inode;
1287 unsigned blocks_per_page;
1288 unsigned long page_no;
1289 unsigned blkbits;
1290 sector_t probe_block;
1291 sector_t last_block;
1292 sector_t lowest_block = -1;
1293 sector_t highest_block = 0;
1294 int nr_extents = 0;
1295 int ret;
1296
1297 inode = sis->swap_file->f_mapping->host;
1298 if (S_ISBLK(inode->i_mode)) {
1299 ret = add_swap_extent(sis, 0, sis->max, 0);
1300 *span = sis->pages;
1301 goto done;
1302 }
1303
1304 blkbits = inode->i_blkbits;
1305 blocks_per_page = PAGE_SIZE >> blkbits;
1306
1307
1308
1309
1310
1311 probe_block = 0;
1312 page_no = 0;
1313 last_block = i_size_read(inode) >> blkbits;
1314 while ((probe_block + blocks_per_page) <= last_block &&
1315 page_no < sis->max) {
1316 unsigned block_in_page;
1317 sector_t first_block;
1318
1319 first_block = bmap(inode, probe_block);
1320 if (first_block == 0)
1321 goto bad_bmap;
1322
1323
1324
1325
1326 if (first_block & (blocks_per_page - 1)) {
1327 probe_block++;
1328 goto reprobe;
1329 }
1330
1331 for (block_in_page = 1; block_in_page < blocks_per_page;
1332 block_in_page++) {
1333 sector_t block;
1334
1335 block = bmap(inode, probe_block + block_in_page);
1336 if (block == 0)
1337 goto bad_bmap;
1338 if (block != first_block + block_in_page) {
1339
1340 probe_block++;
1341 goto reprobe;
1342 }
1343 }
1344
1345 first_block >>= (PAGE_SHIFT - blkbits);
1346 if (page_no) {
1347 if (first_block < lowest_block)
1348 lowest_block = first_block;
1349 if (first_block > highest_block)
1350 highest_block = first_block;
1351 }
1352
1353
1354
1355
1356 ret = add_swap_extent(sis, page_no, 1, first_block);
1357 if (ret < 0)
1358 goto out;
1359 nr_extents += ret;
1360 page_no++;
1361 probe_block += blocks_per_page;
1362reprobe:
1363 continue;
1364 }
1365 ret = nr_extents;
1366 *span = 1 + highest_block - lowest_block;
1367 if (page_no == 0)
1368 page_no = 1;
1369 sis->max = page_no;
1370 sis->pages = page_no - 1;
1371 sis->highest_bit = page_no - 1;
1372done:
1373 sis->curr_swap_extent = list_entry(sis->extent_list.prev,
1374 struct swap_extent, list);
1375 goto out;
1376bad_bmap:
1377 printk(KERN_ERR "swapon: swapfile has holes\n");
1378 ret = -EINVAL;
1379out:
1380 return ret;
1381}
1382
1383SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1384{
1385 struct swap_info_struct * p = NULL;
1386 unsigned short *swap_map;
1387 struct file *swap_file, *victim;
1388 struct address_space *mapping;
1389 struct inode *inode;
1390 char * pathname;
1391 int i, type, prev;
1392 int err;
1393
1394 if (!capable(CAP_SYS_ADMIN))
1395 return -EPERM;
1396
1397 pathname = getname(specialfile);
1398 err = PTR_ERR(pathname);
1399 if (IS_ERR(pathname))
1400 goto out;
1401
1402 victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
1403 putname(pathname);
1404 err = PTR_ERR(victim);
1405 if (IS_ERR(victim))
1406 goto out;
1407
1408 mapping = victim->f_mapping;
1409 prev = -1;
1410 spin_lock(&swap_lock);
1411 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1412 p = swap_info + type;
1413 if (p->flags & SWP_WRITEOK) {
1414 if (p->swap_file->f_mapping == mapping)
1415 break;
1416 }
1417 prev = type;
1418 }
1419 if (type < 0) {
1420 err = -EINVAL;
1421 spin_unlock(&swap_lock);
1422 goto out_dput;
1423 }
1424 if (!security_vm_enough_memory(p->pages))
1425 vm_unacct_memory(p->pages);
1426 else {
1427 err = -ENOMEM;
1428 spin_unlock(&swap_lock);
1429 goto out_dput;
1430 }
1431 if (prev < 0) {
1432 swap_list.head = p->next;
1433 } else {
1434 swap_info[prev].next = p->next;
1435 }
1436 if (type == swap_list.next) {
1437
1438 swap_list.next = swap_list.head;
1439 }
1440 if (p->prio < 0) {
1441 for (i = p->next; i >= 0; i = swap_info[i].next)
1442 swap_info[i].prio = p->prio--;
1443 least_priority++;
1444 }
1445 nr_swap_pages -= p->pages;
1446 total_swap_pages -= p->pages;
1447 p->flags &= ~SWP_WRITEOK;
1448 spin_unlock(&swap_lock);
1449
1450 current->flags |= PF_SWAPOFF;
1451 err = try_to_unuse(type);
1452 current->flags &= ~PF_SWAPOFF;
1453
1454 if (err) {
1455
1456 spin_lock(&swap_lock);
1457 if (p->prio < 0)
1458 p->prio = --least_priority;
1459 prev = -1;
1460 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1461 if (p->prio >= swap_info[i].prio)
1462 break;
1463 prev = i;
1464 }
1465 p->next = i;
1466 if (prev < 0)
1467 swap_list.head = swap_list.next = p - swap_info;
1468 else
1469 swap_info[prev].next = p - swap_info;
1470 nr_swap_pages += p->pages;
1471 total_swap_pages += p->pages;
1472 p->flags |= SWP_WRITEOK;
1473 spin_unlock(&swap_lock);
1474 goto out_dput;
1475 }
1476
1477
1478 down_write(&swap_unplug_sem);
1479 up_write(&swap_unplug_sem);
1480
1481 destroy_swap_extents(p);
1482 mutex_lock(&swapon_mutex);
1483 spin_lock(&swap_lock);
1484 drain_mmlist();
1485
1486
1487 p->highest_bit = 0;
1488 while (p->flags >= SWP_SCANNING) {
1489 spin_unlock(&swap_lock);
1490 schedule_timeout_uninterruptible(1);
1491 spin_lock(&swap_lock);
1492 }
1493
1494 swap_file = p->swap_file;
1495 p->swap_file = NULL;
1496 p->max = 0;
1497 swap_map = p->swap_map;
1498 p->swap_map = NULL;
1499 p->flags = 0;
1500 spin_unlock(&swap_lock);
1501 mutex_unlock(&swapon_mutex);
1502 vfree(swap_map);
1503
1504 swap_cgroup_swapoff(type);
1505
1506 inode = mapping->host;
1507 if (S_ISBLK(inode->i_mode)) {
1508 struct block_device *bdev = I_BDEV(inode);
1509 set_blocksize(bdev, p->old_block_size);
1510 bd_release(bdev);
1511 } else {
1512 mutex_lock(&inode->i_mutex);
1513 inode->i_flags &= ~S_SWAPFILE;
1514 mutex_unlock(&inode->i_mutex);
1515 }
1516 filp_close(swap_file, NULL);
1517 err = 0;
1518
1519out_dput:
1520 filp_close(victim, NULL);
1521out:
1522 return err;
1523}
1524
1525#ifdef CONFIG_PROC_FS
1526
1527static void *swap_start(struct seq_file *swap, loff_t *pos)
1528{
1529 struct swap_info_struct *ptr = swap_info;
1530 int i;
1531 loff_t l = *pos;
1532
1533 mutex_lock(&swapon_mutex);
1534
1535 if (!l)
1536 return SEQ_START_TOKEN;
1537
1538 for (i = 0; i < nr_swapfiles; i++, ptr++) {
1539 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1540 continue;
1541 if (!--l)
1542 return ptr;
1543 }
1544
1545 return NULL;
1546}
1547
1548static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
1549{
1550 struct swap_info_struct *ptr;
1551 struct swap_info_struct *endptr = swap_info + nr_swapfiles;
1552
1553 if (v == SEQ_START_TOKEN)
1554 ptr = swap_info;
1555 else {
1556 ptr = v;
1557 ptr++;
1558 }
1559
1560 for (; ptr < endptr; ptr++) {
1561 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1562 continue;
1563 ++*pos;
1564 return ptr;
1565 }
1566
1567 return NULL;
1568}
1569
1570static void swap_stop(struct seq_file *swap, void *v)
1571{
1572 mutex_unlock(&swapon_mutex);
1573}
1574
1575static int swap_show(struct seq_file *swap, void *v)
1576{
1577 struct swap_info_struct *ptr = v;
1578 struct file *file;
1579 int len;
1580
1581 if (ptr == SEQ_START_TOKEN) {
1582 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
1583 return 0;
1584 }
1585
1586 file = ptr->swap_file;
1587 len = seq_path(swap, &file->f_path, " \t\n\\");
1588 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
1589 len < 40 ? 40 - len : 1, " ",
1590 S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
1591 "partition" : "file\t",
1592 ptr->pages << (PAGE_SHIFT - 10),
1593 ptr->inuse_pages << (PAGE_SHIFT - 10),
1594 ptr->prio);
1595 return 0;
1596}
1597
1598static const struct seq_operations swaps_op = {
1599 .start = swap_start,
1600 .next = swap_next,
1601 .stop = swap_stop,
1602 .show = swap_show
1603};
1604
1605static int swaps_open(struct inode *inode, struct file *file)
1606{
1607 return seq_open(file, &swaps_op);
1608}
1609
1610static const struct file_operations proc_swaps_operations = {
1611 .open = swaps_open,
1612 .read = seq_read,
1613 .llseek = seq_lseek,
1614 .release = seq_release,
1615};
1616
1617static int __init procswaps_init(void)
1618{
1619 proc_create("swaps", 0, NULL, &proc_swaps_operations);
1620 return 0;
1621}
1622__initcall(procswaps_init);
1623#endif
1624
1625#ifdef MAX_SWAPFILES_CHECK
1626static int __init max_swapfiles_check(void)
1627{
1628 MAX_SWAPFILES_CHECK();
1629 return 0;
1630}
1631late_initcall(max_swapfiles_check);
1632#endif
1633
1634
1635
1636
1637
1638
1639SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1640{
1641 struct swap_info_struct * p;
1642 char *name = NULL;
1643 struct block_device *bdev = NULL;
1644 struct file *swap_file = NULL;
1645 struct address_space *mapping;
1646 unsigned int type;
1647 int i, prev;
1648 int error;
1649 union swap_header *swap_header = NULL;
1650 unsigned int nr_good_pages = 0;
1651 int nr_extents = 0;
1652 sector_t span;
1653 unsigned long maxpages = 1;
1654 unsigned long swapfilepages;
1655 unsigned short *swap_map = NULL;
1656 struct page *page = NULL;
1657 struct inode *inode = NULL;
1658 int did_down = 0;
1659
1660 if (!capable(CAP_SYS_ADMIN))
1661 return -EPERM;
1662 spin_lock(&swap_lock);
1663 p = swap_info;
1664 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1665 if (!(p->flags & SWP_USED))
1666 break;
1667 error = -EPERM;
1668 if (type >= MAX_SWAPFILES) {
1669 spin_unlock(&swap_lock);
1670 goto out;
1671 }
1672 if (type >= nr_swapfiles)
1673 nr_swapfiles = type+1;
1674 memset(p, 0, sizeof(*p));
1675 INIT_LIST_HEAD(&p->extent_list);
1676 p->flags = SWP_USED;
1677 p->next = -1;
1678 spin_unlock(&swap_lock);
1679 name = getname(specialfile);
1680 error = PTR_ERR(name);
1681 if (IS_ERR(name)) {
1682 name = NULL;
1683 goto bad_swap_2;
1684 }
1685 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1686 error = PTR_ERR(swap_file);
1687 if (IS_ERR(swap_file)) {
1688 swap_file = NULL;
1689 goto bad_swap_2;
1690 }
1691
1692 p->swap_file = swap_file;
1693 mapping = swap_file->f_mapping;
1694 inode = mapping->host;
1695
1696 error = -EBUSY;
1697 for (i = 0; i < nr_swapfiles; i++) {
1698 struct swap_info_struct *q = &swap_info[i];
1699
1700 if (i == type || !q->swap_file)
1701 continue;
1702 if (mapping == q->swap_file->f_mapping)
1703 goto bad_swap;
1704 }
1705
1706 error = -EINVAL;
1707 if (S_ISBLK(inode->i_mode)) {
1708 bdev = I_BDEV(inode);
1709 error = bd_claim(bdev, sys_swapon);
1710 if (error < 0) {
1711 bdev = NULL;
1712 error = -EINVAL;
1713 goto bad_swap;
1714 }
1715 p->old_block_size = block_size(bdev);
1716 error = set_blocksize(bdev, PAGE_SIZE);
1717 if (error < 0)
1718 goto bad_swap;
1719 p->bdev = bdev;
1720 } else if (S_ISREG(inode->i_mode)) {
1721 p->bdev = inode->i_sb->s_bdev;
1722 mutex_lock(&inode->i_mutex);
1723 did_down = 1;
1724 if (IS_SWAPFILE(inode)) {
1725 error = -EBUSY;
1726 goto bad_swap;
1727 }
1728 } else {
1729 goto bad_swap;
1730 }
1731
1732 swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
1733
1734
1735
1736
1737 if (!mapping->a_ops->readpage) {
1738 error = -EINVAL;
1739 goto bad_swap;
1740 }
1741 page = read_mapping_page(mapping, 0, swap_file);
1742 if (IS_ERR(page)) {
1743 error = PTR_ERR(page);
1744 goto bad_swap;
1745 }
1746 swap_header = kmap(page);
1747
1748 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
1749 printk(KERN_ERR "Unable to find swap-space signature\n");
1750 error = -EINVAL;
1751 goto bad_swap;
1752 }
1753
1754
1755 if (swab32(swap_header->info.version) == 1) {
1756 swab32s(&swap_header->info.version);
1757 swab32s(&swap_header->info.last_page);
1758 swab32s(&swap_header->info.nr_badpages);
1759 for (i = 0; i < swap_header->info.nr_badpages; i++)
1760 swab32s(&swap_header->info.badpages[i]);
1761 }
1762
1763 if (swap_header->info.version != 1) {
1764 printk(KERN_WARNING
1765 "Unable to handle swap header version %d\n",
1766 swap_header->info.version);
1767 error = -EINVAL;
1768 goto bad_swap;
1769 }
1770
1771 p->lowest_bit = 1;
1772 p->cluster_next = 1;
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788 maxpages = swp_offset(pte_to_swp_entry(
1789 swp_entry_to_pte(swp_entry(0, ~0UL)))) - 1;
1790 if (maxpages > swap_header->info.last_page)
1791 maxpages = swap_header->info.last_page;
1792 p->highest_bit = maxpages - 1;
1793
1794 error = -EINVAL;
1795 if (!maxpages)
1796 goto bad_swap;
1797 if (swapfilepages && maxpages > swapfilepages) {
1798 printk(KERN_WARNING
1799 "Swap area shorter than signature indicates\n");
1800 goto bad_swap;
1801 }
1802 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
1803 goto bad_swap;
1804 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1805 goto bad_swap;
1806
1807
1808 swap_map = vmalloc(maxpages * sizeof(short));
1809 if (!swap_map) {
1810 error = -ENOMEM;
1811 goto bad_swap;
1812 }
1813
1814 memset(swap_map, 0, maxpages * sizeof(short));
1815 for (i = 0; i < swap_header->info.nr_badpages; i++) {
1816 int page_nr = swap_header->info.badpages[i];
1817 if (page_nr <= 0 || page_nr >= swap_header->info.last_page) {
1818 error = -EINVAL;
1819 goto bad_swap;
1820 }
1821 swap_map[page_nr] = SWAP_MAP_BAD;
1822 }
1823
1824 error = swap_cgroup_swapon(type, maxpages);
1825 if (error)
1826 goto bad_swap;
1827
1828 nr_good_pages = swap_header->info.last_page -
1829 swap_header->info.nr_badpages -
1830 1 ;
1831
1832 if (nr_good_pages) {
1833 swap_map[0] = SWAP_MAP_BAD;
1834 p->max = maxpages;
1835 p->pages = nr_good_pages;
1836 nr_extents = setup_swap_extents(p, &span);
1837 if (nr_extents < 0) {
1838 error = nr_extents;
1839 goto bad_swap;
1840 }
1841 nr_good_pages = p->pages;
1842 }
1843 if (!nr_good_pages) {
1844 printk(KERN_WARNING "Empty swap-file\n");
1845 error = -EINVAL;
1846 goto bad_swap;
1847 }
1848
1849 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
1850 p->flags |= SWP_SOLIDSTATE;
1851 p->cluster_next = 1 + (random32() % p->highest_bit);
1852 }
1853 if (discard_swap(p) == 0)
1854 p->flags |= SWP_DISCARDABLE;
1855
1856 mutex_lock(&swapon_mutex);
1857 spin_lock(&swap_lock);
1858 if (swap_flags & SWAP_FLAG_PREFER)
1859 p->prio =
1860 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
1861 else
1862 p->prio = --least_priority;
1863 p->swap_map = swap_map;
1864 p->flags |= SWP_WRITEOK;
1865 nr_swap_pages += nr_good_pages;
1866 total_swap_pages += nr_good_pages;
1867
1868 printk(KERN_INFO "Adding %uk swap on %s. "
1869 "Priority:%d extents:%d across:%lluk %s%s\n",
1870 nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
1871 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
1872 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
1873 (p->flags & SWP_DISCARDABLE) ? "D" : "");
1874
1875
1876 prev = -1;
1877 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1878 if (p->prio >= swap_info[i].prio) {
1879 break;
1880 }
1881 prev = i;
1882 }
1883 p->next = i;
1884 if (prev < 0) {
1885 swap_list.head = swap_list.next = p - swap_info;
1886 } else {
1887 swap_info[prev].next = p - swap_info;
1888 }
1889 spin_unlock(&swap_lock);
1890 mutex_unlock(&swapon_mutex);
1891 error = 0;
1892 goto out;
1893bad_swap:
1894 if (bdev) {
1895 set_blocksize(bdev, p->old_block_size);
1896 bd_release(bdev);
1897 }
1898 destroy_swap_extents(p);
1899 swap_cgroup_swapoff(type);
1900bad_swap_2:
1901 spin_lock(&swap_lock);
1902 p->swap_file = NULL;
1903 p->flags = 0;
1904 spin_unlock(&swap_lock);
1905 vfree(swap_map);
1906 if (swap_file)
1907 filp_close(swap_file, NULL);
1908out:
1909 if (page && !IS_ERR(page)) {
1910 kunmap(page);
1911 page_cache_release(page);
1912 }
1913 if (name)
1914 putname(name);
1915 if (did_down) {
1916 if (!error)
1917 inode->i_flags |= S_SWAPFILE;
1918 mutex_unlock(&inode->i_mutex);
1919 }
1920 return error;
1921}
1922
1923void si_swapinfo(struct sysinfo *val)
1924{
1925 unsigned int i;
1926 unsigned long nr_to_be_unused = 0;
1927
1928 spin_lock(&swap_lock);
1929 for (i = 0; i < nr_swapfiles; i++) {
1930 if (!(swap_info[i].flags & SWP_USED) ||
1931 (swap_info[i].flags & SWP_WRITEOK))
1932 continue;
1933 nr_to_be_unused += swap_info[i].inuse_pages;
1934 }
1935 val->freeswap = nr_swap_pages + nr_to_be_unused;
1936 val->totalswap = total_swap_pages + nr_to_be_unused;
1937 spin_unlock(&swap_lock);
1938}
1939
1940
1941
1942
1943
1944
1945
1946int swap_duplicate(swp_entry_t entry)
1947{
1948 struct swap_info_struct * p;
1949 unsigned long offset, type;
1950 int result = 0;
1951
1952 if (is_migration_entry(entry))
1953 return 1;
1954
1955 type = swp_type(entry);
1956 if (type >= nr_swapfiles)
1957 goto bad_file;
1958 p = type + swap_info;
1959 offset = swp_offset(entry);
1960
1961 spin_lock(&swap_lock);
1962 if (offset < p->max && p->swap_map[offset]) {
1963 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
1964 p->swap_map[offset]++;
1965 result = 1;
1966 } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
1967 if (swap_overflow++ < 5)
1968 printk(KERN_WARNING "swap_dup: swap entry overflow\n");
1969 p->swap_map[offset] = SWAP_MAP_MAX;
1970 result = 1;
1971 }
1972 }
1973 spin_unlock(&swap_lock);
1974out:
1975 return result;
1976
1977bad_file:
1978 printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
1979 goto out;
1980}
1981
1982struct swap_info_struct *
1983get_swap_info_struct(unsigned type)
1984{
1985 return &swap_info[type];
1986}
1987
1988
1989
1990
1991
1992int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1993{
1994 struct swap_info_struct *si;
1995 int our_page_cluster = page_cluster;
1996 pgoff_t target, toff;
1997 pgoff_t base, end;
1998 int nr_pages = 0;
1999
2000 if (!our_page_cluster)
2001 return 0;
2002
2003 si = &swap_info[swp_type(entry)];
2004 target = swp_offset(entry);
2005 base = (target >> our_page_cluster) << our_page_cluster;
2006 end = base + (1 << our_page_cluster);
2007 if (!base)
2008 base++;
2009
2010 spin_lock(&swap_lock);
2011 if (end > si->max)
2012 end = si->max;
2013
2014
2015 for (toff = target; ++toff < end; nr_pages++) {
2016
2017 if (!si->swap_map[toff])
2018 break;
2019 if (si->swap_map[toff] == SWAP_MAP_BAD)
2020 break;
2021 }
2022
2023 for (toff = target; --toff >= base; nr_pages++) {
2024
2025 if (!si->swap_map[toff])
2026 break;
2027 if (si->swap_map[toff] == SWAP_MAP_BAD)
2028 break;
2029 }
2030 spin_unlock(&swap_lock);
2031
2032
2033
2034
2035
2036 *offset = ++toff;
2037 return nr_pages? ++nr_pages: 0;
2038}
2039