1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/blkpg.h>
19#include <linux/magic.h>
20#include <linux/buffer_head.h>
21#include <linux/swap.h>
22#include <linux/pagevec.h>
23#include <linux/writeback.h>
24#include <linux/mpage.h>
25#include <linux/mount.h>
26#include <linux/uio.h>
27#include <linux/namei.h>
28#include <linux/log2.h>
29#include <linux/cleancache.h>
30#include <asm/uaccess.h>
31#include "internal.h"
32
/*
 * Pairs a block_device with a VFS inode in one object, so that either can
 * be reached from the other (BDEV_I() goes from the inode back to this
 * container via container_of()).
 */
struct bdev_inode {
	struct block_device bdev;	/* block device state */
	struct inode vfs_inode;		/* embedded VFS inode */
};
37
/* Forward declaration; bdget() installs this table before it is defined. */
static const struct address_space_operations def_blk_aops;
39
40static inline struct bdev_inode *BDEV_I(struct inode *inode)
41{
42 return container_of(inode, struct bdev_inode, vfs_inode);
43}
44
45inline struct block_device *I_BDEV(struct inode *inode)
46{
47 return &BDEV_I(inode)->bdev;
48}
49EXPORT_SYMBOL(I_BDEV);
50
51
52
53
54
55
56static void bdev_inode_switch_bdi(struct inode *inode,
57 struct backing_dev_info *dst)
58{
59 struct backing_dev_info *old = inode->i_data.backing_dev_info;
60
61 if (unlikely(dst == old))
62 return;
63 bdi_lock_two(&old->wb, &dst->wb);
64 spin_lock(&inode->i_lock);
65 inode->i_data.backing_dev_info = dst;
66 if (inode->i_state & I_DIRTY)
67 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
68 spin_unlock(&inode->i_lock);
69 spin_unlock(&old->wb.list_lock);
70 spin_unlock(&dst->wb.list_lock);
71}
72
73
74void kill_bdev(struct block_device *bdev)
75{
76 struct address_space *mapping = bdev->bd_inode->i_mapping;
77
78 if (mapping->nrpages == 0)
79 return;
80
81 invalidate_bh_lrus();
82 truncate_inode_pages(mapping, 0);
83}
84EXPORT_SYMBOL(kill_bdev);
85
86
87void invalidate_bdev(struct block_device *bdev)
88{
89 struct address_space *mapping = bdev->bd_inode->i_mapping;
90
91 if (mapping->nrpages == 0)
92 return;
93
94 invalidate_bh_lrus();
95 lru_add_drain_all();
96 invalidate_mapping_pages(mapping, 0, -1);
97
98
99
100 cleancache_invalidate_inode(mapping);
101}
102EXPORT_SYMBOL(invalidate_bdev);
103
104int set_blocksize(struct block_device *bdev, int size)
105{
106
107 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
108 return -EINVAL;
109
110
111 if (size < bdev_logical_block_size(bdev))
112 return -EINVAL;
113
114
115 if (bdev->bd_block_size != size) {
116 sync_blockdev(bdev);
117 bdev->bd_block_size = size;
118 bdev->bd_inode->i_blkbits = blksize_bits(size);
119 kill_bdev(bdev);
120 }
121 return 0;
122}
123
124EXPORT_SYMBOL(set_blocksize);
125
126int sb_set_blocksize(struct super_block *sb, int size)
127{
128 if (set_blocksize(sb->s_bdev, size))
129 return 0;
130
131
132 sb->s_blocksize = size;
133 sb->s_blocksize_bits = blksize_bits(size);
134 return sb->s_blocksize;
135}
136
137EXPORT_SYMBOL(sb_set_blocksize);
138
139int sb_min_blocksize(struct super_block *sb, int size)
140{
141 int minsize = bdev_logical_block_size(sb->s_bdev);
142 if (size < minsize)
143 size = minsize;
144 return sb_set_blocksize(sb, size);
145}
146
147EXPORT_SYMBOL(sb_min_blocksize);
148
149static int
150blkdev_get_block(struct inode *inode, sector_t iblock,
151 struct buffer_head *bh, int create)
152{
153 bh->b_bdev = I_BDEV(inode);
154 bh->b_blocknr = iblock;
155 set_buffer_mapped(bh);
156 return 0;
157}
158
159static ssize_t
160blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
161 loff_t offset, unsigned long nr_segs)
162{
163 struct file *file = iocb->ki_filp;
164 struct inode *inode = file->f_mapping->host;
165
166 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
167 nr_segs, blkdev_get_block, NULL, NULL, 0);
168}
169
170int __sync_blockdev(struct block_device *bdev, int wait)
171{
172 if (!bdev)
173 return 0;
174 if (!wait)
175 return filemap_flush(bdev->bd_inode->i_mapping);
176 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
177}
178
179
180
181
182
/* Write out @bdev's dirty pages and wait for completion. */
int sync_blockdev(struct block_device *bdev)
{
	int err = __sync_blockdev(bdev, 1);

	return err;
}
EXPORT_SYMBOL(sync_blockdev);
188
189
190
191
192
193
/*
 * Sync whatever sits on @bdev: the filesystem if get_super() finds one,
 * the raw block device otherwise.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
205
206
207
208
209
210
211
212
213
214
215
216
217
218struct super_block *freeze_bdev(struct block_device *bdev)
219{
220 struct super_block *sb;
221 int error = 0;
222
223 mutex_lock(&bdev->bd_fsfreeze_mutex);
224 if (++bdev->bd_fsfreeze_count > 1) {
225
226
227
228
229
230 sb = get_super(bdev);
231 drop_super(sb);
232 mutex_unlock(&bdev->bd_fsfreeze_mutex);
233 return sb;
234 }
235
236 sb = get_active_super(bdev);
237 if (!sb)
238 goto out;
239 error = freeze_super(sb);
240 if (error) {
241 deactivate_super(sb);
242 bdev->bd_fsfreeze_count--;
243 mutex_unlock(&bdev->bd_fsfreeze_mutex);
244 return ERR_PTR(error);
245 }
246 deactivate_super(sb);
247 out:
248 sync_blockdev(bdev);
249 mutex_unlock(&bdev->bd_fsfreeze_mutex);
250 return sb;
251}
252EXPORT_SYMBOL(freeze_bdev);
253
254
255
256
257
258
259
260
261int thaw_bdev(struct block_device *bdev, struct super_block *sb)
262{
263 int error = -EINVAL;
264
265 mutex_lock(&bdev->bd_fsfreeze_mutex);
266 if (!bdev->bd_fsfreeze_count)
267 goto out;
268
269 error = 0;
270 if (--bdev->bd_fsfreeze_count > 0)
271 goto out;
272
273 if (!sb)
274 goto out;
275
276 error = thaw_super(sb);
277 if (error) {
278 bdev->bd_fsfreeze_count++;
279 mutex_unlock(&bdev->bd_fsfreeze_mutex);
280 return error;
281 }
282out:
283 mutex_unlock(&bdev->bd_fsfreeze_mutex);
284 return 0;
285}
286EXPORT_SYMBOL(thaw_bdev);
287
288static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
289{
290 return block_write_full_page(page, blkdev_get_block, wbc);
291}
292
293static int blkdev_readpage(struct file * file, struct page * page)
294{
295 return block_read_full_page(page, blkdev_get_block);
296}
297
298static int blkdev_write_begin(struct file *file, struct address_space *mapping,
299 loff_t pos, unsigned len, unsigned flags,
300 struct page **pagep, void **fsdata)
301{
302 return block_write_begin(mapping, pos, len, flags, pagep,
303 blkdev_get_block);
304}
305
306static int blkdev_write_end(struct file *file, struct address_space *mapping,
307 loff_t pos, unsigned len, unsigned copied,
308 struct page *page, void *fsdata)
309{
310 int ret;
311 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
312
313 unlock_page(page);
314 page_cache_release(page);
315
316 return ret;
317}
318
319
320
321
322
323
324static loff_t block_llseek(struct file *file, loff_t offset, int whence)
325{
326 struct inode *bd_inode = file->f_mapping->host;
327 loff_t size;
328 loff_t retval;
329
330 mutex_lock(&bd_inode->i_mutex);
331 size = i_size_read(bd_inode);
332
333 retval = -EINVAL;
334 switch (whence) {
335 case SEEK_END:
336 offset += size;
337 break;
338 case SEEK_CUR:
339 offset += file->f_pos;
340 case SEEK_SET:
341 break;
342 default:
343 goto out;
344 }
345 if (offset >= 0 && offset <= size) {
346 if (offset != file->f_pos) {
347 file->f_pos = offset;
348 }
349 retval = offset;
350 }
351out:
352 mutex_unlock(&bd_inode->i_mutex);
353 return retval;
354}
355
356int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
357{
358 struct inode *bd_inode = filp->f_mapping->host;
359 struct block_device *bdev = I_BDEV(bd_inode);
360 int error;
361
362 error = filemap_write_and_wait_range(filp->f_mapping, start, end);
363 if (error)
364 return error;
365
366
367
368
369
370
371 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
372 if (error == -EOPNOTSUPP)
373 error = 0;
374
375 return error;
376}
377EXPORT_SYMBOL(blkdev_fsync);
378
379
380
381
382
/*
 * bdev_lock guards the all_bdevs list, the inode <-> block_device bindings
 * (i_bdev, bd_inodes) and the holder/claiming fields, as used throughout
 * this file.
 */
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
/* slab cache from which struct bdev_inode objects are allocated */
static struct kmem_cache * bdev_cachep __read_mostly;
385
386static struct inode *bdev_alloc_inode(struct super_block *sb)
387{
388 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
389 if (!ei)
390 return NULL;
391 return &ei->vfs_inode;
392}
393
394static void bdev_i_callback(struct rcu_head *head)
395{
396 struct inode *inode = container_of(head, struct inode, i_rcu);
397 struct bdev_inode *bdi = BDEV_I(inode);
398
399 kmem_cache_free(bdev_cachep, bdi);
400}
401
402static void bdev_destroy_inode(struct inode *inode)
403{
404 call_rcu(&inode->i_rcu, bdev_i_callback);
405}
406
407static void init_once(void *foo)
408{
409 struct bdev_inode *ei = (struct bdev_inode *) foo;
410 struct block_device *bdev = &ei->bdev;
411
412 memset(bdev, 0, sizeof(*bdev));
413 mutex_init(&bdev->bd_mutex);
414 INIT_LIST_HEAD(&bdev->bd_inodes);
415 INIT_LIST_HEAD(&bdev->bd_list);
416#ifdef CONFIG_SYSFS
417 INIT_LIST_HEAD(&bdev->bd_holder_disks);
418#endif
419 inode_init_once(&ei->vfs_inode);
420
421 mutex_init(&bdev->bd_fsfreeze_mutex);
422}
423
424static inline void __bd_forget(struct inode *inode)
425{
426 list_del_init(&inode->i_devices);
427 inode->i_bdev = NULL;
428 inode->i_mapping = &inode->i_data;
429}
430
431static void bdev_evict_inode(struct inode *inode)
432{
433 struct block_device *bdev = &BDEV_I(inode)->bdev;
434 struct list_head *p;
435 truncate_inode_pages(&inode->i_data, 0);
436 invalidate_inode_buffers(inode);
437 clear_inode(inode);
438 spin_lock(&bdev_lock);
439 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
440 __bd_forget(list_entry(p, struct inode, i_devices));
441 }
442 list_del_init(&bdev->bd_list);
443 spin_unlock(&bdev_lock);
444}
445
/* Super operations of the bdev pseudo-filesystem. */
static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,	/* allocates a bdev_inode */
	.destroy_inode = bdev_destroy_inode,	/* frees it after an RCU grace period */
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};
453
454static struct dentry *bd_mount(struct file_system_type *fs_type,
455 int flags, const char *dev_name, void *data)
456{
457 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
458}
459
/* Filesystem type of the internal "bdev" pseudo-fs (see bdev_cache_init()). */
static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};
465
/* Superblock of the mounted bdev pseudo-fs; set once in bdev_cache_init(). */
static struct super_block *blockdev_superblock __read_mostly;
467
468void __init bdev_cache_init(void)
469{
470 int err;
471 static struct vfsmount *bd_mnt;
472
473 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
474 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
475 SLAB_MEM_SPREAD|SLAB_PANIC),
476 init_once);
477 err = register_filesystem(&bd_type);
478 if (err)
479 panic("Cannot register bdev pseudo-fs");
480 bd_mnt = kern_mount(&bd_type);
481 if (IS_ERR(bd_mnt))
482 panic("Cannot create bdev pseudo-fs");
483 blockdev_superblock = bd_mnt->mnt_sb;
484}
485
486
487
488
489
490
491static inline unsigned long hash(dev_t dev)
492{
493 return MAJOR(dev)+MINOR(dev);
494}
495
496static int bdev_test(struct inode *inode, void *data)
497{
498 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
499}
500
501static int bdev_set(struct inode *inode, void *data)
502{
503 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
504 return 0;
505}
506
/* List of all block_devices (linked via bd_list); modified under bdev_lock. */
static LIST_HEAD(all_bdevs);
508
509struct block_device *bdget(dev_t dev)
510{
511 struct block_device *bdev;
512 struct inode *inode;
513
514 inode = iget5_locked(blockdev_superblock, hash(dev),
515 bdev_test, bdev_set, &dev);
516
517 if (!inode)
518 return NULL;
519
520 bdev = &BDEV_I(inode)->bdev;
521
522 if (inode->i_state & I_NEW) {
523 bdev->bd_contains = NULL;
524 bdev->bd_super = NULL;
525 bdev->bd_inode = inode;
526 bdev->bd_block_size = (1 << inode->i_blkbits);
527 bdev->bd_part_count = 0;
528 bdev->bd_invalidated = 0;
529 inode->i_mode = S_IFBLK;
530 inode->i_rdev = dev;
531 inode->i_bdev = bdev;
532 inode->i_data.a_ops = &def_blk_aops;
533 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
534 inode->i_data.backing_dev_info = &default_backing_dev_info;
535 spin_lock(&bdev_lock);
536 list_add(&bdev->bd_list, &all_bdevs);
537 spin_unlock(&bdev_lock);
538 unlock_new_inode(inode);
539 }
540 return bdev;
541}
542
543EXPORT_SYMBOL(bdget);
544
545
546
547
548
549struct block_device *bdgrab(struct block_device *bdev)
550{
551 ihold(bdev->bd_inode);
552 return bdev;
553}
554
555long nr_blockdev_pages(void)
556{
557 struct block_device *bdev;
558 long ret = 0;
559 spin_lock(&bdev_lock);
560 list_for_each_entry(bdev, &all_bdevs, bd_list) {
561 ret += bdev->bd_inode->i_mapping->nrpages;
562 }
563 spin_unlock(&bdev_lock);
564 return ret;
565}
566
567void bdput(struct block_device *bdev)
568{
569 iput(bdev->bd_inode);
570}
571
572EXPORT_SYMBOL(bdput);
573
574static struct block_device *bd_acquire(struct inode *inode)
575{
576 struct block_device *bdev;
577
578 spin_lock(&bdev_lock);
579 bdev = inode->i_bdev;
580 if (bdev) {
581 ihold(bdev->bd_inode);
582 spin_unlock(&bdev_lock);
583 return bdev;
584 }
585 spin_unlock(&bdev_lock);
586
587 bdev = bdget(inode->i_rdev);
588 if (bdev) {
589 spin_lock(&bdev_lock);
590 if (!inode->i_bdev) {
591
592
593
594
595
596
597 ihold(bdev->bd_inode);
598 inode->i_bdev = bdev;
599 inode->i_mapping = bdev->bd_inode->i_mapping;
600 list_add(&inode->i_devices, &bdev->bd_inodes);
601 }
602 spin_unlock(&bdev_lock);
603 }
604 return bdev;
605}
606
607static inline int sb_is_blkdev_sb(struct super_block *sb)
608{
609 return sb == blockdev_superblock;
610}
611
612
613
614void bd_forget(struct inode *inode)
615{
616 struct block_device *bdev = NULL;
617
618 spin_lock(&bdev_lock);
619 if (inode->i_bdev) {
620 if (!sb_is_blkdev_sb(inode->i_sb))
621 bdev = inode->i_bdev;
622 __bd_forget(inode);
623 }
624 spin_unlock(&bdev_lock);
625
626 if (bdev)
627 iput(bdev->bd_inode);
628}
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
645 void *holder)
646{
647 if (bdev->bd_holder == holder)
648 return true;
649 else if (bdev->bd_holder != NULL)
650 return false;
651 else if (bdev->bd_contains == bdev)
652 return true;
653
654 else if (whole->bd_holder == bd_may_claim)
655 return true;
656 else if (whole->bd_holder != NULL)
657 return false;
658 else
659 return true;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
/*
 * Check that @holder may claim @bdev and wait until no other claim is in
 * progress on @whole.
 *
 * The function drops and re-takes bdev_lock around schedule(), so it must
 * be entered with bdev_lock held (bd_start_claiming() does so).  After
 * every sleep the whole check is repeated.
 *
 * Returns 0 when the claim may proceed, -EBUSY when bd_may_claim() refuses.
 */
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else holds the device, fail right away */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* another claim is in flight on the whole device: sleep, then recheck */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* no conflicting holder and no claim in progress */
	return 0;
}
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
/*
 * Begin an exclusive claim of @bdev on behalf of @holder.
 *
 * Finds the whole device containing @bdev, waits (via
 * bd_prepare_to_claim()) until the claim is possible and then marks it as
 * in progress by setting whole->bd_claiming.  The caller finishes the
 * claim; in this file blkdev_get() does that and clears bd_claiming.
 *
 * May sleep.
 *
 * Returns the whole device with a reference held, or ERR_PTR(-ENXIO) when
 * no gendisk exists, ERR_PTR(-ENOMEM) when the whole device cannot be
 * obtained, or the error from bd_prepare_to_claim().
 */
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/* look up the disk only to learn whether @bdev is a partition */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * For a partition take the whole device from the disk; for a whole
	 * device just take another reference on @bdev itself.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	/* the gendisk (and its module reference) is not needed any further */
	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}
777
778#ifdef CONFIG_SYSFS
/* One entry of a block_device's bd_holder_disks list. */
struct bd_holder_disk {
	struct list_head	list;	/* link in bdev->bd_holder_disks */
	struct gendisk		*disk;	/* the holding disk */
	int			refcnt;	/* number of bd_link_disk_holder() calls for this disk */
};
784
785static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
786 struct gendisk *disk)
787{
788 struct bd_holder_disk *holder;
789
790 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
791 if (holder->disk == disk)
792 return holder;
793 return NULL;
794}
795
/* Create a sysfs link in @from that points at @to and is named after @to. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
800
/* Remove from @from the sysfs link that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
/*
 * Record @disk as a holder of @bdev and expose that in sysfs.
 *
 * Two links are created: one in disk->slave_dir pointing at @bdev's
 * partition device, and one in @bdev's holder_dir pointing at @disk's
 * device.  Repeated calls for the same pair only bump a reference count;
 * bd_unlink_disk_holder() undoes one call.
 *
 * Runs under bdev->bd_mutex.  Warns once if @bdev has no holder.
 *
 * Returns 0 on success (also when the sysfs directories are missing, which
 * only triggers a warning), -ENOMEM on allocation failure, or the error
 * from sysfs link creation.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* without the sysfs directories there is nowhere to put the links */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	/* already linked: just count the additional user */
	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * Pin holder_dir for as long as the link exists; the matching
	 * kobject_put() is in bd_unlink_disk_holder().
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
888
889
890
891
892
893
894
895
896
897
898
899void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
900{
901 struct bd_holder_disk *holder;
902
903 mutex_lock(&bdev->bd_mutex);
904
905 holder = bd_find_holder_disk(bdev, disk);
906
907 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
908 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
909 del_symlink(bdev->bd_part->holder_dir,
910 &disk_to_dev(disk)->kobj);
911 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list);
913 kfree(holder);
914 }
915
916 mutex_unlock(&bdev->bd_mutex);
917}
918EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
919#endif
920
921
922
923
924
925
926
927
928
929
930
931static void flush_disk(struct block_device *bdev, bool kill_dirty)
932{
933 if (__invalidate_device(bdev, kill_dirty)) {
934 char name[BDEVNAME_SIZE] = "";
935
936 if (bdev->bd_disk)
937 disk_name(bdev->bd_disk, 0, name);
938 printk(KERN_WARNING "VFS: busy inodes on changed media or "
939 "resized disk %s\n", name);
940 }
941
942 if (!bdev->bd_disk)
943 return;
944 if (disk_part_scan_enabled(bdev->bd_disk))
945 bdev->bd_invalidated = 1;
946}
947
948
949
950
951
952
953
954
955
956void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
957{
958 loff_t disk_size, bdev_size;
959
960 disk_size = (loff_t)get_capacity(disk) << 9;
961 bdev_size = i_size_read(bdev->bd_inode);
962 if (disk_size != bdev_size) {
963 char name[BDEVNAME_SIZE];
964
965 disk_name(disk, 0, name);
966 printk(KERN_INFO
967 "%s: detected capacity change from %lld to %lld\n",
968 name, bdev_size, disk_size);
969 i_size_write(bdev->bd_inode, disk_size);
970 flush_disk(bdev, false);
971 }
972}
973EXPORT_SYMBOL(check_disk_size_change);
974
975
976
977
978
979
980
981
982
983int revalidate_disk(struct gendisk *disk)
984{
985 struct block_device *bdev;
986 int ret = 0;
987
988 if (disk->fops->revalidate_disk)
989 ret = disk->fops->revalidate_disk(disk);
990
991 bdev = bdget_disk(disk, 0);
992 if (!bdev)
993 return ret;
994
995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
998 mutex_unlock(&bdev->bd_mutex);
999 bdput(bdev);
1000 return ret;
1001}
1002EXPORT_SYMBOL(revalidate_disk);
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013int check_disk_change(struct block_device *bdev)
1014{
1015 struct gendisk *disk = bdev->bd_disk;
1016 const struct block_device_operations *bdops = disk->fops;
1017 unsigned int events;
1018
1019 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1020 DISK_EVENT_EJECT_REQUEST);
1021 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1022 return 0;
1023
1024 flush_disk(bdev, true);
1025 if (bdops->revalidate_disk)
1026 bdops->revalidate_disk(bdev->bd_disk);
1027 return 1;
1028}
1029
1030EXPORT_SYMBOL(check_disk_change);
1031
1032void bd_set_size(struct block_device *bdev, loff_t size)
1033{
1034 unsigned bsize = bdev_logical_block_size(bdev);
1035
1036 bdev->bd_inode->i_size = size;
1037 while (bsize < PAGE_CACHE_SIZE) {
1038 if (size & bsize)
1039 break;
1040 bsize <<= 1;
1041 }
1042 bdev->bd_block_size = bsize;
1043 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1044}
1045EXPORT_SYMBOL(bd_set_size);
1046
/* Forward declaration: __blkdev_get()'s error path needs __blkdev_put(). */
static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1048
1049
1050
1051
1052
1053
1054
1055
/*
 * Open @bdev with access @mode.
 *
 * @for_part is non-zero when a whole device is opened on behalf of one of
 * its partitions; it is also passed to mutex_lock_nested() as the lock
 * subclass for bd_mutex, and it skips the device-cgroup permission check.
 *
 * On the first open the device is bound to its gendisk.  A whole device
 * calls the driver's open method and picks up size and backing_dev_info; a
 * partition first opens the whole device (recursively, with @for_part = 1)
 * and takes its size from the partition table entry.  Later opens of a
 * whole device only call the driver's open method again.
 *
 * Returns 0 on success.  On every failure path the caller's reference on
 * @bdev is dropped with bdput() before the error is returned.
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	/* events stay blocked until the matching disk_unblock_events() below */
	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		/* first opener: bind the block_device to its disk */
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/*
					 * The driver asked for a restart:
					 * undo everything done so far and
					 * start over from the disk lookup.
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret && !bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
			}

			/*
			 * If the device is marked invalidated, rescan the
			 * partition table after a successful open, or drop the
			 * partitions when the open failed with -ENOMEDIUM.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			/* partition: open the containing whole device first */
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev_inode_switch_bdi(bdev->bd_inode,
				whole->bd_inode->i_data.backing_dev_info);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		/* already open: only whole devices call into the driver again */
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same invalidated handling as in the first-open path */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	/* undo the first-open binding */
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
/*
 * Open @bdev with @mode, exclusively for @holder when FMODE_EXCL is set.
 *
 * For an exclusive open the claim is started with bd_start_claiming()
 * before the device is opened, and it is either completed or aborted
 * afterwards depending on the outcome of __blkdev_get().  A warning is
 * issued once if FMODE_EXCL is given without a holder.
 *
 * Returns 0 on success or a negative errno.  The caller's reference on
 * @bdev is dropped on failure: here when the claim cannot be started, and
 * inside __blkdev_get() otherwise.
 */
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Count the holder on both the device and the whole
			 * device.  The whole device's holder is set to the
			 * bd_may_claim marker, which bd_may_claim() itself
			 * recognises.
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * For a successful exclusive write open on a disk that has
		 * GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE set, block disk events
		 * and remember that in bd_write_holder; blkdev_put()
		 * unblocks them again.
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		/* drop the reference obtained from bd_start_claiming() */
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1313 void *holder)
1314{
1315 struct block_device *bdev;
1316 int err;
1317
1318 bdev = lookup_bdev(path);
1319 if (IS_ERR(bdev))
1320 return bdev;
1321
1322 err = blkdev_get(bdev, mode, holder);
1323 if (err)
1324 return ERR_PTR(err);
1325
1326 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1327 blkdev_put(bdev, mode);
1328 return ERR_PTR(-EACCES);
1329 }
1330
1331 return bdev;
1332}
1333EXPORT_SYMBOL(blkdev_get_by_path);
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1358{
1359 struct block_device *bdev;
1360 int err;
1361
1362 bdev = bdget(dev);
1363 if (!bdev)
1364 return ERR_PTR(-ENOMEM);
1365
1366 err = blkdev_get(bdev, mode, holder);
1367 if (err)
1368 return ERR_PTR(err);
1369
1370 return bdev;
1371}
1372EXPORT_SYMBOL(blkdev_get_by_dev);
1373
1374static int blkdev_open(struct inode * inode, struct file * filp)
1375{
1376 struct block_device *bdev;
1377
1378
1379
1380
1381
1382
1383
1384 filp->f_flags |= O_LARGEFILE;
1385
1386 if (filp->f_flags & O_NDELAY)
1387 filp->f_mode |= FMODE_NDELAY;
1388 if (filp->f_flags & O_EXCL)
1389 filp->f_mode |= FMODE_EXCL;
1390 if ((filp->f_flags & O_ACCMODE) == 3)
1391 filp->f_mode |= FMODE_WRITE_IOCTL;
1392
1393 bdev = bd_acquire(inode);
1394 if (bdev == NULL)
1395 return -ENOMEM;
1396
1397 filp->f_mapping = bdev->bd_inode->i_mapping;
1398
1399 return blkdev_get(bdev, filp->f_mode, filp);
1400}
1401
/*
 * Release one open of @bdev.
 *
 * @for_part mirrors __blkdev_get(): it is non-zero when the open was taken
 * on behalf of a partition, and it is used as the lock subclass for
 * bd_mutex.
 *
 * When the last opener goes away the device is synced, its page cache is
 * dropped, the disk binding is torn down and, for a partition, the
 * containing whole device is released as well.
 *
 * Returns the result of the driver's release method, or 0 when that
 * method is not called.  The caller's reference on @bdev is dropped.
 */
static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	int ret = 0;
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		/* last opener: nobody should still hold the device */
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/* go back to the default bdi now that the device is closed */
		bdev_inode_switch_bdi(bdev->bd_inode,
					&default_backing_dev_info);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			ret = disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		/* for a partition, remember the whole device to release below */
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	/* released only after this device's bd_mutex has been dropped */
	if (victim)
		__blkdev_put(victim, mode, 1);
	return ret;
}
1445
/*
 * Close @bdev, which was opened with @mode.
 *
 * For an exclusive open the holder counts of the device and of its
 * containing device are dropped, and the holder pointers are cleared once
 * the respective count reaches zero.  If events were blocked for an
 * exclusive writer they are unblocked when the last holder goes away.
 *
 * Returns the result of __blkdev_put().
 */
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields are
		 * protected with bdev_lock; bd_mutex is held as well so that
		 * this stays in step with blkdev_get().
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove the holder link and
		 * unblock event polling if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/* flush pending media-change events on every close */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
1493
1494static int blkdev_close(struct inode * inode, struct file * filp)
1495{
1496 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1497
1498 return blkdev_put(bdev, filp->f_mode);
1499}
1500
1501static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1502{
1503 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1504 fmode_t mode = file->f_mode;
1505
1506
1507
1508
1509
1510 if (file->f_flags & O_NDELAY)
1511 mode |= FMODE_NDELAY;
1512 else
1513 mode &= ~FMODE_NDELAY;
1514
1515 return blkdev_ioctl(bdev, mode, cmd, arg);
1516}
1517
1518
1519
1520
1521
1522
1523
1524
1525ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1526 unsigned long nr_segs, loff_t pos)
1527{
1528 struct file *file = iocb->ki_filp;
1529 struct blk_plug plug;
1530 ssize_t ret;
1531
1532 BUG_ON(iocb->ki_pos != pos);
1533
1534 blk_start_plug(&plug);
1535 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1536 if (ret > 0 || ret == -EIOCBQUEUED) {
1537 ssize_t err;
1538
1539 err = generic_write_sync(file, pos, ret);
1540 if (err < 0 && ret > 0)
1541 ret = err;
1542 }
1543 blk_finish_plug(&plug);
1544 return ret;
1545}
1546EXPORT_SYMBOL_GPL(blkdev_aio_write);
1547
1548static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1549 unsigned long nr_segs, loff_t pos)
1550{
1551 struct file *file = iocb->ki_filp;
1552 struct inode *bd_inode = file->f_mapping->host;
1553 loff_t size = i_size_read(bd_inode);
1554
1555 if (pos >= size)
1556 return 0;
1557
1558 size -= pos;
1559 if (size < INT_MAX)
1560 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
1561 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1562}
1563
1564
1565
1566
1567
1568static int blkdev_releasepage(struct page *page, gfp_t wait)
1569{
1570 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1571
1572 if (super && super->s_op->bdev_try_to_free_page)
1573 return super->s_op->bdev_try_to_free_page(super, page, wait);
1574
1575 return try_to_free_buffers(page);
1576}
1577
/* Address-space operations installed on block device inodes by bdget(). */
static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
};
1587
/* File operations for block device files. */
const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= blkdev_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};
1605
1606int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1607{
1608 int res;
1609 mm_segment_t old_fs = get_fs();
1610 set_fs(KERNEL_DS);
1611 res = blkdev_ioctl(bdev, 0, cmd, arg);
1612 set_fs(old_fs);
1613 return res;
1614}
1615
1616EXPORT_SYMBOL(ioctl_by_bdev);
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626struct block_device *lookup_bdev(const char *pathname)
1627{
1628 struct block_device *bdev;
1629 struct inode *inode;
1630 struct path path;
1631 int error;
1632
1633 if (!pathname || !*pathname)
1634 return ERR_PTR(-EINVAL);
1635
1636 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1637 if (error)
1638 return ERR_PTR(error);
1639
1640 inode = path.dentry->d_inode;
1641 error = -ENOTBLK;
1642 if (!S_ISBLK(inode->i_mode))
1643 goto fail;
1644 error = -EACCES;
1645 if (path.mnt->mnt_flags & MNT_NODEV)
1646 goto fail;
1647 error = -ENOMEM;
1648 bdev = bd_acquire(inode);
1649 if (!bdev)
1650 goto fail;
1651out:
1652 path_put(&path);
1653 return bdev;
1654fail:
1655 bdev = ERR_PTR(error);
1656 goto out;
1657}
1658EXPORT_SYMBOL(lookup_bdev);
1659
1660int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1661{
1662 struct super_block *sb = get_super(bdev);
1663 int res = 0;
1664
1665 if (sb) {
1666
1667
1668
1669
1670
1671
1672 shrink_dcache_sb(sb);
1673 res = invalidate_inodes(sb, kill_dirty);
1674 drop_super(sb);
1675 }
1676 invalidate_bdev(bdev);
1677 return res;
1678}
1679EXPORT_SYMBOL(__invalidate_device);
1680
/*
 * Call @func(bdev, @arg) for every block device inode of the bdev
 * pseudo-fs that has pages in its mapping and is neither new nor being
 * freed.
 *
 * inode_sb_list_lock is dropped around each call to @func; a reference on
 * the current inode is held across that window.
 */
void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* pin the inode before the list lock is dropped */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		/*
		 * The reference on 'inode' is kept until the next iteration
		 * (or the end of the loop), presumably so the inode stays on
		 * s_inodes and the walk can continue from it.  The previous
		 * inode is released here, i.e. only once inode_sb_list_lock
		 * is no longer held.
		 */
		iput(old_inode);
		old_inode = inode;

		func(I_BDEV(inode), arg);

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}
1716