1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/blkpg.h>
19#include <linux/magic.h>
20#include <linux/buffer_head.h>
21#include <linux/swap.h>
22#include <linux/pagevec.h>
23#include <linux/writeback.h>
24#include <linux/mpage.h>
25#include <linux/mount.h>
26#include <linux/uio.h>
27#include <linux/namei.h>
28#include <linux/log2.h>
29#include <linux/cleancache.h>
30#include <asm/uaccess.h>
31#include "internal.h"
32
33struct bdev_inode {
34 struct block_device bdev;
35 struct inode vfs_inode;
36};
37
38static const struct address_space_operations def_blk_aops;
39
40static inline struct bdev_inode *BDEV_I(struct inode *inode)
41{
42 return container_of(inode, struct bdev_inode, vfs_inode);
43}
44
45inline struct block_device *I_BDEV(struct inode *inode)
46{
47 return &BDEV_I(inode)->bdev;
48}
49EXPORT_SYMBOL(I_BDEV);
50
51
52
53
54
55
56static void bdev_inode_switch_bdi(struct inode *inode,
57 struct backing_dev_info *dst)
58{
59 struct backing_dev_info *old = inode->i_data.backing_dev_info;
60
61 if (unlikely(dst == old))
62 return;
63 bdi_lock_two(&old->wb, &dst->wb);
64 spin_lock(&inode->i_lock);
65 inode->i_data.backing_dev_info = dst;
66 if (inode->i_state & I_DIRTY)
67 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
68 spin_unlock(&inode->i_lock);
69 spin_unlock(&old->wb.list_lock);
70 spin_unlock(&dst->wb.list_lock);
71}
72
73
74void kill_bdev(struct block_device *bdev)
75{
76 struct address_space *mapping = bdev->bd_inode->i_mapping;
77
78 if (mapping->nrpages == 0)
79 return;
80
81 invalidate_bh_lrus();
82 truncate_inode_pages(mapping, 0);
83}
84EXPORT_SYMBOL(kill_bdev);
85
86
87void invalidate_bdev(struct block_device *bdev)
88{
89 struct address_space *mapping = bdev->bd_inode->i_mapping;
90
91 if (mapping->nrpages == 0)
92 return;
93
94 invalidate_bh_lrus();
95 lru_add_drain_all();
96 invalidate_mapping_pages(mapping, 0, -1);
97
98
99
100 cleancache_invalidate_inode(mapping);
101}
102EXPORT_SYMBOL(invalidate_bdev);
103
104int set_blocksize(struct block_device *bdev, int size)
105{
106
107 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
108 return -EINVAL;
109
110
111 if (size < bdev_logical_block_size(bdev))
112 return -EINVAL;
113
114
115 if (bdev->bd_block_size != size) {
116 sync_blockdev(bdev);
117 bdev->bd_block_size = size;
118 bdev->bd_inode->i_blkbits = blksize_bits(size);
119 kill_bdev(bdev);
120 }
121 return 0;
122}
123
124EXPORT_SYMBOL(set_blocksize);
125
126int sb_set_blocksize(struct super_block *sb, int size)
127{
128 if (set_blocksize(sb->s_bdev, size))
129 return 0;
130
131
132 sb->s_blocksize = size;
133 sb->s_blocksize_bits = blksize_bits(size);
134 return sb->s_blocksize;
135}
136
137EXPORT_SYMBOL(sb_set_blocksize);
138
139int sb_min_blocksize(struct super_block *sb, int size)
140{
141 int minsize = bdev_logical_block_size(sb->s_bdev);
142 if (size < minsize)
143 size = minsize;
144 return sb_set_blocksize(sb, size);
145}
146
147EXPORT_SYMBOL(sb_min_blocksize);
148
149static int
150blkdev_get_block(struct inode *inode, sector_t iblock,
151 struct buffer_head *bh, int create)
152{
153 bh->b_bdev = I_BDEV(inode);
154 bh->b_blocknr = iblock;
155 set_buffer_mapped(bh);
156 return 0;
157}
158
159static ssize_t
160blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
161 loff_t offset, unsigned long nr_segs)
162{
163 struct file *file = iocb->ki_filp;
164 struct inode *inode = file->f_mapping->host;
165
166 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
167 nr_segs, blkdev_get_block, NULL, NULL, 0);
168}
169
170int __sync_blockdev(struct block_device *bdev, int wait)
171{
172 if (!bdev)
173 return 0;
174 if (!wait)
175 return filemap_flush(bdev->bd_inode->i_mapping);
176 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
177}
178
179
180
181
182
/* Write out @bdev's dirty pages and wait for the writes to finish. */
int sync_blockdev(struct block_device *bdev)
{
	const int wait = 1;

	return __sync_blockdev(bdev, wait);
}
EXPORT_SYMBOL(sync_blockdev);
188
189
190
191
192
193
/*
 * Sync whatever lives on @bdev: if get_super() finds a superblock for the
 * device, that filesystem is synced; otherwise only the block device's
 * own mapping is.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
205
206
207
208
209
210
211
212
213
214
215
216
217
218struct super_block *freeze_bdev(struct block_device *bdev)
219{
220 struct super_block *sb;
221 int error = 0;
222
223 mutex_lock(&bdev->bd_fsfreeze_mutex);
224 if (++bdev->bd_fsfreeze_count > 1) {
225
226
227
228
229
230 sb = get_super(bdev);
231 drop_super(sb);
232 mutex_unlock(&bdev->bd_fsfreeze_mutex);
233 return sb;
234 }
235
236 sb = get_active_super(bdev);
237 if (!sb)
238 goto out;
239 error = freeze_super(sb);
240 if (error) {
241 deactivate_super(sb);
242 bdev->bd_fsfreeze_count--;
243 mutex_unlock(&bdev->bd_fsfreeze_mutex);
244 return ERR_PTR(error);
245 }
246 deactivate_super(sb);
247 out:
248 sync_blockdev(bdev);
249 mutex_unlock(&bdev->bd_fsfreeze_mutex);
250 return sb;
251}
252EXPORT_SYMBOL(freeze_bdev);
253
254
255
256
257
258
259
260
261int thaw_bdev(struct block_device *bdev, struct super_block *sb)
262{
263 int error = -EINVAL;
264
265 mutex_lock(&bdev->bd_fsfreeze_mutex);
266 if (!bdev->bd_fsfreeze_count)
267 goto out;
268
269 error = 0;
270 if (--bdev->bd_fsfreeze_count > 0)
271 goto out;
272
273 if (!sb)
274 goto out;
275
276 error = thaw_super(sb);
277 if (error) {
278 bdev->bd_fsfreeze_count++;
279 mutex_unlock(&bdev->bd_fsfreeze_mutex);
280 return error;
281 }
282out:
283 mutex_unlock(&bdev->bd_fsfreeze_mutex);
284 return 0;
285}
286EXPORT_SYMBOL(thaw_bdev);
287
288static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
289{
290 return block_write_full_page(page, blkdev_get_block, wbc);
291}
292
293static int blkdev_readpage(struct file * file, struct page * page)
294{
295 return block_read_full_page(page, blkdev_get_block);
296}
297
298static int blkdev_write_begin(struct file *file, struct address_space *mapping,
299 loff_t pos, unsigned len, unsigned flags,
300 struct page **pagep, void **fsdata)
301{
302 return block_write_begin(mapping, pos, len, flags, pagep,
303 blkdev_get_block);
304}
305
306static int blkdev_write_end(struct file *file, struct address_space *mapping,
307 loff_t pos, unsigned len, unsigned copied,
308 struct page *page, void *fsdata)
309{
310 int ret;
311 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
312
313 unlock_page(page);
314 page_cache_release(page);
315
316 return ret;
317}
318
319
320
321
322
323
324static loff_t block_llseek(struct file *file, loff_t offset, int origin)
325{
326 struct inode *bd_inode = file->f_mapping->host;
327 loff_t size;
328 loff_t retval;
329
330 mutex_lock(&bd_inode->i_mutex);
331 size = i_size_read(bd_inode);
332
333 retval = -EINVAL;
334 switch (origin) {
335 case SEEK_END:
336 offset += size;
337 break;
338 case SEEK_CUR:
339 offset += file->f_pos;
340 case SEEK_SET:
341 break;
342 default:
343 goto out;
344 }
345 if (offset >= 0 && offset <= size) {
346 if (offset != file->f_pos) {
347 file->f_pos = offset;
348 }
349 retval = offset;
350 }
351out:
352 mutex_unlock(&bd_inode->i_mutex);
353 return retval;
354}
355
356int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
357{
358 struct inode *bd_inode = filp->f_mapping->host;
359 struct block_device *bdev = I_BDEV(bd_inode);
360 int error;
361
362 error = filemap_write_and_wait_range(filp->f_mapping, start, end);
363 if (error)
364 return error;
365
366
367
368
369
370
371 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
372 if (error == -EOPNOTSUPP)
373 error = 0;
374
375 return error;
376}
377EXPORT_SYMBOL(blkdev_fsync);
378
379
380
381
382
383static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
384static struct kmem_cache * bdev_cachep __read_mostly;
385
386static struct inode *bdev_alloc_inode(struct super_block *sb)
387{
388 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
389 if (!ei)
390 return NULL;
391 return &ei->vfs_inode;
392}
393
394static void bdev_i_callback(struct rcu_head *head)
395{
396 struct inode *inode = container_of(head, struct inode, i_rcu);
397 struct bdev_inode *bdi = BDEV_I(inode);
398
399 kmem_cache_free(bdev_cachep, bdi);
400}
401
402static void bdev_destroy_inode(struct inode *inode)
403{
404 call_rcu(&inode->i_rcu, bdev_i_callback);
405}
406
407static void init_once(void *foo)
408{
409 struct bdev_inode *ei = (struct bdev_inode *) foo;
410 struct block_device *bdev = &ei->bdev;
411
412 memset(bdev, 0, sizeof(*bdev));
413 mutex_init(&bdev->bd_mutex);
414 INIT_LIST_HEAD(&bdev->bd_inodes);
415 INIT_LIST_HEAD(&bdev->bd_list);
416#ifdef CONFIG_SYSFS
417 INIT_LIST_HEAD(&bdev->bd_holder_disks);
418#endif
419 inode_init_once(&ei->vfs_inode);
420
421 mutex_init(&bdev->bd_fsfreeze_mutex);
422}
423
424static inline void __bd_forget(struct inode *inode)
425{
426 list_del_init(&inode->i_devices);
427 inode->i_bdev = NULL;
428 inode->i_mapping = &inode->i_data;
429}
430
431static void bdev_evict_inode(struct inode *inode)
432{
433 struct block_device *bdev = &BDEV_I(inode)->bdev;
434 struct list_head *p;
435 truncate_inode_pages(&inode->i_data, 0);
436 invalidate_inode_buffers(inode);
437 clear_inode(inode);
438 spin_lock(&bdev_lock);
439 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
440 __bd_forget(list_entry(p, struct inode, i_devices));
441 }
442 list_del_init(&bdev->bd_list);
443 spin_unlock(&bdev_lock);
444}
445
446static const struct super_operations bdev_sops = {
447 .statfs = simple_statfs,
448 .alloc_inode = bdev_alloc_inode,
449 .destroy_inode = bdev_destroy_inode,
450 .drop_inode = generic_delete_inode,
451 .evict_inode = bdev_evict_inode,
452};
453
454static struct dentry *bd_mount(struct file_system_type *fs_type,
455 int flags, const char *dev_name, void *data)
456{
457 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
458}
459
460static struct file_system_type bd_type = {
461 .name = "bdev",
462 .mount = bd_mount,
463 .kill_sb = kill_anon_super,
464};
465
466static struct super_block *blockdev_superblock __read_mostly;
467
468void __init bdev_cache_init(void)
469{
470 int err;
471 static struct vfsmount *bd_mnt;
472
473 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
474 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
475 SLAB_MEM_SPREAD|SLAB_PANIC),
476 init_once);
477 err = register_filesystem(&bd_type);
478 if (err)
479 panic("Cannot register bdev pseudo-fs");
480 bd_mnt = kern_mount(&bd_type);
481 if (IS_ERR(bd_mnt))
482 panic("Cannot create bdev pseudo-fs");
483 blockdev_superblock = bd_mnt->mnt_sb;
484}
485
486
487
488
489
490
491static inline unsigned long hash(dev_t dev)
492{
493 return MAJOR(dev)+MINOR(dev);
494}
495
496static int bdev_test(struct inode *inode, void *data)
497{
498 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
499}
500
501static int bdev_set(struct inode *inode, void *data)
502{
503 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
504 return 0;
505}
506
/* Every block_device created by bdget(), linked via bd_list under bdev_lock. */
static LIST_HEAD(all_bdevs);
508
509struct block_device *bdget(dev_t dev)
510{
511 struct block_device *bdev;
512 struct inode *inode;
513
514 inode = iget5_locked(blockdev_superblock, hash(dev),
515 bdev_test, bdev_set, &dev);
516
517 if (!inode)
518 return NULL;
519
520 bdev = &BDEV_I(inode)->bdev;
521
522 if (inode->i_state & I_NEW) {
523 bdev->bd_contains = NULL;
524 bdev->bd_super = NULL;
525 bdev->bd_inode = inode;
526 bdev->bd_block_size = (1 << inode->i_blkbits);
527 bdev->bd_part_count = 0;
528 bdev->bd_invalidated = 0;
529 inode->i_mode = S_IFBLK;
530 inode->i_rdev = dev;
531 inode->i_bdev = bdev;
532 inode->i_data.a_ops = &def_blk_aops;
533 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
534 inode->i_data.backing_dev_info = &default_backing_dev_info;
535 spin_lock(&bdev_lock);
536 list_add(&bdev->bd_list, &all_bdevs);
537 spin_unlock(&bdev_lock);
538 unlock_new_inode(inode);
539 }
540 return bdev;
541}
542
543EXPORT_SYMBOL(bdget);
544
545
546
547
548
549struct block_device *bdgrab(struct block_device *bdev)
550{
551 ihold(bdev->bd_inode);
552 return bdev;
553}
554
555long nr_blockdev_pages(void)
556{
557 struct block_device *bdev;
558 long ret = 0;
559 spin_lock(&bdev_lock);
560 list_for_each_entry(bdev, &all_bdevs, bd_list) {
561 ret += bdev->bd_inode->i_mapping->nrpages;
562 }
563 spin_unlock(&bdev_lock);
564 return ret;
565}
566
567void bdput(struct block_device *bdev)
568{
569 iput(bdev->bd_inode);
570}
571
572EXPORT_SYMBOL(bdput);
573
574static struct block_device *bd_acquire(struct inode *inode)
575{
576 struct block_device *bdev;
577
578 spin_lock(&bdev_lock);
579 bdev = inode->i_bdev;
580 if (bdev) {
581 ihold(bdev->bd_inode);
582 spin_unlock(&bdev_lock);
583 return bdev;
584 }
585 spin_unlock(&bdev_lock);
586
587 bdev = bdget(inode->i_rdev);
588 if (bdev) {
589 spin_lock(&bdev_lock);
590 if (!inode->i_bdev) {
591
592
593
594
595
596
597 ihold(bdev->bd_inode);
598 inode->i_bdev = bdev;
599 inode->i_mapping = bdev->bd_inode->i_mapping;
600 list_add(&inode->i_devices, &bdev->bd_inodes);
601 }
602 spin_unlock(&bdev_lock);
603 }
604 return bdev;
605}
606
607static inline int sb_is_blkdev_sb(struct super_block *sb)
608{
609 return sb == blockdev_superblock;
610}
611
612
613
614void bd_forget(struct inode *inode)
615{
616 struct block_device *bdev = NULL;
617
618 spin_lock(&bdev_lock);
619 if (inode->i_bdev) {
620 if (!sb_is_blkdev_sb(inode->i_sb))
621 bdev = inode->i_bdev;
622 __bd_forget(inode);
623 }
624 spin_unlock(&bdev_lock);
625
626 if (bdev)
627 iput(bdev->bd_inode);
628}
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
645 void *holder)
646{
647 if (bdev->bd_holder == holder)
648 return true;
649 else if (bdev->bd_holder != NULL)
650 return false;
651 else if (bdev->bd_contains == bdev)
652 return true;
653
654 else if (whole->bd_holder == bd_may_claim)
655 return true;
656 else if (whole->bd_holder != NULL)
657 return false;
658 else
659 return true;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680static int bd_prepare_to_claim(struct block_device *bdev,
681 struct block_device *whole, void *holder)
682{
683retry:
684
685 if (!bd_may_claim(bdev, whole, holder))
686 return -EBUSY;
687
688
689 if (whole->bd_claiming) {
690 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
691 DEFINE_WAIT(wait);
692
693 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
694 spin_unlock(&bdev_lock);
695 schedule();
696 finish_wait(wq, &wait);
697 spin_lock(&bdev_lock);
698 goto retry;
699 }
700
701
702 return 0;
703}
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728static struct block_device *bd_start_claiming(struct block_device *bdev,
729 void *holder)
730{
731 struct gendisk *disk;
732 struct block_device *whole;
733 int partno, err;
734
735 might_sleep();
736
737
738
739
740
741 disk = get_gendisk(bdev->bd_dev, &partno);
742 if (!disk)
743 return ERR_PTR(-ENXIO);
744
745
746
747
748
749
750
751
752
753 if (partno)
754 whole = bdget_disk(disk, 0);
755 else
756 whole = bdgrab(bdev);
757
758 module_put(disk->fops->owner);
759 put_disk(disk);
760 if (!whole)
761 return ERR_PTR(-ENOMEM);
762
763
764 spin_lock(&bdev_lock);
765
766 err = bd_prepare_to_claim(bdev, whole, holder);
767 if (err == 0) {
768 whole->bd_claiming = holder;
769 spin_unlock(&bdev_lock);
770 return whole;
771 } else {
772 spin_unlock(&bdev_lock);
773 bdput(whole);
774 return ERR_PTR(err);
775 }
776}
777
778#ifdef CONFIG_SYSFS
779struct bd_holder_disk {
780 struct list_head list;
781 struct gendisk *disk;
782 int refcnt;
783};
784
785static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
786 struct gendisk *disk)
787{
788 struct bd_holder_disk *holder;
789
790 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
791 if (holder->disk == disk)
792 return holder;
793 return NULL;
794}
795
/* Create, in directory @from, a sysfs link to @to named after @to. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
800
/* Remove from directory @from the sysfs link that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
835{
836 struct bd_holder_disk *holder;
837 int ret = 0;
838
839 mutex_lock(&bdev->bd_mutex);
840
841 WARN_ON_ONCE(!bdev->bd_holder);
842
843
844 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
845 goto out_unlock;
846
847 holder = bd_find_holder_disk(bdev, disk);
848 if (holder) {
849 holder->refcnt++;
850 goto out_unlock;
851 }
852
853 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
854 if (!holder) {
855 ret = -ENOMEM;
856 goto out_unlock;
857 }
858
859 INIT_LIST_HEAD(&holder->list);
860 holder->disk = disk;
861 holder->refcnt = 1;
862
863 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
864 if (ret)
865 goto out_free;
866
867 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
868 if (ret)
869 goto out_del;
870
871
872
873
874 kobject_get(bdev->bd_part->holder_dir);
875
876 list_add(&holder->list, &bdev->bd_holder_disks);
877 goto out_unlock;
878
879out_del:
880 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
881out_free:
882 kfree(holder);
883out_unlock:
884 mutex_unlock(&bdev->bd_mutex);
885 return ret;
886}
887EXPORT_SYMBOL_GPL(bd_link_disk_holder);
888
889
890
891
892
893
894
895
896
897
898
899void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
900{
901 struct bd_holder_disk *holder;
902
903 mutex_lock(&bdev->bd_mutex);
904
905 holder = bd_find_holder_disk(bdev, disk);
906
907 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
908 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
909 del_symlink(bdev->bd_part->holder_dir,
910 &disk_to_dev(disk)->kobj);
911 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list);
913 kfree(holder);
914 }
915
916 mutex_unlock(&bdev->bd_mutex);
917}
918EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
919#endif
920
921
922
923
924
925
926
927
928
929
930
931static void flush_disk(struct block_device *bdev, bool kill_dirty)
932{
933 if (__invalidate_device(bdev, kill_dirty)) {
934 char name[BDEVNAME_SIZE] = "";
935
936 if (bdev->bd_disk)
937 disk_name(bdev->bd_disk, 0, name);
938 printk(KERN_WARNING "VFS: busy inodes on changed media or "
939 "resized disk %s\n", name);
940 }
941
942 if (!bdev->bd_disk)
943 return;
944 if (disk_part_scan_enabled(bdev->bd_disk))
945 bdev->bd_invalidated = 1;
946}
947
948
949
950
951
952
953
954
955
956void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
957{
958 loff_t disk_size, bdev_size;
959
960 disk_size = (loff_t)get_capacity(disk) << 9;
961 bdev_size = i_size_read(bdev->bd_inode);
962 if (disk_size != bdev_size) {
963 char name[BDEVNAME_SIZE];
964
965 disk_name(disk, 0, name);
966 printk(KERN_INFO
967 "%s: detected capacity change from %lld to %lld\n",
968 name, bdev_size, disk_size);
969 i_size_write(bdev->bd_inode, disk_size);
970 flush_disk(bdev, false);
971 }
972}
973EXPORT_SYMBOL(check_disk_size_change);
974
975
976
977
978
979
980
981
982
983int revalidate_disk(struct gendisk *disk)
984{
985 struct block_device *bdev;
986 int ret = 0;
987
988 if (disk->fops->revalidate_disk)
989 ret = disk->fops->revalidate_disk(disk);
990
991 bdev = bdget_disk(disk, 0);
992 if (!bdev)
993 return ret;
994
995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev);
997 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev);
999 return ret;
1000}
1001EXPORT_SYMBOL(revalidate_disk);
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012int check_disk_change(struct block_device *bdev)
1013{
1014 struct gendisk *disk = bdev->bd_disk;
1015 const struct block_device_operations *bdops = disk->fops;
1016 unsigned int events;
1017
1018 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1019 DISK_EVENT_EJECT_REQUEST);
1020 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1021 return 0;
1022
1023 flush_disk(bdev, true);
1024 if (bdops->revalidate_disk)
1025 bdops->revalidate_disk(bdev->bd_disk);
1026 return 1;
1027}
1028
1029EXPORT_SYMBOL(check_disk_change);
1030
1031void bd_set_size(struct block_device *bdev, loff_t size)
1032{
1033 unsigned bsize = bdev_logical_block_size(bdev);
1034
1035 bdev->bd_inode->i_size = size;
1036 while (bsize < PAGE_CACHE_SIZE) {
1037 if (size & bsize)
1038 break;
1039 bsize <<= 1;
1040 }
1041 bdev->bd_block_size = bsize;
1042 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
1043}
1044EXPORT_SYMBOL(bd_set_size);
1045
1046static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1047
1048
1049
1050
1051
1052
1053
1054
1055static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1056{
1057 struct gendisk *disk;
1058 struct module *owner;
1059 int ret;
1060 int partno;
1061 int perm = 0;
1062
1063 if (mode & FMODE_READ)
1064 perm |= MAY_READ;
1065 if (mode & FMODE_WRITE)
1066 perm |= MAY_WRITE;
1067
1068
1069
1070 if (!for_part) {
1071 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1072 if (ret != 0) {
1073 bdput(bdev);
1074 return ret;
1075 }
1076 }
1077
1078 restart:
1079
1080 ret = -ENXIO;
1081 disk = get_gendisk(bdev->bd_dev, &partno);
1082 if (!disk)
1083 goto out;
1084 owner = disk->fops->owner;
1085
1086 disk_block_events(disk);
1087 mutex_lock_nested(&bdev->bd_mutex, for_part);
1088 if (!bdev->bd_openers) {
1089 bdev->bd_disk = disk;
1090 bdev->bd_queue = disk->queue;
1091 bdev->bd_contains = bdev;
1092 if (!partno) {
1093 struct backing_dev_info *bdi;
1094
1095 ret = -ENXIO;
1096 bdev->bd_part = disk_get_part(disk, partno);
1097 if (!bdev->bd_part)
1098 goto out_clear;
1099
1100 ret = 0;
1101 if (disk->fops->open) {
1102 ret = disk->fops->open(bdev, mode);
1103 if (ret == -ERESTARTSYS) {
1104
1105
1106
1107
1108 disk_put_part(bdev->bd_part);
1109 bdev->bd_part = NULL;
1110 bdev->bd_disk = NULL;
1111 bdev->bd_queue = NULL;
1112 mutex_unlock(&bdev->bd_mutex);
1113 disk_unblock_events(disk);
1114 put_disk(disk);
1115 module_put(owner);
1116 goto restart;
1117 }
1118 }
1119
1120 if (!ret && !bdev->bd_openers) {
1121 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1122 bdi = blk_get_backing_dev_info(bdev);
1123 if (bdi == NULL)
1124 bdi = &default_backing_dev_info;
1125 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1126 }
1127
1128
1129
1130
1131
1132
1133
1134 if (bdev->bd_invalidated) {
1135 if (!ret)
1136 rescan_partitions(disk, bdev);
1137 else if (ret == -ENOMEDIUM)
1138 invalidate_partitions(disk, bdev);
1139 }
1140 if (ret)
1141 goto out_clear;
1142 } else {
1143 struct block_device *whole;
1144 whole = bdget_disk(disk, 0);
1145 ret = -ENOMEM;
1146 if (!whole)
1147 goto out_clear;
1148 BUG_ON(for_part);
1149 ret = __blkdev_get(whole, mode, 1);
1150 if (ret)
1151 goto out_clear;
1152 bdev->bd_contains = whole;
1153 bdev_inode_switch_bdi(bdev->bd_inode,
1154 whole->bd_inode->i_data.backing_dev_info);
1155 bdev->bd_part = disk_get_part(disk, partno);
1156 if (!(disk->flags & GENHD_FL_UP) ||
1157 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1158 ret = -ENXIO;
1159 goto out_clear;
1160 }
1161 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1162 }
1163 } else {
1164 if (bdev->bd_contains == bdev) {
1165 ret = 0;
1166 if (bdev->bd_disk->fops->open)
1167 ret = bdev->bd_disk->fops->open(bdev, mode);
1168
1169 if (bdev->bd_invalidated) {
1170 if (!ret)
1171 rescan_partitions(bdev->bd_disk, bdev);
1172 else if (ret == -ENOMEDIUM)
1173 invalidate_partitions(bdev->bd_disk, bdev);
1174 }
1175 if (ret)
1176 goto out_unlock_bdev;
1177 }
1178
1179 put_disk(disk);
1180 module_put(owner);
1181 }
1182 bdev->bd_openers++;
1183 if (for_part)
1184 bdev->bd_part_count++;
1185 mutex_unlock(&bdev->bd_mutex);
1186 disk_unblock_events(disk);
1187 return 0;
1188
1189 out_clear:
1190 disk_put_part(bdev->bd_part);
1191 bdev->bd_disk = NULL;
1192 bdev->bd_part = NULL;
1193 bdev->bd_queue = NULL;
1194 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1195 if (bdev != bdev->bd_contains)
1196 __blkdev_put(bdev->bd_contains, mode, 1);
1197 bdev->bd_contains = NULL;
1198 out_unlock_bdev:
1199 mutex_unlock(&bdev->bd_mutex);
1200 disk_unblock_events(disk);
1201 put_disk(disk);
1202 module_put(owner);
1203 out:
1204 bdput(bdev);
1205
1206 return ret;
1207}
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1229{
1230 struct block_device *whole = NULL;
1231 int res;
1232
1233 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1234
1235 if ((mode & FMODE_EXCL) && holder) {
1236 whole = bd_start_claiming(bdev, holder);
1237 if (IS_ERR(whole)) {
1238 bdput(bdev);
1239 return PTR_ERR(whole);
1240 }
1241 }
1242
1243 res = __blkdev_get(bdev, mode, 0);
1244
1245 if (whole) {
1246 struct gendisk *disk = whole->bd_disk;
1247
1248
1249 mutex_lock(&bdev->bd_mutex);
1250 spin_lock(&bdev_lock);
1251
1252 if (!res) {
1253 BUG_ON(!bd_may_claim(bdev, whole, holder));
1254
1255
1256
1257
1258
1259
1260 whole->bd_holders++;
1261 whole->bd_holder = bd_may_claim;
1262 bdev->bd_holders++;
1263 bdev->bd_holder = holder;
1264 }
1265
1266
1267 BUG_ON(whole->bd_claiming != holder);
1268 whole->bd_claiming = NULL;
1269 wake_up_bit(&whole->bd_claiming, 0);
1270
1271 spin_unlock(&bdev_lock);
1272
1273
1274
1275
1276
1277
1278
1279
1280 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1281 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1282 bdev->bd_write_holder = true;
1283 disk_block_events(disk);
1284 }
1285
1286 mutex_unlock(&bdev->bd_mutex);
1287 bdput(whole);
1288 }
1289
1290 return res;
1291}
1292EXPORT_SYMBOL(blkdev_get);
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1312 void *holder)
1313{
1314 struct block_device *bdev;
1315 int err;
1316
1317 bdev = lookup_bdev(path);
1318 if (IS_ERR(bdev))
1319 return bdev;
1320
1321 err = blkdev_get(bdev, mode, holder);
1322 if (err)
1323 return ERR_PTR(err);
1324
1325 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1326 blkdev_put(bdev, mode);
1327 return ERR_PTR(-EACCES);
1328 }
1329
1330 return bdev;
1331}
1332EXPORT_SYMBOL(blkdev_get_by_path);
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1357{
1358 struct block_device *bdev;
1359 int err;
1360
1361 bdev = bdget(dev);
1362 if (!bdev)
1363 return ERR_PTR(-ENOMEM);
1364
1365 err = blkdev_get(bdev, mode, holder);
1366 if (err)
1367 return ERR_PTR(err);
1368
1369 return bdev;
1370}
1371EXPORT_SYMBOL(blkdev_get_by_dev);
1372
1373static int blkdev_open(struct inode * inode, struct file * filp)
1374{
1375 struct block_device *bdev;
1376
1377
1378
1379
1380
1381
1382
1383 filp->f_flags |= O_LARGEFILE;
1384
1385 if (filp->f_flags & O_NDELAY)
1386 filp->f_mode |= FMODE_NDELAY;
1387 if (filp->f_flags & O_EXCL)
1388 filp->f_mode |= FMODE_EXCL;
1389 if ((filp->f_flags & O_ACCMODE) == 3)
1390 filp->f_mode |= FMODE_WRITE_IOCTL;
1391
1392 bdev = bd_acquire(inode);
1393 if (bdev == NULL)
1394 return -ENOMEM;
1395
1396 filp->f_mapping = bdev->bd_inode->i_mapping;
1397
1398 return blkdev_get(bdev, filp->f_mode, filp);
1399}
1400
1401static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1402{
1403 int ret = 0;
1404 struct gendisk *disk = bdev->bd_disk;
1405 struct block_device *victim = NULL;
1406
1407 mutex_lock_nested(&bdev->bd_mutex, for_part);
1408 if (for_part)
1409 bdev->bd_part_count--;
1410
1411 if (!--bdev->bd_openers) {
1412 WARN_ON_ONCE(bdev->bd_holders);
1413 sync_blockdev(bdev);
1414 kill_bdev(bdev);
1415
1416
1417
1418 bdev_inode_switch_bdi(bdev->bd_inode,
1419 &default_backing_dev_info);
1420 }
1421 if (bdev->bd_contains == bdev) {
1422 if (disk->fops->release)
1423 ret = disk->fops->release(disk, mode);
1424 }
1425 if (!bdev->bd_openers) {
1426 struct module *owner = disk->fops->owner;
1427
1428 disk_put_part(bdev->bd_part);
1429 bdev->bd_part = NULL;
1430 bdev->bd_disk = NULL;
1431 if (bdev != bdev->bd_contains)
1432 victim = bdev->bd_contains;
1433 bdev->bd_contains = NULL;
1434
1435 put_disk(disk);
1436 module_put(owner);
1437 }
1438 mutex_unlock(&bdev->bd_mutex);
1439 bdput(bdev);
1440 if (victim)
1441 __blkdev_put(victim, mode, 1);
1442 return ret;
1443}
1444
1445int blkdev_put(struct block_device *bdev, fmode_t mode)
1446{
1447 mutex_lock(&bdev->bd_mutex);
1448
1449 if (mode & FMODE_EXCL) {
1450 bool bdev_free;
1451
1452
1453
1454
1455
1456
1457 spin_lock(&bdev_lock);
1458
1459 WARN_ON_ONCE(--bdev->bd_holders < 0);
1460 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1461
1462
1463 if ((bdev_free = !bdev->bd_holders))
1464 bdev->bd_holder = NULL;
1465 if (!bdev->bd_contains->bd_holders)
1466 bdev->bd_contains->bd_holder = NULL;
1467
1468 spin_unlock(&bdev_lock);
1469
1470
1471
1472
1473
1474 if (bdev_free && bdev->bd_write_holder) {
1475 disk_unblock_events(bdev->bd_disk);
1476 bdev->bd_write_holder = false;
1477 }
1478 }
1479
1480
1481
1482
1483
1484
1485 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1486
1487 mutex_unlock(&bdev->bd_mutex);
1488
1489 return __blkdev_put(bdev, mode, 0);
1490}
1491EXPORT_SYMBOL(blkdev_put);
1492
1493static int blkdev_close(struct inode * inode, struct file * filp)
1494{
1495 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1496
1497 return blkdev_put(bdev, filp->f_mode);
1498}
1499
1500static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1501{
1502 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1503 fmode_t mode = file->f_mode;
1504
1505
1506
1507
1508
1509 if (file->f_flags & O_NDELAY)
1510 mode |= FMODE_NDELAY;
1511 else
1512 mode &= ~FMODE_NDELAY;
1513
1514 return blkdev_ioctl(bdev, mode, cmd, arg);
1515}
1516
1517
1518
1519
1520
1521
1522
1523
1524ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1525 unsigned long nr_segs, loff_t pos)
1526{
1527 struct file *file = iocb->ki_filp;
1528 struct blk_plug plug;
1529 ssize_t ret;
1530
1531 BUG_ON(iocb->ki_pos != pos);
1532
1533 blk_start_plug(&plug);
1534 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1535 if (ret > 0 || ret == -EIOCBQUEUED) {
1536 ssize_t err;
1537
1538 err = generic_write_sync(file, pos, ret);
1539 if (err < 0 && ret > 0)
1540 ret = err;
1541 }
1542 blk_finish_plug(&plug);
1543 return ret;
1544}
1545EXPORT_SYMBOL_GPL(blkdev_aio_write);
1546
1547static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1548 unsigned long nr_segs, loff_t pos)
1549{
1550 struct file *file = iocb->ki_filp;
1551 struct inode *bd_inode = file->f_mapping->host;
1552 loff_t size = i_size_read(bd_inode);
1553
1554 if (pos >= size)
1555 return 0;
1556
1557 size -= pos;
1558 if (size < INT_MAX)
1559 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
1560 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1561}
1562
1563
1564
1565
1566
1567static int blkdev_releasepage(struct page *page, gfp_t wait)
1568{
1569 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1570
1571 if (super && super->s_op->bdev_try_to_free_page)
1572 return super->s_op->bdev_try_to_free_page(super, page, wait);
1573
1574 return try_to_free_buffers(page);
1575}
1576
1577static const struct address_space_operations def_blk_aops = {
1578 .readpage = blkdev_readpage,
1579 .writepage = blkdev_writepage,
1580 .write_begin = blkdev_write_begin,
1581 .write_end = blkdev_write_end,
1582 .writepages = generic_writepages,
1583 .releasepage = blkdev_releasepage,
1584 .direct_IO = blkdev_direct_IO,
1585};
1586
1587const struct file_operations def_blk_fops = {
1588 .open = blkdev_open,
1589 .release = blkdev_close,
1590 .llseek = block_llseek,
1591 .read = do_sync_read,
1592 .write = do_sync_write,
1593 .aio_read = blkdev_aio_read,
1594 .aio_write = blkdev_aio_write,
1595 .mmap = generic_file_mmap,
1596 .fsync = blkdev_fsync,
1597 .unlocked_ioctl = block_ioctl,
1598#ifdef CONFIG_COMPAT
1599 .compat_ioctl = compat_blkdev_ioctl,
1600#endif
1601 .splice_read = generic_file_splice_read,
1602 .splice_write = generic_file_splice_write,
1603};
1604
1605int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1606{
1607 int res;
1608 mm_segment_t old_fs = get_fs();
1609 set_fs(KERNEL_DS);
1610 res = blkdev_ioctl(bdev, 0, cmd, arg);
1611 set_fs(old_fs);
1612 return res;
1613}
1614
1615EXPORT_SYMBOL(ioctl_by_bdev);
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625struct block_device *lookup_bdev(const char *pathname)
1626{
1627 struct block_device *bdev;
1628 struct inode *inode;
1629 struct path path;
1630 int error;
1631
1632 if (!pathname || !*pathname)
1633 return ERR_PTR(-EINVAL);
1634
1635 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1636 if (error)
1637 return ERR_PTR(error);
1638
1639 inode = path.dentry->d_inode;
1640 error = -ENOTBLK;
1641 if (!S_ISBLK(inode->i_mode))
1642 goto fail;
1643 error = -EACCES;
1644 if (path.mnt->mnt_flags & MNT_NODEV)
1645 goto fail;
1646 error = -ENOMEM;
1647 bdev = bd_acquire(inode);
1648 if (!bdev)
1649 goto fail;
1650out:
1651 path_put(&path);
1652 return bdev;
1653fail:
1654 bdev = ERR_PTR(error);
1655 goto out;
1656}
1657EXPORT_SYMBOL(lookup_bdev);
1658
1659int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1660{
1661 struct super_block *sb = get_super(bdev);
1662 int res = 0;
1663
1664 if (sb) {
1665
1666
1667
1668
1669
1670
1671 shrink_dcache_sb(sb);
1672 res = invalidate_inodes(sb, kill_dirty);
1673 drop_super(sb);
1674 }
1675 invalidate_bdev(bdev);
1676 return res;
1677}
1678EXPORT_SYMBOL(__invalidate_device);
1679
1680void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1681{
1682 struct inode *inode, *old_inode = NULL;
1683
1684 spin_lock(&inode_sb_list_lock);
1685 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1686 struct address_space *mapping = inode->i_mapping;
1687
1688 spin_lock(&inode->i_lock);
1689 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1690 mapping->nrpages == 0) {
1691 spin_unlock(&inode->i_lock);
1692 continue;
1693 }
1694 __iget(inode);
1695 spin_unlock(&inode->i_lock);
1696 spin_unlock(&inode_sb_list_lock);
1697
1698
1699
1700
1701
1702
1703
1704
1705 iput(old_inode);
1706 old_inode = inode;
1707
1708 func(I_BDEV(inode), arg);
1709
1710 spin_lock(&inode_sb_list_lock);
1711 }
1712 spin_unlock(&inode_sb_list_lock);
1713 iput(old_inode);
1714}
1715