/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andries Brouwer
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/magic.h>	/* for BDEVFS_MAGIC, used by bd_mount() below */
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/kmemleak.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

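/*
 * Move the inode from its current bdi to a new bdi.  If the inode is dirty we
 * need to transfer it to the dirty list of @dst so that the inode is always
 * on the right list.
 */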
static void bdev_inode_switch_bdi(struct inode *inode,
			struct backing_dev_info *dst)
{
	struct backing_dev_info *old = inode->i_data.backing_dev_info;

	if (unlikely(dst == old))		/* deadlock avoidance */
		return;
	bdi_lock_two(&old->wb, &dst->wb);
	spin_lock(&inode->i_lock);
	inode->i_data.backing_dev_info = dst;
	if (inode->i_state & I_DIRTY)
		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
	spin_unlock(&inode->i_lock);
	spin_unlock(&old->wb.list_lock);
	spin_unlock(&dst->wb.list_lock);
}

static sector_t max_block(struct block_device *bdev)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int size = block_size(bdev);
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

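/* Kill _all_ buffers and pagecache, dirty or not.. */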
static void kill_bdev(struct block_device *bdev)
{
	if (bdev->bd_inode->i_mapping->nrpages == 0)
		return;
	invalidate_bh_lrus();
	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	if (iblock >= max_block(I_BDEV(inode))) {
		if (create)
			return -EIO;

		/*
		 * for reads, we're just trying to fill partial page.
		 * return a hole, they will have to call get_block again
		 * before they can fill it, and they will get -EIO at that
		 * time
		 */
		return 0;
	}
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	sector_t end_block = max_block(I_BDEV(inode));
	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

	if ((iblock + max_blocks) > end_block) {
		max_blocks = end_block - iblock;
		if ((long)max_blocks <= 0) {
			if (create)
				return -EIO;	/* write fully beyond EOF */
			/*
			 * It is a read which is fully beyond EOF.  We return
			 * a !buffer_mapped buffer
			 */
			max_blocks = 0;
		}
	}

	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	bh->b_size = max_blocks << inode->i_blkbits;
	if (max_blocks)
		set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

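/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */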
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

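/*
 * Write out and wait upon all dirty data associated with this
 * device.   Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */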
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

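/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */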
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;
}
EXPORT_SYMBOL(freeze_bdev);

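/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */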
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	error = thaw_super(sb);
	if (error) {
		bdev->bd_fsfreeze_count++;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return error;
	}
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}

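/*
 * for a block special file file->f_path.dentry->d_inode->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */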
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size;
	loff_t retval;

	mutex_lock(&bd_inode->i_mutex);
	size = i_size_read(bd_inode);

	retval = -EINVAL;
	switch (origin) {
		case SEEK_END:
			offset += size;
			break;
		case SEEK_CUR:
			offset += file->f_pos;
			/* fall through */
		case SEEK_SET:
			break;
		default:
			goto out;
	}
	if (offset >= 0 && offset <= size) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
		}
		retval = offset;
	}
out:
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = filp->f_mapping->host;
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

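/*
 * pseudo-fs
 */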
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	truncate_inode_pages(&inode->i_data, 0);
	invalidate_inode_buffers(inode);
	end_writeback(inode);
	spin_lock(&bdev_lock);
	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	/*
	 * This vfsmount structure is not mounted anywhere and stays alive
	 * for the lifetime of the kernel; tell kmemleak not to report the
	 * pointer as a leak.
	 */
	kmemleak_not_leak(bd_mnt);
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

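/*
 * Most likely _very_ bad hash, but it's hardly critical for small
 * /dev and can be fixed if somebody ever needs a really large one.
 * Keep in mind that it will be fed through the icache hash function
 * too.
 */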
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

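/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */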
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		ihold(bdev->bd_inode);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			ihold(bdev->bd_inode);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

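/* Call when you free inode */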
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (inode->i_bdev) {
		if (!sb_is_blkdev_sb(inode->i_sb))
			bdev = inode->i_bdev;
		__bd_forget(inode);
	}
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}

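/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */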
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (bdev->bd_contains == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

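/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */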
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

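/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  Each
 * successful call to this function must be matched by finishing or
 * aborting the claim (see blkdev_get(), which does both).
 *
 * This function is used to gain exclusive access to the block device
 * without actually causing other exclusive open attempts to fail. It
 * should be used when the open sequence itself requires exclusive
 * access but may subsequently fail.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */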
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

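/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */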
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

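/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */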
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

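/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */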
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

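/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */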
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev, false);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

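/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */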
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

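/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */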
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	bdev->bd_inode->i_size = size;
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

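/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */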
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret && !bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
				rescan_partitions(disk, bdev);
			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev_inode_switch_bdi(bdev->bd_inode,
				whole->bd_inode->i_data.backing_dev_info);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
				rescan_partitions(bdev->bd_disk, bdev);
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}
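/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */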
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);
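/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */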
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

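/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */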
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	int ret = 0;
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);
		/* ->release can cause the old bdi to disappear,
		 * so must switch it out first
		 */
		bdev_inode_switch_bdi(bdev->bd_inode,
					&default_backing_dev_info);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			ret = disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
	return ret;
}

int blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point back to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);

	return blkdev_put(bdev, filp->f_mode);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

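/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */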
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0 || ret == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, ret);
		if (err < 0 && ret > 0)
			ret = err;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

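/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */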
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

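/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */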
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);