1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/backing-dev.h>
18#include <linux/module.h>
19#include <linux/blkpg.h>
20#include <linux/magic.h>
21#include <linux/buffer_head.h>
22#include <linux/swap.h>
23#include <linux/pagevec.h>
24#include <linux/writeback.h>
25#include <linux/mpage.h>
26#include <linux/mount.h>
27#include <linux/pseudo_fs.h>
28#include <linux/uio.h>
29#include <linux/namei.h>
30#include <linux/log2.h>
31#include <linux/cleancache.h>
32#include <linux/task_io_accounting_ops.h>
33#include <linux/falloc.h>
34#include <linux/part_stat.h>
35#include <linux/uaccess.h>
36#include <linux/suspend.h>
37#include "internal.h"
38
/*
 * Pairs a block_device with the VFS inode that represents it.
 * BDEV_I() recovers this container from the embedded inode with
 * container_of(), and I_BDEV() returns the address of the bdev member.
 */
39struct bdev_inode {
40 struct block_device bdev;
41 struct inode vfs_inode;
42};
43
/*
 * Tentative declaration so that bdev_alloc() can point a new inode's
 * i_data.a_ops at this table; the initialised definition is not in view
 * here and is presumably further down the file.
 */
44static const struct address_space_operations def_blk_aops;
45
46static inline struct bdev_inode *BDEV_I(struct inode *inode)
47{
48 return container_of(inode, struct bdev_inode, vfs_inode);
49}
50
51struct block_device *I_BDEV(struct inode *inode)
52{
53 return &BDEV_I(inode)->bdev;
54}
55EXPORT_SYMBOL(I_BDEV);
56
57static void bdev_write_inode(struct block_device *bdev)
58{
59 struct inode *inode = bdev->bd_inode;
60 int ret;
61
62 spin_lock(&inode->i_lock);
63 while (inode->i_state & I_DIRTY) {
64 spin_unlock(&inode->i_lock);
65 ret = write_inode_now(inode, true);
66 if (ret) {
67 char name[BDEVNAME_SIZE];
68 pr_warn_ratelimited("VFS: Dirty inode writeback failed "
69 "for block device %s (err=%d).\n",
70 bdevname(bdev, name), ret);
71 }
72 spin_lock(&inode->i_lock);
73 }
74 spin_unlock(&inode->i_lock);
75}
76
77
78static void kill_bdev(struct block_device *bdev)
79{
80 struct address_space *mapping = bdev->bd_inode->i_mapping;
81
82 if (mapping_empty(mapping))
83 return;
84
85 invalidate_bh_lrus();
86 truncate_inode_pages(mapping, 0);
87}
88
89
90void invalidate_bdev(struct block_device *bdev)
91{
92 struct address_space *mapping = bdev->bd_inode->i_mapping;
93
94 if (mapping->nrpages) {
95 invalidate_bh_lrus();
96 lru_add_drain_all();
97 invalidate_mapping_pages(mapping, 0, -1);
98 }
99
100
101
102 cleancache_invalidate_inode(mapping);
103}
104EXPORT_SYMBOL(invalidate_bdev);
105
106
107
108
109
110int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
111 loff_t lstart, loff_t lend)
112{
113
114
115
116
117
118 if (!(mode & FMODE_EXCL)) {
119 int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
120 if (err)
121 goto invalidate;
122 }
123
124 truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
125 if (!(mode & FMODE_EXCL))
126 bd_abort_claiming(bdev, truncate_bdev_range);
127 return 0;
128
129invalidate:
130
131
132
133
134 return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
135 lstart >> PAGE_SHIFT,
136 lend >> PAGE_SHIFT);
137}
138
139static void set_init_blocksize(struct block_device *bdev)
140{
141 unsigned int bsize = bdev_logical_block_size(bdev);
142 loff_t size = i_size_read(bdev->bd_inode);
143
144 while (bsize < PAGE_SIZE) {
145 if (size & bsize)
146 break;
147 bsize <<= 1;
148 }
149 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
150}
151
152int set_blocksize(struct block_device *bdev, int size)
153{
154
155 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
156 return -EINVAL;
157
158
159 if (size < bdev_logical_block_size(bdev))
160 return -EINVAL;
161
162
163 if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
164 sync_blockdev(bdev);
165 bdev->bd_inode->i_blkbits = blksize_bits(size);
166 kill_bdev(bdev);
167 }
168 return 0;
169}
170
171EXPORT_SYMBOL(set_blocksize);
172
173int sb_set_blocksize(struct super_block *sb, int size)
174{
175 if (set_blocksize(sb->s_bdev, size))
176 return 0;
177
178
179 sb->s_blocksize = size;
180 sb->s_blocksize_bits = blksize_bits(size);
181 return sb->s_blocksize;
182}
183
184EXPORT_SYMBOL(sb_set_blocksize);
185
186int sb_min_blocksize(struct super_block *sb, int size)
187{
188 int minsize = bdev_logical_block_size(sb->s_bdev);
189 if (size < minsize)
190 size = minsize;
191 return sb_set_blocksize(sb, size);
192}
193
194EXPORT_SYMBOL(sb_min_blocksize);
195
196static int
197blkdev_get_block(struct inode *inode, sector_t iblock,
198 struct buffer_head *bh, int create)
199{
200 bh->b_bdev = I_BDEV(inode);
201 bh->b_blocknr = iblock;
202 set_buffer_mapped(bh);
203 return 0;
204}
205
206static struct inode *bdev_file_inode(struct file *file)
207{
208 return file->f_mapping->host;
209}
210
211static unsigned int dio_bio_write_op(struct kiocb *iocb)
212{
213 unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
214
215
216 if (iocb->ki_flags & IOCB_DSYNC)
217 op |= REQ_FUA;
218 return op;
219}
220
/*
 * Size of the on-stack bio_vec array in __blkdev_direct_IO_simple();
 * requests needing more vecs than this fall back to kmalloc_array().
 */
221#define DIO_INLINE_BIO_VECS 4
222
223static void blkdev_bio_end_io_simple(struct bio *bio)
224{
225 struct task_struct *waiter = bio->bi_private;
226
227 WRITE_ONCE(bio->bi_private, NULL);
228 blk_wake_io_task(waiter);
229}
230
231static ssize_t
232__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
233 unsigned int nr_pages)
234{
235 struct file *file = iocb->ki_filp;
236 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
237 struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
238 loff_t pos = iocb->ki_pos;
239 bool should_dirty = false;
240 struct bio bio;
241 ssize_t ret;
242 blk_qc_t qc;
243
244 if ((pos | iov_iter_alignment(iter)) &
245 (bdev_logical_block_size(bdev) - 1))
246 return -EINVAL;
247
248 if (nr_pages <= DIO_INLINE_BIO_VECS)
249 vecs = inline_vecs;
250 else {
251 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
252 GFP_KERNEL);
253 if (!vecs)
254 return -ENOMEM;
255 }
256
257 bio_init(&bio, vecs, nr_pages);
258 bio_set_dev(&bio, bdev);
259 bio.bi_iter.bi_sector = pos >> 9;
260 bio.bi_write_hint = iocb->ki_hint;
261 bio.bi_private = current;
262 bio.bi_end_io = blkdev_bio_end_io_simple;
263 bio.bi_ioprio = iocb->ki_ioprio;
264
265 ret = bio_iov_iter_get_pages(&bio, iter);
266 if (unlikely(ret))
267 goto out;
268 ret = bio.bi_iter.bi_size;
269
270 if (iov_iter_rw(iter) == READ) {
271 bio.bi_opf = REQ_OP_READ;
272 if (iter_is_iovec(iter))
273 should_dirty = true;
274 } else {
275 bio.bi_opf = dio_bio_write_op(iocb);
276 task_io_account_write(ret);
277 }
278 if (iocb->ki_flags & IOCB_NOWAIT)
279 bio.bi_opf |= REQ_NOWAIT;
280 if (iocb->ki_flags & IOCB_HIPRI)
281 bio_set_polled(&bio, iocb);
282
283 qc = submit_bio(&bio);
284 for (;;) {
285 set_current_state(TASK_UNINTERRUPTIBLE);
286 if (!READ_ONCE(bio.bi_private))
287 break;
288 if (!(iocb->ki_flags & IOCB_HIPRI) ||
289 !blk_poll(bdev_get_queue(bdev), qc, true))
290 blk_io_schedule();
291 }
292 __set_current_state(TASK_RUNNING);
293
294 bio_release_pages(&bio, should_dirty);
295 if (unlikely(bio.bi_status))
296 ret = blk_status_to_errno(bio.bi_status);
297
298out:
299 if (vecs != inline_vecs)
300 kfree(vecs);
301
302 bio_uninit(&bio);
303
304 return ret;
305}
306
/*
 * Per-request state for the general direct-IO path (__blkdev_direct_IO()).
 *
 * The structure is never allocated on its own: blkdev_init() passes
 * offsetof(struct blkdev_dio, bio) as the front pad of blkdev_dio_pool, and
 * __blkdev_direct_IO() recovers the dio from the first bio with
 * container_of().
 */
307struct blkdev_dio {
	/* iocb is used when !is_sync, waiter (the sleeping task) when is_sync. */
308 union {
309 struct kiocb *iocb;
310 struct task_struct *waiter;
311 };
	/* Running total of bi_size over all bios built for the request. */
312 size_t size;
	/* Outstanding-bio count; only used once multi_bio is set. */
313 atomic_t ref;
314 bool multi_bio : 1;
	/* Set for reads into iovec iterators; see blkdev_bio_end_io(). */
315 bool should_dirty : 1;
316 bool is_sync : 1;
	/* First bio of the request; its bi_status collects the first error. */
317 struct bio bio;
318};
319
/*
 * bio_set initialised by blkdev_init(); __blkdev_direct_IO() allocates the
 * first bio of every request from it.
 */
320static struct bio_set blkdev_dio_pool;
321
322static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
323{
324 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
325 struct request_queue *q = bdev_get_queue(bdev);
326
327 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
328}
329
/*
 * Completion handler installed by __blkdev_direct_IO() on every bio of a
 * request; bi_private points at the owning blkdev_dio.
 *
 * Records the first error in dio->bio.bi_status.  When this is the only bio
 * (!multi_bio) or the last outstanding one (ref drops to zero), the request
 * is completed: async requests call ki_complete(), sync requests wake the
 * waiting task.  Finally the pages of this bio are released.
 */
330static void blkdev_bio_end_io(struct bio *bio)
331{
332 struct blkdev_dio *dio = bio->bi_private;
	/*
	 * Sampled up front so the page-release step below does not read dio
	 * again.  NOTE(review): presumably because dio may be freed once the
	 * request has been completed or the waiter woken - confirm.
	 */
333 bool should_dirty = dio->should_dirty;
334
	/* Keep only the first error seen across all bios of the request. */
335 if (bio->bi_status && !dio->bio.bi_status)
336 dio->bio.bi_status = bio->bi_status;
337
338 if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
339 if (!dio->is_sync) {
340 struct kiocb *iocb = dio->iocb;
341 ssize_t ret;
342
			/* Success: report the byte count and advance ki_pos. */
343 if (likely(!dio->bio.bi_status)) {
344 ret = dio->size;
345 iocb->ki_pos += ret;
346 } else {
347 ret = blk_status_to_errno(dio->bio.bi_status);
348 }
349
350 dio->iocb->ki_complete(iocb, ret, 0);
			/*
			 * Drops the extra reference __blkdev_direct_IO() takes on
			 * the first bio when an async request goes multi-bio.
			 */
351 if (dio->multi_bio)
352 bio_put(&dio->bio);
353 } else {
354 struct task_struct *waiter = dio->waiter;
355
			/* Clear waiter first; the sleeper polls it with READ_ONCE(). */
356 WRITE_ONCE(dio->waiter, NULL);
357 blk_wake_io_task(waiter);
358 }
359 }
360
	/*
	 * With should_dirty the bio is handed to bio_check_pages_dirty();
	 * otherwise the pages are released here and the bio reference dropped.
	 */
361 if (should_dirty) {
362 bio_check_pages_dirty(bio);
363 } else {
364 bio_release_pages(bio, false);
365 bio_put(bio);
366 }
367}
368
/*
 * General direct-IO path: splits the request into as many bios as needed.
 * blkdev_direct_IO() uses it for async iocbs and for sync requests that do
 * not fit __blkdev_direct_IO_simple().
 *
 * @iocb:     the request; ki_pos supplies the starting byte offset
 * @iter:     buffers to transfer
 * @nr_pages: number of vecs to allocate for the first bio
 *
 * Returns -EINVAL for a position/iterator not aligned to the logical block
 * size, -EIOCBQUEUED for async requests (the result is then delivered via
 * ki_complete() from blkdev_bio_end_io()), and for sync requests either the
 * total number of bytes or a negative errno.
 */
369static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
370 unsigned int nr_pages)
371{
372 struct file *file = iocb->ki_filp;
373 struct inode *inode = bdev_file_inode(file);
374 struct block_device *bdev = I_BDEV(inode);
375 struct blk_plug plug;
376 struct blkdev_dio *dio;
377 struct bio *bio;
378 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
379 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
380 loff_t pos = iocb->ki_pos;
381 blk_qc_t qc = BLK_QC_T_NONE;
382 int ret = 0;
383
384 if ((pos | iov_iter_alignment(iter)) &
385 (bdev_logical_block_size(bdev) - 1))
386 return -EINVAL;
387
	/*
	 * The first bio comes from blkdev_dio_pool, so it is embedded in a
	 * struct blkdev_dio that container_of() can recover.  The result is
	 * used without a NULL check.
	 */
388 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
389
390 dio = container_of(bio, struct blkdev_dio, bio);
391 dio->is_sync = is_sync = is_sync_kiocb(iocb);
392 if (dio->is_sync) {
393 dio->waiter = current;
		/* Extra reference; dropped by the bio_put() at the end of this function. */
394 bio_get(bio);
395 } else {
396 dio->iocb = iocb;
397 }
398
399 dio->size = 0;
400 dio->multi_bio = false;
401 dio->should_dirty = is_read && iter_is_iovec(iter);
402
403
404
405
406
	/* Plugging is skipped for polled (IOCB_HIPRI) requests. */
407 if (!is_poll)
408 blk_start_plug(&plug);
409
410 for (;;) {
411 bio_set_dev(bio, bdev);
412 bio->bi_iter.bi_sector = pos >> 9;
413 bio->bi_write_hint = iocb->ki_hint;
414 bio->bi_private = dio;
415 bio->bi_end_io = blkdev_bio_end_io;
416 bio->bi_ioprio = iocb->ki_ioprio;
417
		/*
		 * Failing to attach pages ends this bio with BLK_STS_IOERR so
		 * the normal completion path runs; ret keeps the real error.
		 */
418 ret = bio_iov_iter_get_pages(bio, iter);
419 if (unlikely(ret)) {
420 bio->bi_status = BLK_STS_IOERR;
421 bio_endio(bio);
422 break;
423 }
424
425 if (is_read) {
426 bio->bi_opf = REQ_OP_READ;
427 if (dio->should_dirty)
428 bio_set_pages_dirty(bio);
429 } else {
430 bio->bi_opf = dio_bio_write_op(iocb);
431 task_io_account_write(bio->bi_iter.bi_size);
432 }
433 if (iocb->ki_flags & IOCB_NOWAIT)
434 bio->bi_opf |= REQ_NOWAIT;
435
436 dio->size += bio->bi_iter.bi_size;
437 pos += bio->bi_iter.bi_size;
438
		/* Zero means the iterator is exhausted: this is the last bio. */
439 nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
440 if (!nr_pages) {
441 bool polled = false;
442
443 if (iocb->ki_flags & IOCB_HIPRI) {
444 bio_set_polled(bio, iocb);
445 polled = true;
446 }
447
448 qc = submit_bio(bio);
449
			/* Publish the cookie so blkdev_iopoll() can read it. */
450 if (polled)
451 WRITE_ONCE(iocb->ki_cookie, qc);
452 break;
453 }
454
		/*
		 * More data remains.  On the first split the request switches
		 * to multi_bio mode with ref = 2 (the bio about to be submitted
		 * plus the one that follows); later splits add one reference.
		 */
455 if (!dio->multi_bio) {
456
457
458
459
460
			/*
			 * Async only: extra reference on the first bio, paired with
			 * the multi_bio bio_put() in blkdev_bio_end_io().
			 */
461 if (!is_sync)
462 bio_get(bio);
463 dio->multi_bio = true;
464 atomic_set(&dio->ref, 2);
465 } else {
466 atomic_inc(&dio->ref);
467 }
468
		/* Follow-up bios come from the generic allocator, not the dio pool. */
469 submit_bio(bio);
470 bio = bio_alloc(GFP_KERNEL, nr_pages);
471 }
472
473 if (!is_poll)
474 blk_finish_plug(&plug);
475
476 if (!is_sync)
477 return -EIOCBQUEUED;
478
	/* Sync request: wait until blkdev_bio_end_io() clears dio->waiter. */
479 for (;;) {
480 set_current_state(TASK_UNINTERRUPTIBLE);
481 if (!READ_ONCE(dio->waiter))
482 break;
483
484 if (!(iocb->ki_flags & IOCB_HIPRI) ||
485 !blk_poll(bdev_get_queue(bdev), qc, true))
486 blk_io_schedule();
487 }
488 __set_current_state(TASK_RUNNING);
489
	/* Precedence: page-pinning error, then bio status, then byte count. */
490 if (!ret)
491 ret = blk_status_to_errno(dio->bio.bi_status);
492 if (likely(!ret))
493 ret = dio->size;
494
495 bio_put(&dio->bio);
496 return ret;
497}
498
499static ssize_t
500blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
501{
502 unsigned int nr_pages;
503
504 if (!iov_iter_count(iter))
505 return 0;
506
507 nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
508 if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
509 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
510
511 return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
512}
513
514static __init int blkdev_init(void)
515{
516 return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
517}
518module_init(blkdev_init);
519
520int __sync_blockdev(struct block_device *bdev, int wait)
521{
522 if (!bdev)
523 return 0;
524 if (!wait)
525 return filemap_flush(bdev->bd_inode->i_mapping);
526 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
527}
528
529
530
531
532
/* Write out the page cache of @bdev and wait for completion. */
int sync_blockdev(struct block_device *bdev)
{
	const int wait = 1;

	return __sync_blockdev(bdev, wait);
}
EXPORT_SYMBOL(sync_blockdev);
538
539
540
541
542
543
/*
 * Sync the filesystem that get_super() finds on @bdev; when there is none,
 * fall back to syncing the block device's own page cache.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int ret;

	if (!sb)
		return sync_blockdev(bdev);

	ret = sync_filesystem(sb);
	drop_super(sb);
	return ret;
}
EXPORT_SYMBOL(fsync_bdev);
555
556
557
558
559
560
561
562
563
564
565
566
567
568int freeze_bdev(struct block_device *bdev)
569{
570 struct super_block *sb;
571 int error = 0;
572
573 mutex_lock(&bdev->bd_fsfreeze_mutex);
574 if (++bdev->bd_fsfreeze_count > 1)
575 goto done;
576
577 sb = get_active_super(bdev);
578 if (!sb)
579 goto sync;
580 if (sb->s_op->freeze_super)
581 error = sb->s_op->freeze_super(sb);
582 else
583 error = freeze_super(sb);
584 deactivate_super(sb);
585
586 if (error) {
587 bdev->bd_fsfreeze_count--;
588 goto done;
589 }
590 bdev->bd_fsfreeze_sb = sb;
591
592sync:
593 sync_blockdev(bdev);
594done:
595 mutex_unlock(&bdev->bd_fsfreeze_mutex);
596 return error;
597}
598EXPORT_SYMBOL(freeze_bdev);
599
600
601
602
603
604
605
606int thaw_bdev(struct block_device *bdev)
607{
608 struct super_block *sb;
609 int error = -EINVAL;
610
611 mutex_lock(&bdev->bd_fsfreeze_mutex);
612 if (!bdev->bd_fsfreeze_count)
613 goto out;
614
615 error = 0;
616 if (--bdev->bd_fsfreeze_count > 0)
617 goto out;
618
619 sb = bdev->bd_fsfreeze_sb;
620 if (!sb)
621 goto out;
622
623 if (sb->s_op->thaw_super)
624 error = sb->s_op->thaw_super(sb);
625 else
626 error = thaw_super(sb);
627 if (error)
628 bdev->bd_fsfreeze_count++;
629 else
630 bdev->bd_fsfreeze_sb = NULL;
631out:
632 mutex_unlock(&bdev->bd_fsfreeze_mutex);
633 return error;
634}
635EXPORT_SYMBOL(thaw_bdev);
636
637static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
638{
639 return block_write_full_page(page, blkdev_get_block, wbc);
640}
641
642static int blkdev_readpage(struct file * file, struct page * page)
643{
644 return block_read_full_page(page, blkdev_get_block);
645}
646
647static void blkdev_readahead(struct readahead_control *rac)
648{
649 mpage_readahead(rac, blkdev_get_block);
650}
651
652static int blkdev_write_begin(struct file *file, struct address_space *mapping,
653 loff_t pos, unsigned len, unsigned flags,
654 struct page **pagep, void **fsdata)
655{
656 return block_write_begin(mapping, pos, len, flags, pagep,
657 blkdev_get_block);
658}
659
660static int blkdev_write_end(struct file *file, struct address_space *mapping,
661 loff_t pos, unsigned len, unsigned copied,
662 struct page *page, void *fsdata)
663{
664 int ret;
665 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
666
667 unlock_page(page);
668 put_page(page);
669
670 return ret;
671}
672
673
674
675
676
677
678static loff_t block_llseek(struct file *file, loff_t offset, int whence)
679{
680 struct inode *bd_inode = bdev_file_inode(file);
681 loff_t retval;
682
683 inode_lock(bd_inode);
684 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
685 inode_unlock(bd_inode);
686 return retval;
687}
688
689int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
690{
691 struct inode *bd_inode = bdev_file_inode(filp);
692 struct block_device *bdev = I_BDEV(bd_inode);
693 int error;
694
695 error = file_write_and_wait_range(filp, start, end);
696 if (error)
697 return error;
698
699
700
701
702
703
704 error = blkdev_issue_flush(bdev);
705 if (error == -EOPNOTSUPP)
706 error = 0;
707
708 return error;
709}
710EXPORT_SYMBOL(blkdev_fsync);
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728int bdev_read_page(struct block_device *bdev, sector_t sector,
729 struct page *page)
730{
731 const struct block_device_operations *ops = bdev->bd_disk->fops;
732 int result = -EOPNOTSUPP;
733
734 if (!ops->rw_page || bdev_get_integrity(bdev))
735 return result;
736
737 result = blk_queue_enter(bdev->bd_disk->queue, 0);
738 if (result)
739 return result;
740 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
741 REQ_OP_READ);
742 blk_queue_exit(bdev->bd_disk->queue);
743 return result;
744}
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765int bdev_write_page(struct block_device *bdev, sector_t sector,
766 struct page *page, struct writeback_control *wbc)
767{
768 int result;
769 const struct block_device_operations *ops = bdev->bd_disk->fops;
770
771 if (!ops->rw_page || bdev_get_integrity(bdev))
772 return -EOPNOTSUPP;
773 result = blk_queue_enter(bdev->bd_disk->queue, 0);
774 if (result)
775 return result;
776
777 set_page_writeback(page);
778 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
779 REQ_OP_WRITE);
780 if (result) {
781 end_page_writeback(page);
782 } else {
783 clean_page_buffers(page);
784 unlock_page(page);
785 }
786 blk_queue_exit(bdev->bd_disk->queue);
787 return result;
788}
789
790
791
792
793
/*
 * bdev_lock serialises the holder/claiming state (bd_holder, bd_holders,
 * bd_claiming) manipulated by the bd_*_claim*() helpers in this file.
 */
794static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
/* Slab cache for struct bdev_inode, created in bdev_cache_init(). */
795static struct kmem_cache * bdev_cachep __read_mostly;
796
797static struct inode *bdev_alloc_inode(struct super_block *sb)
798{
799 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
800
801 if (!ei)
802 return NULL;
803 memset(&ei->bdev, 0, sizeof(ei->bdev));
804 ei->bdev.bd_bdi = &noop_backing_dev_info;
805 return &ei->vfs_inode;
806}
807
808static void bdev_free_inode(struct inode *inode)
809{
810 struct block_device *bdev = I_BDEV(inode);
811
812 free_percpu(bdev->bd_stats);
813 kfree(bdev->bd_meta_info);
814
815 if (!bdev_is_partition(bdev))
816 kfree(bdev->bd_disk);
817 kmem_cache_free(bdev_cachep, BDEV_I(inode));
818}
819
820static void init_once(void *data)
821{
822 struct bdev_inode *ei = data;
823
824 inode_init_once(&ei->vfs_inode);
825}
826
827static void bdev_evict_inode(struct inode *inode)
828{
829 struct block_device *bdev = &BDEV_I(inode)->bdev;
830 truncate_inode_pages_final(&inode->i_data);
831 invalidate_inode_buffers(inode);
832 clear_inode(inode);
833
834 inode_detach_wb(inode);
835 if (bdev->bd_bdi != &noop_backing_dev_info) {
836 bdi_put(bdev->bd_bdi);
837 bdev->bd_bdi = &noop_backing_dev_info;
838 }
839}
840
841static const struct super_operations bdev_sops = {
842 .statfs = simple_statfs,
843 .alloc_inode = bdev_alloc_inode,
844 .free_inode = bdev_free_inode,
845 .drop_inode = generic_delete_inode,
846 .evict_inode = bdev_evict_inode,
847};
848
849static int bd_init_fs_context(struct fs_context *fc)
850{
851 struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
852 if (!ctx)
853 return -ENOMEM;
854 fc->s_iflags |= SB_I_CGROUPWB;
855 ctx->ops = &bdev_sops;
856 return 0;
857}
858
859static struct file_system_type bd_type = {
860 .name = "bdev",
861 .init_fs_context = bd_init_fs_context,
862 .kill_sb = kill_anon_super,
863};
864
/*
 * Superblock of the internal bdev mount, set once by bdev_cache_init().
 * bdev_alloc(), bdget() and nr_blockdev_pages() use it to allocate, look up
 * and walk block device inodes.
 */
865struct super_block *blockdev_superblock __read_mostly;
866EXPORT_SYMBOL_GPL(blockdev_superblock);
867
868void __init bdev_cache_init(void)
869{
870 int err;
871 static struct vfsmount *bd_mnt;
872
873 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
874 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
875 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
876 init_once);
877 err = register_filesystem(&bd_type);
878 if (err)
879 panic("Cannot register bdev pseudo-fs");
880 bd_mnt = kern_mount(&bd_type);
881 if (IS_ERR(bd_mnt))
882 panic("Cannot create bdev pseudo-fs");
883 blockdev_superblock = bd_mnt->mnt_sb;
884}
885
886struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
887{
888 struct block_device *bdev;
889 struct inode *inode;
890
891 inode = new_inode(blockdev_superblock);
892 if (!inode)
893 return NULL;
894 inode->i_mode = S_IFBLK;
895 inode->i_rdev = 0;
896 inode->i_data.a_ops = &def_blk_aops;
897 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
898
899 bdev = I_BDEV(inode);
900 mutex_init(&bdev->bd_mutex);
901 mutex_init(&bdev->bd_fsfreeze_mutex);
902 spin_lock_init(&bdev->bd_size_lock);
903 bdev->bd_disk = disk;
904 bdev->bd_partno = partno;
905 bdev->bd_inode = inode;
906#ifdef CONFIG_SYSFS
907 INIT_LIST_HEAD(&bdev->bd_holder_disks);
908#endif
909 bdev->bd_stats = alloc_percpu(struct disk_stats);
910 if (!bdev->bd_stats) {
911 iput(inode);
912 return NULL;
913 }
914 return bdev;
915}
916
917void bdev_add(struct block_device *bdev, dev_t dev)
918{
919 bdev->bd_dev = dev;
920 bdev->bd_inode->i_rdev = dev;
921 bdev->bd_inode->i_ino = dev;
922 insert_inode_hash(bdev->bd_inode);
923}
924
925static struct block_device *bdget(dev_t dev)
926{
927 struct inode *inode;
928
929 inode = ilookup(blockdev_superblock, dev);
930 if (!inode)
931 return NULL;
932 return &BDEV_I(inode)->bdev;
933}
934
935
936
937
938
939
940
941
942struct block_device *bdgrab(struct block_device *bdev)
943{
944 if (!igrab(bdev->bd_inode))
945 return NULL;
946 return bdev;
947}
948EXPORT_SYMBOL(bdgrab);
949
950long nr_blockdev_pages(void)
951{
952 struct inode *inode;
953 long ret = 0;
954
955 spin_lock(&blockdev_superblock->s_inode_list_lock);
956 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
957 ret += inode->i_mapping->nrpages;
958 spin_unlock(&blockdev_superblock->s_inode_list_lock);
959
960 return ret;
961}
962
963void bdput(struct block_device *bdev)
964{
965 iput(bdev->bd_inode);
966}
967EXPORT_SYMBOL(bdput);
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
984 void *holder)
985{
986 if (bdev->bd_holder == holder)
987 return true;
988 else if (bdev->bd_holder != NULL)
989 return false;
990 else if (whole == bdev)
991 return true;
992
993 else if (whole->bd_holder == bd_may_claim)
994 return true;
995 else if (whole->bd_holder != NULL)
996 return false;
997 else
998 return true;
999}
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013int bd_prepare_to_claim(struct block_device *bdev, void *holder)
1014{
1015 struct block_device *whole = bdev_whole(bdev);
1016
1017 if (WARN_ON_ONCE(!holder))
1018 return -EINVAL;
1019retry:
1020 spin_lock(&bdev_lock);
1021
1022 if (!bd_may_claim(bdev, whole, holder)) {
1023 spin_unlock(&bdev_lock);
1024 return -EBUSY;
1025 }
1026
1027
1028 if (whole->bd_claiming) {
1029 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1030 DEFINE_WAIT(wait);
1031
1032 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1033 spin_unlock(&bdev_lock);
1034 schedule();
1035 finish_wait(wq, &wait);
1036 goto retry;
1037 }
1038
1039
1040 whole->bd_claiming = holder;
1041 spin_unlock(&bdev_lock);
1042 return 0;
1043}
1044EXPORT_SYMBOL_GPL(bd_prepare_to_claim);
1045
1046static void bd_clear_claiming(struct block_device *whole, void *holder)
1047{
1048 lockdep_assert_held(&bdev_lock);
1049
1050 BUG_ON(whole->bd_claiming != holder);
1051 whole->bd_claiming = NULL;
1052 wake_up_bit(&whole->bd_claiming, 0);
1053}
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063static void bd_finish_claiming(struct block_device *bdev, void *holder)
1064{
1065 struct block_device *whole = bdev_whole(bdev);
1066
1067 spin_lock(&bdev_lock);
1068 BUG_ON(!bd_may_claim(bdev, whole, holder));
1069
1070
1071
1072
1073 whole->bd_holders++;
1074 whole->bd_holder = bd_may_claim;
1075 bdev->bd_holders++;
1076 bdev->bd_holder = holder;
1077 bd_clear_claiming(whole, holder);
1078 spin_unlock(&bdev_lock);
1079}
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090void bd_abort_claiming(struct block_device *bdev, void *holder)
1091{
1092 spin_lock(&bdev_lock);
1093 bd_clear_claiming(bdev_whole(bdev), holder);
1094 spin_unlock(&bdev_lock);
1095}
1096EXPORT_SYMBOL(bd_abort_claiming);
1097
1098#ifdef CONFIG_SYSFS
/*
 * One entry on a block_device's bd_holder_disks list: records that @disk
 * has been linked as a holder by bd_link_disk_holder().  refcnt counts how
 * many times the same disk was linked; bd_unlink_disk_holder() frees the
 * entry when it reaches zero.
 */
1099struct bd_holder_disk {
1100 struct list_head list;
1101 struct gendisk *disk;
1102 int refcnt;
1103};
1104
1105static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1106 struct gendisk *disk)
1107{
1108 struct bd_holder_disk *holder;
1109
1110 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1111 if (holder->disk == disk)
1112 return holder;
1113 return NULL;
1114}
1115
/* Create a sysfs link inside @from that points at @to and is named after @to. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
1120
/* Remove the sysfs link inside @from that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1155{
1156 struct bd_holder_disk *holder;
1157 int ret = 0;
1158
1159 mutex_lock(&bdev->bd_mutex);
1160
1161 WARN_ON_ONCE(!bdev->bd_holder);
1162
1163
1164 if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
1165 goto out_unlock;
1166
1167 holder = bd_find_holder_disk(bdev, disk);
1168 if (holder) {
1169 holder->refcnt++;
1170 goto out_unlock;
1171 }
1172
1173 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1174 if (!holder) {
1175 ret = -ENOMEM;
1176 goto out_unlock;
1177 }
1178
1179 INIT_LIST_HEAD(&holder->list);
1180 holder->disk = disk;
1181 holder->refcnt = 1;
1182
1183 ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
1184 if (ret)
1185 goto out_free;
1186
1187 ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
1188 if (ret)
1189 goto out_del;
1190
1191
1192
1193
1194 kobject_get(bdev->bd_holder_dir);
1195
1196 list_add(&holder->list, &bdev->bd_holder_disks);
1197 goto out_unlock;
1198
1199out_del:
1200 del_symlink(disk->slave_dir, bdev_kobj(bdev));
1201out_free:
1202 kfree(holder);
1203out_unlock:
1204 mutex_unlock(&bdev->bd_mutex);
1205 return ret;
1206}
1207EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1220{
1221 struct bd_holder_disk *holder;
1222
1223 mutex_lock(&bdev->bd_mutex);
1224
1225 holder = bd_find_holder_disk(bdev, disk);
1226
1227 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1228 del_symlink(disk->slave_dir, bdev_kobj(bdev));
1229 del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
1230 kobject_put(bdev->bd_holder_dir);
1231 list_del_init(&holder->list);
1232 kfree(holder);
1233 }
1234
1235 mutex_unlock(&bdev->bd_mutex);
1236}
1237EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1238#endif
1239
/* Forward declaration: __blkdev_get() calls this on its partition error path. */
1240static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1241
1242int bdev_disk_changed(struct block_device *bdev, bool invalidate)
1243{
1244 struct gendisk *disk = bdev->bd_disk;
1245 int ret = 0;
1246
1247 lockdep_assert_held(&bdev->bd_mutex);
1248
1249 if (!(disk->flags & GENHD_FL_UP))
1250 return -ENXIO;
1251
1252rescan:
1253 if (bdev->bd_part_count)
1254 return -EBUSY;
1255 sync_blockdev(bdev);
1256 invalidate_bdev(bdev);
1257 blk_drop_partitions(disk);
1258
1259 clear_bit(GD_NEED_PART_SCAN, &disk->state);
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269 if (invalidate) {
1270 if (disk_part_scan_enabled(disk) ||
1271 !(disk->flags & GENHD_FL_REMOVABLE))
1272 set_capacity(disk, 0);
1273 }
1274
1275 if (get_capacity(disk)) {
1276 ret = blk_add_partitions(disk, bdev);
1277 if (ret == -EAGAIN)
1278 goto rescan;
1279 } else if (invalidate) {
1280
1281
1282
1283
1284 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
1285 }
1286
1287 return ret;
1288}
1289
1290
1291
1292
1293EXPORT_SYMBOL_GPL(bdev_disk_changed);
1294
1295
1296
1297
1298
1299
1300
/*
 * Open @bdev with @mode and account one more opener.
 *
 * Both call sites in this file hold bdev->bd_mutex around the call:
 * blkdev_get_by_dev() for the device being opened, and the recursive call
 * below for the whole device of a partition (lockdep subclass 1).
 *
 * Returns 0 on success, -ENXIO if the disk is not up or a partition has no
 * sectors, or the error from the driver's ->open() method.
 */
1301static int __blkdev_get(struct block_device *bdev, fmode_t mode)
1302{
1303 struct gendisk *disk = bdev->bd_disk;
1304 int ret = 0;
1305
1306 if (!(disk->flags & GENHD_FL_UP))
1307 return -ENXIO;
1308
	/* First opener: the device has to be set up. */
1309 if (!bdev->bd_openers) {
1310 if (!bdev_is_partition(bdev)) {
1311 ret = 0;
1312 if (disk->fops->open)
1313 ret = disk->fops->open(bdev, mode);
1314
1315 if (!ret)
1316 set_init_blocksize(bdev);
1317
1318
1319
1320
1321
1322
1323
			/*
			 * A pending partition scan is carried out when open succeeded
			 * or reported -ENOMEDIUM; in the latter case the rescan is
			 * asked to invalidate.
			 */
1324 if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
1325 (!ret || ret == -ENOMEDIUM))
1326 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1327
1328 if (ret)
1329 return ret;
1330 } else {
			/*
			 * Partition: open the whole device first and count this
			 * partition in its bd_part_count.
			 * NOTE(review): the result of bdgrab() is used without a
			 * NULL check - confirm it cannot fail here.
			 */
1331 struct block_device *whole = bdgrab(disk->part0);
1332
1333 mutex_lock_nested(&whole->bd_mutex, 1);
1334 ret = __blkdev_get(whole, mode);
1335 if (ret) {
1336 mutex_unlock(&whole->bd_mutex);
1337 bdput(whole);
1338 return ret;
1339 }
1340 whole->bd_part_count++;
1341 mutex_unlock(&whole->bd_mutex);
1342
			/* An empty partition cannot be opened; undo the open of the whole device. */
1343 if (!bdev_nr_sectors(bdev)) {
1344 __blkdev_put(whole, mode, 1);
1345 bdput(whole);
1346 return -ENXIO;
1347 }
1348 set_init_blocksize(bdev);
1349 }
1350
		/* Take a reference on the queue's bdi if bd_bdi is still the no-op one. */
1351 if (bdev->bd_bdi == &noop_backing_dev_info)
1352 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1353 } else {
		/*
		 * Already open.  Only whole devices call ->open() again and
		 * honour a pending partition scan; partitions need no extra work.
		 */
1354 if (!bdev_is_partition(bdev)) {
1355 if (bdev->bd_disk->fops->open)
1356 ret = bdev->bd_disk->fops->open(bdev, mode);
1357
1358 if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
1359 (!ret || ret == -ENOMEDIUM))
1360 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1361 if (ret)
1362 return ret;
1363 }
1364 }
1365 bdev->bd_openers++;
1366 return 0;
1367}
1368
1369struct block_device *blkdev_get_no_open(dev_t dev)
1370{
1371 struct block_device *bdev;
1372 struct gendisk *disk;
1373
1374 bdev = bdget(dev);
1375 if (!bdev) {
1376 blk_request_module(dev);
1377 bdev = bdget(dev);
1378 if (!bdev)
1379 return NULL;
1380 }
1381
1382 disk = bdev->bd_disk;
1383 if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
1384 goto bdput;
1385 if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
1386 goto put_disk;
1387 if (!try_module_get(bdev->bd_disk->fops->owner))
1388 goto put_disk;
1389 return bdev;
1390put_disk:
1391 put_disk(disk);
1392bdput:
1393 bdput(bdev);
1394 return NULL;
1395}
1396
1397void blkdev_put_no_open(struct block_device *bdev)
1398{
1399 module_put(bdev->bd_disk->fops->owner);
1400 put_disk(bdev->bd_disk);
1401 bdput(bdev);
1402}
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1426{
1427 bool unblock_events = true;
1428 struct block_device *bdev;
1429 struct gendisk *disk;
1430 int ret;
1431
1432 ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
1433 MAJOR(dev), MINOR(dev),
1434 ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
1435 ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
1436 if (ret)
1437 return ERR_PTR(ret);
1438
1439 bdev = blkdev_get_no_open(dev);
1440 if (!bdev)
1441 return ERR_PTR(-ENXIO);
1442 disk = bdev->bd_disk;
1443
1444 if (mode & FMODE_EXCL) {
1445 ret = bd_prepare_to_claim(bdev, holder);
1446 if (ret)
1447 goto put_blkdev;
1448 }
1449
1450 disk_block_events(disk);
1451
1452 mutex_lock(&bdev->bd_mutex);
1453 ret =__blkdev_get(bdev, mode);
1454 if (ret)
1455 goto abort_claiming;
1456 if (mode & FMODE_EXCL) {
1457 bd_finish_claiming(bdev, holder);
1458
1459
1460
1461
1462
1463
1464
1465
1466 if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1467 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1468 bdev->bd_write_holder = true;
1469 unblock_events = false;
1470 }
1471 }
1472 mutex_unlock(&bdev->bd_mutex);
1473
1474 if (unblock_events)
1475 disk_unblock_events(disk);
1476 return bdev;
1477
1478abort_claiming:
1479 if (mode & FMODE_EXCL)
1480 bd_abort_claiming(bdev, holder);
1481 mutex_unlock(&bdev->bd_mutex);
1482 disk_unblock_events(disk);
1483put_blkdev:
1484 blkdev_put_no_open(bdev);
1485 return ERR_PTR(ret);
1486}
1487EXPORT_SYMBOL(blkdev_get_by_dev);
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1507 void *holder)
1508{
1509 struct block_device *bdev;
1510 dev_t dev;
1511 int error;
1512
1513 error = lookup_bdev(path, &dev);
1514 if (error)
1515 return ERR_PTR(error);
1516
1517 bdev = blkdev_get_by_dev(dev, mode, holder);
1518 if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1519 blkdev_put(bdev, mode);
1520 return ERR_PTR(-EACCES);
1521 }
1522
1523 return bdev;
1524}
1525EXPORT_SYMBOL(blkdev_get_by_path);
1526
1527static int blkdev_open(struct inode * inode, struct file * filp)
1528{
1529 struct block_device *bdev;
1530
1531
1532
1533
1534
1535
1536
1537 filp->f_flags |= O_LARGEFILE;
1538
1539 filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
1540
1541 if (filp->f_flags & O_NDELAY)
1542 filp->f_mode |= FMODE_NDELAY;
1543 if (filp->f_flags & O_EXCL)
1544 filp->f_mode |= FMODE_EXCL;
1545 if ((filp->f_flags & O_ACCMODE) == 3)
1546 filp->f_mode |= FMODE_WRITE_IOCTL;
1547
1548 bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
1549 if (IS_ERR(bdev))
1550 return PTR_ERR(bdev);
1551 filp->f_mapping = bdev->bd_inode->i_mapping;
1552 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1553 return 0;
1554}
1555
1556static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1557{
1558 struct gendisk *disk = bdev->bd_disk;
1559 struct block_device *victim = NULL;
1560
1561
1562
1563
1564
1565
1566
1567
1568 if (bdev->bd_openers == 1)
1569 sync_blockdev(bdev);
1570
1571 mutex_lock_nested(&bdev->bd_mutex, for_part);
1572 if (for_part)
1573 bdev->bd_part_count--;
1574
1575 if (!--bdev->bd_openers) {
1576 WARN_ON_ONCE(bdev->bd_holders);
1577 sync_blockdev(bdev);
1578 kill_bdev(bdev);
1579 bdev_write_inode(bdev);
1580 if (bdev_is_partition(bdev))
1581 victim = bdev_whole(bdev);
1582 }
1583
1584 if (!bdev_is_partition(bdev) && disk->fops->release)
1585 disk->fops->release(disk, mode);
1586 mutex_unlock(&bdev->bd_mutex);
1587 if (victim) {
1588 __blkdev_put(victim, mode, 1);
1589 bdput(victim);
1590 }
1591}
1592
1593void blkdev_put(struct block_device *bdev, fmode_t mode)
1594{
1595 struct gendisk *disk = bdev->bd_disk;
1596
1597 mutex_lock(&bdev->bd_mutex);
1598
1599 if (mode & FMODE_EXCL) {
1600 struct block_device *whole = bdev_whole(bdev);
1601 bool bdev_free;
1602
1603
1604
1605
1606
1607
1608 spin_lock(&bdev_lock);
1609
1610 WARN_ON_ONCE(--bdev->bd_holders < 0);
1611 WARN_ON_ONCE(--whole->bd_holders < 0);
1612
1613 if ((bdev_free = !bdev->bd_holders))
1614 bdev->bd_holder = NULL;
1615 if (!whole->bd_holders)
1616 whole->bd_holder = NULL;
1617
1618 spin_unlock(&bdev_lock);
1619
1620
1621
1622
1623
1624 if (bdev_free && bdev->bd_write_holder) {
1625 disk_unblock_events(disk);
1626 bdev->bd_write_holder = false;
1627 }
1628 }
1629
1630
1631
1632
1633
1634
1635 disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
1636 mutex_unlock(&bdev->bd_mutex);
1637
1638 __blkdev_put(bdev, mode, 0);
1639 blkdev_put_no_open(bdev);
1640}
1641EXPORT_SYMBOL(blkdev_put);
1642
1643static int blkdev_close(struct inode * inode, struct file * filp)
1644{
1645 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1646 blkdev_put(bdev, filp->f_mode);
1647 return 0;
1648}
1649
1650static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1651{
1652 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1653 fmode_t mode = file->f_mode;
1654
1655
1656
1657
1658
1659 if (file->f_flags & O_NDELAY)
1660 mode |= FMODE_NDELAY;
1661 else
1662 mode &= ~FMODE_NDELAY;
1663
1664 return blkdev_ioctl(bdev, mode, cmd, arg);
1665}
1666
1667
1668
1669
1670
1671
1672
1673
1674ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1675{
1676 struct file *file = iocb->ki_filp;
1677 struct inode *bd_inode = bdev_file_inode(file);
1678 loff_t size = i_size_read(bd_inode);
1679 struct blk_plug plug;
1680 size_t shorted = 0;
1681 ssize_t ret;
1682
1683 if (bdev_read_only(I_BDEV(bd_inode)))
1684 return -EPERM;
1685
1686 if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
1687 return -ETXTBSY;
1688
1689 if (!iov_iter_count(from))
1690 return 0;
1691
1692 if (iocb->ki_pos >= size)
1693 return -ENOSPC;
1694
1695 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
1696 return -EOPNOTSUPP;
1697
1698 size -= iocb->ki_pos;
1699 if (iov_iter_count(from) > size) {
1700 shorted = iov_iter_count(from) - size;
1701 iov_iter_truncate(from, size);
1702 }
1703
1704 blk_start_plug(&plug);
1705 ret = __generic_file_write_iter(iocb, from);
1706 if (ret > 0)
1707 ret = generic_write_sync(iocb, ret);
1708 iov_iter_reexpand(from, iov_iter_count(from) + shorted);
1709 blk_finish_plug(&plug);
1710 return ret;
1711}
1712EXPORT_SYMBOL_GPL(blkdev_write_iter);
1713
1714ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1715{
1716 struct file *file = iocb->ki_filp;
1717 struct inode *bd_inode = bdev_file_inode(file);
1718 loff_t size = i_size_read(bd_inode);
1719 loff_t pos = iocb->ki_pos;
1720 size_t shorted = 0;
1721 ssize_t ret;
1722
1723 if (pos >= size)
1724 return 0;
1725
1726 size -= pos;
1727 if (iov_iter_count(to) > size) {
1728 shorted = iov_iter_count(to) - size;
1729 iov_iter_truncate(to, size);
1730 }
1731
1732 ret = generic_file_read_iter(iocb, to);
1733 iov_iter_reexpand(to, iov_iter_count(to) + shorted);
1734 return ret;
1735}
1736EXPORT_SYMBOL_GPL(blkdev_read_iter);
1737
1738
1739
1740
1741
1742static int blkdev_releasepage(struct page *page, gfp_t wait)
1743{
1744 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
1745
1746 if (super && super->s_op->bdev_try_to_free_page)
1747 return super->s_op->bdev_try_to_free_page(super, page, wait);
1748
1749 return try_to_free_buffers(page);
1750}
1751
/*
 * ->writepages() for block device mappings: plain pass-through to
 * generic_writepages().
 */
static int
blkdev_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret = generic_writepages(mapping, wbc);

	return ret;
}
1757
1758static const struct address_space_operations def_blk_aops = {
1759 .readpage = blkdev_readpage,
1760 .readahead = blkdev_readahead,
1761 .writepage = blkdev_writepage,
1762 .write_begin = blkdev_write_begin,
1763 .write_end = blkdev_write_end,
1764 .writepages = blkdev_writepages,
1765 .releasepage = blkdev_releasepage,
1766 .direct_IO = blkdev_direct_IO,
1767 .migratepage = buffer_migrate_page_norefs,
1768 .is_dirty_writeback = buffer_check_dirty_writeback,
1769};
1770
/* fallocate() mode flags accepted by blkdev_fallocate(). */
#define BLKDEV_FALLOC_FL_SUPPORTED	\
	(FALLOC_FL_KEEP_SIZE |		\
	 FALLOC_FL_PUNCH_HOLE |		\
	 FALLOC_FL_ZERO_RANGE |		\
	 FALLOC_FL_NO_HIDE_STALE)
1774
1775static long blkdev_fallocate(struct file *file, int mode, loff_t start,
1776 loff_t len)
1777{
1778 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1779 loff_t end = start + len - 1;
1780 loff_t isize;
1781 int error;
1782
1783
1784 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
1785 return -EOPNOTSUPP;
1786
1787
1788 isize = i_size_read(bdev->bd_inode);
1789 if (start >= isize)
1790 return -EINVAL;
1791 if (end >= isize) {
1792 if (mode & FALLOC_FL_KEEP_SIZE) {
1793 len = isize - start;
1794 end = start + len - 1;
1795 } else
1796 return -EINVAL;
1797 }
1798
1799
1800
1801
1802 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
1803 return -EINVAL;
1804
1805
1806 error = truncate_bdev_range(bdev, file->f_mode, start, end);
1807 if (error)
1808 return error;
1809
1810 switch (mode) {
1811 case FALLOC_FL_ZERO_RANGE:
1812 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
1813 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
1814 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
1815 break;
1816 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
1817 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
1818 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
1819 break;
1820 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
1821 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
1822 GFP_KERNEL, 0);
1823 break;
1824 default:
1825 return -EOPNOTSUPP;
1826 }
1827 if (error)
1828 return error;
1829
1830
1831
1832
1833
1834
1835 return truncate_bdev_range(bdev, file->f_mode, start, end);
1836}
1837
1838const struct file_operations def_blk_fops = {
1839 .open = blkdev_open,
1840 .release = blkdev_close,
1841 .llseek = block_llseek,
1842 .read_iter = blkdev_read_iter,
1843 .write_iter = blkdev_write_iter,
1844 .iopoll = blkdev_iopoll,
1845 .mmap = generic_file_mmap,
1846 .fsync = blkdev_fsync,
1847 .unlocked_ioctl = block_ioctl,
1848#ifdef CONFIG_COMPAT
1849 .compat_ioctl = compat_blkdev_ioctl,
1850#endif
1851 .splice_read = generic_file_splice_read,
1852 .splice_write = iter_file_splice_write,
1853 .fallocate = blkdev_fallocate,
1854};
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865int lookup_bdev(const char *pathname, dev_t *dev)
1866{
1867 struct inode *inode;
1868 struct path path;
1869 int error;
1870
1871 if (!pathname || !*pathname)
1872 return -EINVAL;
1873
1874 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1875 if (error)
1876 return error;
1877
1878 inode = d_backing_inode(path.dentry);
1879 error = -ENOTBLK;
1880 if (!S_ISBLK(inode->i_mode))
1881 goto out_path_put;
1882 error = -EACCES;
1883 if (!may_open_dev(&path))
1884 goto out_path_put;
1885
1886 *dev = inode->i_rdev;
1887 error = 0;
1888out_path_put:
1889 path_put(&path);
1890 return error;
1891}
1892EXPORT_SYMBOL(lookup_bdev);
1893
1894int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1895{
1896 struct super_block *sb = get_super(bdev);
1897 int res = 0;
1898
1899 if (sb) {
1900
1901
1902
1903
1904
1905
1906 shrink_dcache_sb(sb);
1907 res = invalidate_inodes(sb, kill_dirty);
1908 drop_super(sb);
1909 }
1910 invalidate_bdev(bdev);
1911 return res;
1912}
1913EXPORT_SYMBOL(__invalidate_device);
1914
1915void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1916{
1917 struct inode *inode, *old_inode = NULL;
1918
1919 spin_lock(&blockdev_superblock->s_inode_list_lock);
1920 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1921 struct address_space *mapping = inode->i_mapping;
1922 struct block_device *bdev;
1923
1924 spin_lock(&inode->i_lock);
1925 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1926 mapping->nrpages == 0) {
1927 spin_unlock(&inode->i_lock);
1928 continue;
1929 }
1930 __iget(inode);
1931 spin_unlock(&inode->i_lock);
1932 spin_unlock(&blockdev_superblock->s_inode_list_lock);
1933
1934
1935
1936
1937
1938
1939
1940
1941 iput(old_inode);
1942 old_inode = inode;
1943 bdev = I_BDEV(inode);
1944
1945 mutex_lock(&bdev->bd_mutex);
1946 if (bdev->bd_openers)
1947 func(bdev, arg);
1948 mutex_unlock(&bdev->bd_mutex);
1949
1950 spin_lock(&blockdev_superblock->s_inode_list_lock);
1951 }
1952 spin_unlock(&blockdev_superblock->s_inode_list_lock);
1953 iput(old_inode);
1954}
1955