// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>

#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"
27
28static struct biovec_slab {
29 int nr_vecs;
30 char *name;
31 struct kmem_cache *slab;
32} bvec_slabs[] __read_mostly = {
33 { .nr_vecs = 16, .name = "biovec-16" },
34 { .nr_vecs = 64, .name = "biovec-64" },
35 { .nr_vecs = 128, .name = "biovec-128" },
36 { .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
37};
38
39static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
40{
41 switch (nr_vecs) {
	/* smaller bios use inline vecs */
43 case 5 ... 16:
44 return &bvec_slabs[0];
45 case 17 ... 64:
46 return &bvec_slabs[1];
47 case 65 ... 128:
48 return &bvec_slabs[2];
49 case 129 ... BIO_MAX_VECS:
50 return &bvec_slabs[3];
51 default:
52 BUG();
53 return NULL;
54 }
55}
56
/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

/*
 * bio_slab management: one shared kmem_cache is kept per distinct padded
 * bio size and refcounted by the bio_sets that use it.
 */
67struct bio_slab {
68 struct kmem_cache *slab;
69 unsigned int slab_ref;
70 unsigned int slab_size;
71 char name[8];
72};
73static DEFINE_MUTEX(bio_slab_lock);
74static DEFINE_XARRAY(bio_slabs);
75
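/*
 * Allocate a bio_slab descriptor and back it with a kmem_cache sized for a
 * bio plus the bio_set's front and back padding, then publish it in the
 * bio_slabs xarray keyed by that size.  Called with bio_slab_lock held.
 */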
76static struct bio_slab *create_bio_slab(unsigned int size)
77{
78 struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);
79
80 if (!bslab)
81 return NULL;
82
83 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
84 bslab->slab = kmem_cache_create(bslab->name, size,
85 ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);
86 if (!bslab->slab)
87 goto fail_alloc_slab;
88
89 bslab->slab_ref = 1;
90 bslab->slab_size = size;
91
92 if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
93 return bslab;
94
95 kmem_cache_destroy(bslab->slab);
96
97fail_alloc_slab:
98 kfree(bslab);
99 return NULL;
100}
101
102static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
103{
104 return bs->front_pad + sizeof(struct bio) + bs->back_pad;
105}
106
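/*
 * Look up (or create) the shared bio slab cache matching this bio_set's
 * padded bio size and take a reference on it.  Returns the kmem_cache to
 * allocate bios from, or NULL on failure.
 */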
107static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
108{
109 unsigned int size = bs_bio_slab_size(bs);
110 struct bio_slab *bslab;
111
112 mutex_lock(&bio_slab_lock);
113 bslab = xa_load(&bio_slabs, size);
114 if (bslab)
115 bslab->slab_ref++;
116 else
117 bslab = create_bio_slab(size);
118 mutex_unlock(&bio_slab_lock);
119
120 if (bslab)
121 return bslab->slab;
122 return NULL;
123}
124
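/*
 * Drop the bio_set's reference on its shared bio slab cache; the cache is
 * destroyed once the last bio_set using this size releases it.
 */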
125static void bio_put_slab(struct bio_set *bs)
126{
127 struct bio_slab *bslab = NULL;
128 unsigned int slab_size = bs_bio_slab_size(bs);
129
130 mutex_lock(&bio_slab_lock);
131
132 bslab = xa_load(&bio_slabs, slab_size);
133 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
134 goto out;
135
136 WARN_ON_ONCE(bslab->slab != bs->bio_slab);
137
138 WARN_ON(!bslab->slab_ref);
139
140 if (--bslab->slab_ref)
141 goto out;
142
143 xa_erase(&bio_slabs, slab_size);
144
145 kmem_cache_destroy(bslab->slab);
146 kfree(bslab);
147
148out:
149 mutex_unlock(&bio_slab_lock);
150}
151
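/*
 * Release an external biovec array: BIO_MAX_VECS sized arrays come from the
 * bio_set's bvec mempool, smaller ones from the per-size bvec slabs, and
 * inline vectors need no freeing at all.
 */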
152void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
153{
154 BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);
155
156 if (nr_vecs == BIO_MAX_VECS)
157 mempool_free(bv, pool);
158 else if (nr_vecs > BIO_INLINE_VECS)
159 kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
160}
161
/*
 * Make the first allocation restricted and don't dump info on allocation
 * failures, since we'll fall back to the mempool in case of failure.
 */
166static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
167{
168 return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
169 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
170}
171
172struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
173 gfp_t gfp_mask)
174{
175 struct biovec_slab *bvs = biovec_slab(*nr_vecs);
176
177 if (WARN_ON_ONCE(!bvs))
178 return NULL;
179
	/*
	 * Upgrade the nr_vecs request to take full advantage of the
	 * allocation.  We also rely on this in the bvec_free path.
	 */
184 *nr_vecs = bvs->nr_vecs;
185
	/*
	 * Try a slab allocation first for all smaller allocations.  If that
	 * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
	 * The mempool is sized to handle up to BIO_MAX_VECS entries.
	 */
191 if (*nr_vecs < BIO_MAX_VECS) {
192 struct bio_vec *bvl;
193
194 bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
195 if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
196 return bvl;
197 *nr_vecs = BIO_MAX_VECS;
198 }
199
200 return mempool_alloc(pool, gfp_mask);
201}
202
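/*
 * Release the resources the bio may have picked up during its lifetime: the
 * blkcg association, the integrity payload and the crypto context.  Does not
 * free the bio itself.
 */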
203void bio_uninit(struct bio *bio)
204{
205#ifdef CONFIG_BLK_CGROUP
206 if (bio->bi_blkg) {
207 blkg_put(bio->bi_blkg);
208 bio->bi_blkg = NULL;
209 }
210#endif
211 if (bio_integrity(bio))
212 bio_integrity_free(bio);
213
214 bio_crypt_free_ctx(bio);
215}
216EXPORT_SYMBOL(bio_uninit);
217
218static void bio_free(struct bio *bio)
219{
220 struct bio_set *bs = bio->bi_pool;
221 void *p;
222
223 bio_uninit(bio);
224
225 if (bs) {
226 bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);

		/*
		 * If we have front padding, adjust the bio pointer before
		 * freeing.
		 */
231 p = bio;
232 p -= bs->front_pad;
233
234 mempool_free(p, &bs->bio_pool);
235 } else {
		/* Bio was allocated by bio_kmalloc() */
237 kfree(bio);
238 }
239}
240
/*
 * Users of this function have their own bio allocation. Subsequently,
 * they must remember to pair any call to bio_init() with bio_uninit()
 * when IO has completed, or when the bio is released.
 */
246void bio_init(struct bio *bio, struct bio_vec *table,
247 unsigned short max_vecs)
248{
249 memset(bio, 0, sizeof(*bio));
250 atomic_set(&bio->__bi_remaining, 1);
251 atomic_set(&bio->__bi_cnt, 1);
252
253 bio->bi_io_vec = table;
254 bio->bi_max_vecs = max_vecs;
255}
256EXPORT_SYMBOL(bio_init);
257
/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly-
 *   allocated bio returned by bio_alloc_bioset() - the only fields that are
 *   preserved are the ones that are initialized by bio_alloc_bioset(). See
 *   comment in struct bio.
 */
268void bio_reset(struct bio *bio)
269{
270 bio_uninit(bio);
271 memset(bio, 0, BIO_RESET_BYTES);
272 atomic_set(&bio->__bi_remaining, 1);
273}
274EXPORT_SYMBOL(bio_reset);
275
276static struct bio *__bio_chain_endio(struct bio *bio)
277{
278 struct bio *parent = bio->bi_private;
279
280 if (bio->bi_status && !parent->bi_status)
281 parent->bi_status = bio->bi_status;
282 bio_put(bio);
283 return parent;
284}
285
286static void bio_chain_endio(struct bio *bio)
287{
288 bio_endio(__bio_chain_endio(bio));
289}
290
/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the parent bio of @bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io will be called. @bio must have no other bi_end_io or
 * bi_private set, and any error status of @bio is propagated to @parent.
 *
 * The caller must not free @parent until @bio has completed.
 */
302void bio_chain(struct bio *bio, struct bio *parent)
303{
304 BUG_ON(bio->bi_private || bio->bi_end_io);
305
306 bio->bi_private = parent;
307 bio->bi_end_io = bio_chain_endio;
308 bio_inc_remaining(parent);
309}
310EXPORT_SYMBOL(bio_chain);
311
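/*
 * Workqueue callback for a bio_set's rescuer: drain the rescue_list and
 * resubmit each punted bio from process context via submit_bio_noacct().
 */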
312static void bio_alloc_rescue(struct work_struct *work)
313{
314 struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
315 struct bio *bio;
316
317 while (1) {
318 spin_lock(&bs->rescue_lock);
319 bio = bio_list_pop(&bs->rescue_list);
320 spin_unlock(&bs->rescue_lock);
321
322 if (!bio)
323 break;
324
325 submit_bio_noacct(bio);
326 }
327}
328
329static void punt_bios_to_rescuer(struct bio_set *bs)
330{
331 struct bio_list punt, nopunt;
332 struct bio *bio;
333
334 if (WARN_ON_ONCE(!bs->rescue_workqueue))
335 return;
336
	/*
	 * In order to guarantee forward progress we must punt only bios that
	 * were allocated from this bio_set; otherwise, if there was a bio on
	 * there for a stacking driver higher up in the stack, processing it
	 * could require allocating bios from this bio_set, and doing that from
	 * our own rescuer would be bad.
	 *
	 * Since bio lists are singly linked, pop them all instead of trying to
	 * remove from the middle of the list:
	 */
347 bio_list_init(&punt);
348 bio_list_init(&nopunt);
349
	while ((bio = bio_list_pop(&current->bio_list[0])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[0] = nopunt;

	bio_list_init(&nopunt);
	while ((bio = bio_list_pop(&current->bio_list[1])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[1] = nopunt;
358
359 spin_lock(&bs->rescue_lock);
360 bio_list_merge(&bs->rescue_list, &punt);
361 spin_unlock(&bs->rescue_lock);
362
363 queue_work(bs->rescue_workqueue, &bs->rescue_work);
364}
365
/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Allocate a bio from the mempools in @bs.
 *
 * If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
 * allocate a bio.  This is due to the mempool guarantees.  To make this work,
 * callers must never allocate more than 1 bio at a time from the general pool.
 * Callers that need to allocate more than 1 bio must always submit the
 * previously allocated bio for IO before attempting to allocate a new one.
 * Failure to do so can cause deadlocks under memory pressure.
 *
 * Note that when running under submit_bio_noacct() (i.e. any block driver),
 * bios are not submitted until after you return - see the code in
 * submit_bio_noacct() that converts recursion into iteration, to prevent
 * stack overflows.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
398struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
399 struct bio_set *bs)
400{
401 gfp_t saved_gfp = gfp_mask;
402 struct bio *bio;
403 void *p;
404
	/* should not use nobvec bioset for nr_iovecs > 0 */
406 if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
407 return NULL;
408
	/*
	 * submit_bio_noacct() converts recursion to iteration; this means if
	 * we're running beneath it, any bios we allocate and submit will not be
	 * submitted (and thus freed) until after we return.
	 *
	 * This exposes us to a potential deadlock if we allocate multiple bios
	 * from the same bio_set() while running underneath submit_bio_noacct().
	 * If we were to allocate multiple bios (say a stacking block driver
	 * that was splitting bios), we would deadlock if we exhausted the
	 * mempool's reserve.
	 *
	 * We solve this, and guarantee forward progress, with a rescuer
	 * workqueue per bio_set. If we go to allocate and there are bios on
	 * current->bio_list, we first try the allocation without
	 * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
	 * blocking to the rescuer workqueue before we retry with the original
	 * gfp_flags.
	 */
	if (current->bio_list &&
	    (!bio_list_empty(&current->bio_list[0]) ||
	     !bio_list_empty(&current->bio_list[1])) &&
	    bs->rescue_workqueue)
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;
432
433 p = mempool_alloc(&bs->bio_pool, gfp_mask);
434 if (!p && gfp_mask != saved_gfp) {
435 punt_bios_to_rescuer(bs);
436 gfp_mask = saved_gfp;
437 p = mempool_alloc(&bs->bio_pool, gfp_mask);
438 }
439 if (unlikely(!p))
440 return NULL;
441
442 bio = p + bs->front_pad;
443 if (nr_iovecs > BIO_INLINE_VECS) {
444 struct bio_vec *bvl = NULL;
445
446 bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
447 if (!bvl && gfp_mask != saved_gfp) {
448 punt_bios_to_rescuer(bs);
449 gfp_mask = saved_gfp;
450 bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
451 }
452 if (unlikely(!bvl))
453 goto err_free;
454
455 bio_init(bio, bvl, nr_iovecs);
456 } else if (nr_iovecs) {
457 bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
458 } else {
459 bio_init(bio, NULL, 0);
460 }
461
462 bio->bi_pool = bs;
463 return bio;
464
465err_free:
466 mempool_free(p, &bs->bio_pool);
467 return NULL;
468}
469EXPORT_SYMBOL(bio_alloc_bioset);
470
/**
 * bio_kmalloc - kmalloc a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 *
 * Use kmalloc to allocate and initialize a bio.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
480struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
481{
482 struct bio *bio;
483
484 if (nr_iovecs > UIO_MAXIOV)
485 return NULL;
486
487 bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
488 if (unlikely(!bio))
489 return NULL;
490 bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
491 bio->bi_pool = NULL;
492 return bio;
493}
494EXPORT_SYMBOL(bio_kmalloc);
495
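/*
 * Zero all data covered by the bio's current iterator, segment by segment,
 * using temporary kernel mappings.
 */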
496void zero_fill_bio(struct bio *bio)
497{
498 unsigned long flags;
499 struct bio_vec bv;
500 struct bvec_iter iter;
501
502 bio_for_each_segment(bv, bio, iter) {
503 char *data = bvec_kmap_irq(&bv, &flags);
504 memset(data, 0, bv.bv_len);
505 flush_dcache_page(bv.bv_page);
506 bvec_kunmap_irq(data, &flags);
507 }
508}
509EXPORT_SYMBOL(zero_fill_bio);
510
/**
 * bio_truncate - truncate the bio to new size of @new_size
 * @bio:	the bio to be truncated
 * @new_size:	new size for truncating the bio
 *
 * Description:
 *   Truncate the bio to new size of @new_size. If bio_op(bio) is
 *   REQ_OP_READ, zero the truncated pages, so that the buffer is also
 *   zeroed in case the bio's data won't be read by the filesystem or
 *   anything else.
 */
521void bio_truncate(struct bio *bio, unsigned new_size)
522{
523 struct bio_vec bv;
524 struct bvec_iter iter;
525 unsigned int done = 0;
526 bool truncated = false;
527
528 if (new_size >= bio->bi_iter.bi_size)
529 return;
530
531 if (bio_op(bio) != REQ_OP_READ)
532 goto exit;
533
534 bio_for_each_segment(bv, bio, iter) {
535 if (done + bv.bv_len > new_size) {
536 unsigned offset;
537
538 if (!truncated)
539 offset = new_size - done;
540 else
541 offset = 0;
			zero_user(bv.bv_page, bv.bv_offset + offset,
					bv.bv_len - offset);
543 truncated = true;
544 }
545 done += bv.bv_len;
546 }
547
548 exit:
	/*
	 * Don't touch the bvec table here and make it really immutable, since
	 * the fs bio user has to retrieve all pages via
	 * bio_for_each_segment_all in its .end_io() callback.
	 *
	 * It is enough to truncate the bio by updating .bi_size since we can
	 * make a correct bvec with the truncated .bi_size for drivers.
	 */
557 bio->bi_iter.bi_size = new_size;
558}
559
/**
 * guard_bio_eod - truncate a BIO to fit the block device
 * @bio:	bio to truncate
 *
 * This allows us to do IO even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the end of
 * the buffer head manually.  Truly out-of-range accesses will turn into actual
 * I/O errors, this only handles the "we need to be able to do I/O at the
 * final sector" case.
 */
572void guard_bio_eod(struct bio *bio)
573{
574 sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
575
576 if (!maxsector)
577 return;
578
	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
584 if (unlikely(bio->bi_iter.bi_sector >= maxsector))
585 return;
586
587 maxsector -= bio->bi_iter.bi_sector;
588 if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
589 return;
590
591 bio_truncate(bio, maxsector << 9);
592}
593
/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
 **/
602void bio_put(struct bio *bio)
603{
604 if (!bio_flagged(bio, BIO_REFFED))
605 bio_free(bio);
606 else {
607 BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));

		/*
		 * last put frees it
		 */
612 if (atomic_dec_and_test(&bio->__bi_cnt))
613 bio_free(bio);
614 }
615}
616EXPORT_SYMBOL(bio_put);
617
/**
 *	__bio_clone_fast - clone a bio that shares the original bio's biovec
 *	@bio: destination bio
 *	@bio_src: bio to clone
 *
 *	Clone a &bio. Caller will own the returned bio, but not
 *	the actual data it points to. Reference count of returned
 *	bio will be one.
 *
 *	Caller must ensure that @bio_src is not freed before @bio.
 */
629void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
630{
631 WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);

	/*
	 * most users will be overriding ->bi_bdev with a new target,
	 * so we don't set nor calculate new physical/hw segment counts here
	 */
637 bio->bi_bdev = bio_src->bi_bdev;
638 bio_set_flag(bio, BIO_CLONED);
639 if (bio_flagged(bio_src, BIO_THROTTLED))
640 bio_set_flag(bio, BIO_THROTTLED);
641 if (bio_flagged(bio_src, BIO_REMAPPED))
642 bio_set_flag(bio, BIO_REMAPPED);
643 bio->bi_opf = bio_src->bi_opf;
644 bio->bi_ioprio = bio_src->bi_ioprio;
645 bio->bi_write_hint = bio_src->bi_write_hint;
646 bio->bi_iter = bio_src->bi_iter;
647 bio->bi_io_vec = bio_src->bi_io_vec;
648
649 bio_clone_blkg_association(bio, bio_src);
650 blkcg_bio_issue_init(bio);
651}
652EXPORT_SYMBOL(__bio_clone_fast);
653
/**
 *	bio_clone_fast - clone a bio that shares the original bio's biovec
 *	@bio: bio to clone
 *	@gfp_mask: allocation priority
 *	@bs: bio_set to allocate from
 *
 *	Like __bio_clone_fast, only also allocates the returned bio.
 */
662struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
663{
664 struct bio *b;
665
666 b = bio_alloc_bioset(gfp_mask, 0, bs);
667 if (!b)
668 return NULL;
669
670 __bio_clone_fast(b, bio);
671
672 if (bio_crypt_clone(b, bio, gfp_mask) < 0)
673 goto err_put;
674
675 if (bio_integrity(bio) &&
676 bio_integrity_clone(b, bio, gfp_mask) < 0)
677 goto err_put;
678
679 return b;
680
681err_put:
682 bio_put(b);
683 return NULL;
684}
685EXPORT_SYMBOL(bio_clone_fast);
686
687const char *bio_devname(struct bio *bio, char *buf)
688{
689 return bdevname(bio->bi_bdev, buf);
690}
691EXPORT_SYMBOL(bio_devname);
692
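/*
 * Check whether (page, off) is physically contiguous with the end of the
 * given bvec so both ranges can be described by a single vector.  *same_page
 * is set when the new data starts in the page the bvec already ends in.
 */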
693static inline bool page_is_mergeable(const struct bio_vec *bv,
694 struct page *page, unsigned int len, unsigned int off,
695 bool *same_page)
696{
697 size_t bv_end = bv->bv_offset + bv->bv_len;
698 phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
699 phys_addr_t page_addr = page_to_phys(page);
700
701 if (vec_end_addr + 1 != page_addr + off)
702 return false;
703 if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
704 return false;
705
706 *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
707 if (*same_page)
708 return true;
709 return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
710}
711
/*
 * Try to merge a page into a segment, while obeying the hardware segment
 * size limit.  This is not for normal read/write bios, but for passthrough
 * or Zone Append operations that we can't split.
 */
717static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
718 struct page *page, unsigned len,
719 unsigned offset, bool *same_page)
720{
721 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
722 unsigned long mask = queue_segment_boundary(q);
723 phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
724 phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
725
726 if ((addr1 | mask) != (addr2 | mask))
727 return false;
728 if (bv->bv_len + len > queue_max_segment_size(q))
729 return false;
730 return __bio_try_merge_page(bio, page, len, offset, same_page);
731}
732
/**
 * bio_add_hw_page - attempt to add a page to a bio with hw constraints
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 * @max_sectors: maximum number of sectors that can be added
 * @same_page: return if the segment has been merged inside the same page
 *
 * Add a page to a bio while respecting the hardware max_sectors, max_segment
 * and gap limitations.
 */
746int bio_add_hw_page(struct request_queue *q, struct bio *bio,
747 struct page *page, unsigned int len, unsigned int offset,
748 unsigned int max_sectors, bool *same_page)
749{
750 struct bio_vec *bvec;
751
752 if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
753 return 0;
754
755 if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
756 return 0;
757
758 if (bio->bi_vcnt > 0) {
759 if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
760 return len;

		/*
		 * If the queue doesn't support SG gaps and adding this segment
		 * would create a gap, disallow it.
		 */
766 bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
767 if (bvec_gap_to_prev(q, bvec, offset))
768 return 0;
769 }
770
771 if (bio_full(bio, len))
772 return 0;
773
774 if (bio->bi_vcnt >= queue_max_segments(q))
775 return 0;
776
777 bvec = &bio->bi_io_vec[bio->bi_vcnt];
778 bvec->bv_page = page;
779 bvec->bv_len = len;
780 bvec->bv_offset = offset;
781 bio->bi_vcnt++;
782 bio->bi_iter.bi_size += len;
783 return len;
784}
785
/**
 *	bio_add_pc_page	- attempt to add page to passthrough bio
 *	@q: the target queue
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block device
 *	limitations. The target block device must allow bio's up to PAGE_SIZE,
 *	so it is always possible to add a single page to an empty bio.
 *
 *	This should only be used by passthrough bios.
 */
801int bio_add_pc_page(struct request_queue *q, struct bio *bio,
802 struct page *page, unsigned int len, unsigned int offset)
803{
804 bool same_page = false;
805 return bio_add_hw_page(q, bio, page, len, offset,
806 queue_max_hw_sectors(q), &same_page);
807}
808EXPORT_SYMBOL(bio_add_pc_page);
809
/**
 * bio_add_zone_append_page - attempt to add page to zone-append bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist of a bio that will be submitted
 * for a zone-append request. This can fail for a number of reasons, such as
 * the bio being full or the target block device not being a zoned block
 * device or other limitations of the target block device. The target block
 * device must allow bio's up to PAGE_SIZE, so it is always possible to add a
 * single page to an empty bio.
 *
 * Returns: number of bytes added to the bio, or 0 in case of a failure.
 */
826int bio_add_zone_append_page(struct bio *bio, struct page *page,
827 unsigned int len, unsigned int offset)
828{
829 struct request_queue *q = bio->bi_bdev->bd_disk->queue;
830 bool same_page = false;
831
832 if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
833 return 0;
834
835 if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
836 return 0;
837
838 return bio_add_hw_page(q, bio, page, len, offset,
839 queue_max_zone_append_sectors(q), &same_page);
840}
841EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
842
/**
 * __bio_try_merge_page - try appending data to an existing bvec.
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add
 * @off: offset of the data relative to @page
 * @same_page: return if the segment has been merged inside the same page
 *
 * Try to add the data at @page + @off to the last bvec of @bio.  This is a
 * useful optimisation for file systems with a block size smaller than the
 * page size.
 *
 * Warn if (@len, @off) crosses pages in case that @same_page is true.
 *
 * Return %true on success or %false on failure.
 */
859bool __bio_try_merge_page(struct bio *bio, struct page *page,
860 unsigned int len, unsigned int off, bool *same_page)
861{
862 if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
863 return false;
864
865 if (bio->bi_vcnt > 0) {
866 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
867
868 if (page_is_mergeable(bv, page, len, off, same_page)) {
869 if (bio->bi_iter.bi_size > UINT_MAX - len) {
870 *same_page = false;
871 return false;
872 }
873 bv->bv_len += len;
874 bio->bi_iter.bi_size += len;
875 return true;
876 }
877 }
878 return false;
879}
880EXPORT_SYMBOL_GPL(__bio_try_merge_page);
881
/**
 * __bio_add_page - add page(s) to a bio in a new segment
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add, may cross pages
 * @off: offset of the data relative to @page, may cross pages
 *
 * Add the data at @page + @off to @bio as a new bvec.  The caller must ensure
 * that @bio has space for another bvec.
 */
892void __bio_add_page(struct bio *bio, struct page *page,
893 unsigned int len, unsigned int off)
894{
895 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
896
897 WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
898 WARN_ON_ONCE(bio_full(bio, len));
899
900 bv->bv_page = page;
901 bv->bv_offset = off;
902 bv->bv_len = len;
903
904 bio->bi_iter.bi_size += len;
905 bio->bi_vcnt++;
906
907 if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
908 bio_set_flag(bio, BIO_WORKINGSET);
909}
910EXPORT_SYMBOL_GPL(__bio_add_page);
911
/**
 *	bio_add_page	-	attempt to add page(s) to bio
 *	@bio: destination bio
 *	@page: start page to add
 *	@len: vec entry length, may cross pages
 *	@offset: vec entry offset relative to @page, may cross pages
 *
 *	Attempt to add page(s) to the bio_vec maplist. This will only fail
 *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
 */
922int bio_add_page(struct bio *bio, struct page *page,
923 unsigned int len, unsigned int offset)
924{
925 bool same_page = false;
926
927 if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
928 if (bio_full(bio, len))
929 return 0;
930 __bio_add_page(bio, page, len, offset);
931 }
932 return len;
933}
934EXPORT_SYMBOL(bio_add_page);
935
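/*
 * Drop the page references taken when the bio's pages were added (e.g. by
 * bio_iov_iter_get_pages()), optionally dirtying them first.  Bios flagged
 * BIO_NO_PAGE_REF own no references and are left alone.
 */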
936void bio_release_pages(struct bio *bio, bool mark_dirty)
937{
938 struct bvec_iter_all iter_all;
939 struct bio_vec *bvec;
940
941 if (bio_flagged(bio, BIO_NO_PAGE_REF))
942 return;
943
944 bio_for_each_segment_all(bvec, bio, iter_all) {
945 if (mark_dirty && !PageCompound(bvec->bv_page))
946 set_page_dirty_lock(bvec->bv_page);
947 put_page(bvec->bv_page);
948 }
949}
950EXPORT_SYMBOL_GPL(bio_release_pages);
951
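/*
 * Point the bio directly at the caller-supplied bvec array of a bvec-backed
 * iov_iter instead of copying it; the bio takes no page references and is
 * marked cloned.
 */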
952static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
953{
954 WARN_ON_ONCE(bio->bi_max_vecs);
955
956 bio->bi_vcnt = iter->nr_segs;
957 bio->bi_io_vec = (struct bio_vec *)iter->bvec;
958 bio->bi_iter.bi_bvec_done = iter->iov_offset;
959 bio->bi_iter.bi_size = iter->count;
960 bio_set_flag(bio, BIO_NO_PAGE_REF);
961 bio_set_flag(bio, BIO_CLONED);
962}
963
964static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
965{
966 __bio_iov_bvec_set(bio, iter);
967 iov_iter_advance(iter, iter->count);
968 return 0;
969}
970
971static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
972{
973 struct request_queue *q = bio->bi_bdev->bd_disk->queue;
974 struct iov_iter i = *iter;
975
976 iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
977 __bio_iov_bvec_set(bio, &i);
978 iov_iter_advance(iter, i.count);
979 return 0;
980}
981
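/*
 * Undo the page references taken by iov_iter_get_pages() for the trailing
 * part of a pinned range that could not be added to the bio.
 */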
982static void bio_put_pages(struct page **pages, size_t size, size_t off)
983{
984 size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
985
986 for (i = 0; i < nr; i++)
987 put_page(pages[i]);
988}
989
990#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
991
/**
 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be mapped
 *
 * Pins pages from *iter and appends them to @bio's bvec array.  The pages
 * will have to be released using put_page() when done.  For multi-segment
 * *iter, this function only adds pages from the next non-empty segment of
 * the iov iterator.
 */
1002static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
1003{
1004 unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1005 unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
1006 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
1007 struct page **pages = (struct page **)bv;
1008 bool same_page = false;
1009 ssize_t size, left;
1010 unsigned len, i;
1011 size_t offset;
1012
	/*
	 * Move the page array up in the allocated memory for the bio vecs as
	 * far as possible so that we can start filling biovecs from the
	 * beginning without overwriting the temporary page array.
	 */
1018 BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1019 pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
1020
1021 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
1022 if (unlikely(size <= 0))
1023 return size ? size : -EFAULT;
1024
1025 for (left = size, i = 0; left > 0; left -= len, i++) {
1026 struct page *page = pages[i];
1027
1028 len = min_t(size_t, PAGE_SIZE - offset, left);
1029
1030 if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
1031 if (same_page)
1032 put_page(page);
1033 } else {
1034 if (WARN_ON_ONCE(bio_full(bio, len))) {
1035 bio_put_pages(pages + i, left, offset);
1036 return -EINVAL;
1037 }
1038 __bio_add_page(bio, page, len, offset);
1039 }
1040 offset = 0;
1041 }
1042
1043 iov_iter_advance(iter, size);
1044 return 0;
1045}
1046
1047static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
1048{
1049 unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1050 unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
1051 struct request_queue *q = bio->bi_bdev->bd_disk->queue;
1052 unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
1053 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
1054 struct page **pages = (struct page **)bv;
1055 ssize_t size, left;
1056 unsigned len, i;
1057 size_t offset;
1058 int ret = 0;
1059
1060 if (WARN_ON_ONCE(!max_append_sectors))
1061 return 0;
1062
	/*
	 * Move the page array up in the allocated memory for the bio vecs as
	 * far as possible so that we can start filling biovecs from the
	 * beginning without overwriting the temporary page array.
	 */
1068 BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1069 pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
1070
1071 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
1072 if (unlikely(size <= 0))
1073 return size ? size : -EFAULT;
1074
1075 for (left = size, i = 0; left > 0; left -= len, i++) {
1076 struct page *page = pages[i];
1077 bool same_page = false;
1078
1079 len = min_t(size_t, PAGE_SIZE - offset, left);
1080 if (bio_add_hw_page(q, bio, page, len, offset,
1081 max_append_sectors, &same_page) != len) {
1082 bio_put_pages(pages + i, left, offset);
1083 ret = -EINVAL;
1084 break;
1085 }
1086 if (same_page)
1087 put_page(page);
1088 offset = 0;
1089 }
1090
1091 iov_iter_advance(iter, size - left);
1092 return ret;
1093}
1094
/**
 * bio_iov_iter_get_pages - add user or kernel pages to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be added
 *
 * This takes either an iterator pointing to user memory, or one pointing to
 * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
 * map them into the kernel. On IO completion, the caller should put those
 * pages. For bvec based iterators bio_iov_iter_get_pages() uses the bvecs
 * provided by the iterator rather than copying them, so the caller must
 * ensure they stay referenced until the submitted I/O has completed.
 *
 * The function tries, but does not guarantee, to pin as many pages as
 * fit into the bio, or are requested in @iter, whatever is smaller. If
 * MM encounters an error pinning the requested pages, it stops. Error
 * is returned only if 0 pages could be pinned.
 */
1118int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
1119{
1120 int ret = 0;
1121
1122 if (iov_iter_is_bvec(iter)) {
1123 if (bio_op(bio) == REQ_OP_ZONE_APPEND)
1124 return bio_iov_bvec_set_append(bio, iter);
1125 return bio_iov_bvec_set(bio, iter);
1126 }
1127
1128 do {
1129 if (bio_op(bio) == REQ_OP_ZONE_APPEND)
1130 ret = __bio_iov_append_get_pages(bio, iter);
1131 else
1132 ret = __bio_iov_iter_get_pages(bio, iter);
1133 } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
1134
	/* don't account direct I/O as memory stall */
1136 bio_clear_flag(bio, BIO_WORKINGSET);
1137 return bio->bi_vcnt ? 0 : ret;
1138}
1139EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
1140
1141static void submit_bio_wait_endio(struct bio *bio)
1142{
1143 complete(bio->bi_private);
1144}
1145
/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
 * bio_endio() on failure.
 *
 * WARNING: Unlike how submit_bio() is usually used, this function does not
 * result in the bio reference being consumed. The caller must drop the
 * reference on his own.
 */
1157int submit_bio_wait(struct bio *bio)
1158{
1159 DECLARE_COMPLETION_ONSTACK_MAP(done,
1160 bio->bi_bdev->bd_disk->lockdep_map);
1161 unsigned long hang_check;
1162
1163 bio->bi_private = &done;
1164 bio->bi_end_io = submit_bio_wait_endio;
1165 bio->bi_opf |= REQ_SYNC;
1166 submit_bio(bio);
1167
	/* Prevent hang_check timer from firing at us during very long I/O */
1169 hang_check = sysctl_hung_task_timeout_secs;
1170 if (hang_check)
1171 while (!wait_for_completion_io_timeout(&done,
1172 hang_check * (HZ/2)))
1173 ;
1174 else
1175 wait_for_completion_io(&done);
1176
1177 return blk_status_to_errno(bio->bi_status);
1178}
1179EXPORT_SYMBOL(submit_bio_wait);
1180
/**
 * bio_advance - increment/complete a bio by some number of bytes
 * @bio:	bio to advance
 * @bytes:	number of bytes to complete
 *
 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
 * be updated on the last bvec as well.
 *
 * @bio will then represent the remaining, uncompleted portion of the io.
 */
1192void bio_advance(struct bio *bio, unsigned bytes)
1193{
1194 if (bio_integrity(bio))
1195 bio_integrity_advance(bio, bytes);
1196
1197 bio_crypt_advance(bio, bytes);
1198 bio_advance_iter(bio, &bio->bi_iter, bytes);
1199}
1200EXPORT_SYMBOL(bio_advance);
1201
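/*
 * Copy data between two bios, bvec by bvec, starting at the supplied
 * iterators and stopping when either one is exhausted.
 */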
1202void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
1203 struct bio *src, struct bvec_iter *src_iter)
1204{
1205 struct bio_vec src_bv, dst_bv;
1206 void *src_p, *dst_p;
1207 unsigned bytes;
1208
1209 while (src_iter->bi_size && dst_iter->bi_size) {
1210 src_bv = bio_iter_iovec(src, *src_iter);
1211 dst_bv = bio_iter_iovec(dst, *dst_iter);
1212
1213 bytes = min(src_bv.bv_len, dst_bv.bv_len);
1214
1215 src_p = kmap_atomic(src_bv.bv_page);
1216 dst_p = kmap_atomic(dst_bv.bv_page);
1217
1218 memcpy(dst_p + dst_bv.bv_offset,
1219 src_p + src_bv.bv_offset,
1220 bytes);
1221
1222 kunmap_atomic(dst_p);
1223 kunmap_atomic(src_p);
1224
1225 flush_dcache_page(dst_bv.bv_page);
1226
1227 bio_advance_iter_single(src, src_iter, bytes);
1228 bio_advance_iter_single(dst, dst_iter, bytes);
1229 }
1230}
1231EXPORT_SYMBOL(bio_copy_data_iter);
1232
/**
 * bio_copy_data - copy contents of data buffers from one bio to another
 * @dst: destination bio
 * @src: source bio
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_iter.bi_size, dst->bi_iter.bi_size) bytes.
 */
1241void bio_copy_data(struct bio *dst, struct bio *src)
1242{
1243 struct bvec_iter src_iter = src->bi_iter;
1244 struct bvec_iter dst_iter = dst->bi_iter;
1245
1246 bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
1247}
1248EXPORT_SYMBOL(bio_copy_data);
1249
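/*
 * Free every page currently attached to the bio with __free_page().
 */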
1250void bio_free_pages(struct bio *bio)
1251{
1252 struct bio_vec *bvec;
1253 struct bvec_iter_all iter_all;
1254
1255 bio_for_each_segment_all(bvec, bio, iter_all)
1256 __free_page(bvec->bv_page);
1257}
1258EXPORT_SYMBOL(bio_free_pages);
1259
/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * because the required locks are not interrupt-safe.  So what we can do is to
 * mark the pages dirty _before_ performing IO.  And in interrupt context,
 * check that the pages are still dirty.   If so, fine.  If not, redirty them
 * in process context.
 *
 * We special-case compound pages here: normally this means reads into hugetlb
 * pages.  The logic in here doesn't really work right for compound pages
 * because the VM does not uniformly chase down the head page in all cases.
 * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
 * handle them at all.  So we skip compound pages here and leave them to the
 * VM.
 */
1289void bio_set_pages_dirty(struct bio *bio)
1290{
1291 struct bio_vec *bvec;
1292 struct bvec_iter_all iter_all;
1293
1294 bio_for_each_segment_all(bvec, bio, iter_all) {
1295 if (!PageCompound(bvec->bv_page))
1296 set_page_dirty_lock(bvec->bv_page);
1297 }
1298}
1299
/*
 * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
 * If they are, then fine.  If, however, some pages are clean then they must
 * have been written out during the direct-IO read.  So we take another ref on
 * the BIO and re-dirty the pages in process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 * here on.  It will run one put_page() against each page and will run one
 * bio_put() against the BIO.
 */
1311static void bio_dirty_fn(struct work_struct *work);
1312
1313static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
1314static DEFINE_SPINLOCK(bio_dirty_lock);
1315static struct bio *bio_dirty_list;
1316
/*
 * This runs in process context
 */
1320static void bio_dirty_fn(struct work_struct *work)
1321{
1322 struct bio *bio, *next;
1323
1324 spin_lock_irq(&bio_dirty_lock);
1325 next = bio_dirty_list;
1326 bio_dirty_list = NULL;
1327 spin_unlock_irq(&bio_dirty_lock);
1328
1329 while ((bio = next) != NULL) {
1330 next = bio->bi_private;
1331
1332 bio_release_pages(bio, true);
1333 bio_put(bio);
1334 }
1335}
1336
1337void bio_check_pages_dirty(struct bio *bio)
1338{
1339 struct bio_vec *bvec;
1340 unsigned long flags;
1341 struct bvec_iter_all iter_all;
1342
1343 bio_for_each_segment_all(bvec, bio, iter_all) {
1344 if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
1345 goto defer;
1346 }
1347
1348 bio_release_pages(bio, false);
1349 bio_put(bio);
1350 return;
1351defer:
1352 spin_lock_irqsave(&bio_dirty_lock, flags);
1353 bio->bi_private = bio_dirty_list;
1354 bio_dirty_list = bio;
1355 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1356 schedule_work(&bio_dirty_work);
1357}
1358
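/*
 * Account one completion against a possibly chained bio.  Returns true when
 * the final outstanding completion has arrived and ->bi_end_io may run.
 */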
1359static inline bool bio_remaining_done(struct bio *bio)
1360{
	/*
	 * If we're not chaining, then ->__bi_remaining is always 1 and
	 * we always end io on the first invocation.
	 */
1365 if (!bio_flagged(bio, BIO_CHAIN))
1366 return true;
1367
1368 BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
1369
1370 if (atomic_dec_and_test(&bio->__bi_remaining)) {
1371 bio_clear_flag(bio, BIO_CHAIN);
1372 return true;
1373 }
1374
1375 return false;
1376}
1377
/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
 *   way to end I/O on a bio. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io function.
 *
 *   bio_endio() can be called several times on a bio that has been chained
 *   using bio_chain().  The ->bi_end_io() function will only be called the
 *   last time.
 **/
1391void bio_endio(struct bio *bio)
1392{
1393again:
1394 if (!bio_remaining_done(bio))
1395 return;
1396 if (!bio_integrity_endio(bio))
1397 return;
1398
1399 if (bio->bi_bdev)
1400 rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
1401
1402 if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
1403 trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
1404 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
1405 }
1406
	/*
	 * Need to have a real endio function for chained bios, otherwise
	 * various corner cases will break (like stacking block devices that
	 * save/restore bi_end_io) - however, we want to avoid unbounded
	 * recursion and blowing the stack. Tail call optimization would
	 * handle this, but compiling with frame pointers also disables
	 * gcc's sibling call optimization.
	 */
1415 if (bio->bi_end_io == bio_chain_endio) {
1416 bio = __bio_chain_endio(bio);
1417 goto again;
1418 }
1419
1420 blk_throtl_bio_endio(bio);
1421
1422 bio_uninit(bio);
1423 if (bio->bi_end_io)
1424 bio->bi_end_io(bio);
1425}
1426EXPORT_SYMBOL(bio_endio);
1427
/**
 * bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * Unless this is a discard request the newly allocated bio will point
 * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
 * @bio is not freed before the split bio.
 */
1442struct bio *bio_split(struct bio *bio, int sectors,
1443 gfp_t gfp, struct bio_set *bs)
1444{
1445 struct bio *split;
1446
1447 BUG_ON(sectors <= 0);
1448 BUG_ON(sectors >= bio_sectors(bio));
1449
	/* Zone append commands cannot be split */
1451 if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
1452 return NULL;
1453
1454 split = bio_clone_fast(bio, gfp, bs);
1455 if (!split)
1456 return NULL;
1457
1458 split->bi_iter.bi_size = sectors << 9;
1459
1460 if (bio_integrity(split))
1461 bio_integrity_trim(split);
1462
1463 bio_advance(bio, split->bi_iter.bi_size);
1464
1465 if (bio_flagged(bio, BIO_TRACE_COMPLETION))
1466 bio_set_flag(split, BIO_TRACE_COMPLETION);
1467
1468 return split;
1469}
1470EXPORT_SYMBOL(bio_split);
1471
/**
 * bio_trim - trim a bio
 * @bio:	bio to trim
 * @offset:	number of sectors to trim from the front of @bio
 * @size:	size we want to trim @bio to, in sectors
 */
1478void bio_trim(struct bio *bio, int offset, int size)
1479{
	/* 'bio' is a cloned bio which we need to trim to match
	 * the given offset and size.
	 */

1484 size <<= 9;
1485 if (offset == 0 && size == bio->bi_iter.bi_size)
1486 return;
1487
1488 bio_advance(bio, offset << 9);
1489 bio->bi_iter.bi_size = size;
1490
1491 if (bio_integrity(bio))
1492 bio_integrity_trim(bio);
1493
1494}
1495EXPORT_SYMBOL_GPL(bio_trim);
1496
/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
1501int biovec_init_pool(mempool_t *pool, int pool_entries)
1502{
1503 struct biovec_slab *bp = bvec_slabs + ARRAY_SIZE(bvec_slabs) - 1;
1504
1505 return mempool_init_slab_pool(pool, pool_entries, bp->slab);
1506}
1507
/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
1514void bioset_exit(struct bio_set *bs)
1515{
1516 if (bs->rescue_workqueue)
1517 destroy_workqueue(bs->rescue_workqueue);
1518 bs->rescue_workqueue = NULL;
1519
1520 mempool_exit(&bs->bio_pool);
1521 mempool_exit(&bs->bvec_pool);
1522
1523 bioset_integrity_free(bs);
1524 if (bs->bio_slab)
1525 bio_put_slab(bs);
1526 bs->bio_slab = NULL;
1527}
1528EXPORT_SYMBOL(bioset_exit);
1529
/**
 * bioset_init - Initialize a bio_set
 * @bs:		pool to initialize
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
 *              and %BIOSET_NEED_RESCUER
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
 *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
 *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
 *    to dispatch queued requests when the mempool runs out of space.
 */
1551int bioset_init(struct bio_set *bs,
1552 unsigned int pool_size,
1553 unsigned int front_pad,
1554 int flags)
1555{
1556 bs->front_pad = front_pad;
1557 if (flags & BIOSET_NEED_BVECS)
1558 bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1559 else
1560 bs->back_pad = 0;
1561
1562 spin_lock_init(&bs->rescue_lock);
1563 bio_list_init(&bs->rescue_list);
1564 INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
1565
1566 bs->bio_slab = bio_find_or_create_slab(bs);
1567 if (!bs->bio_slab)
1568 return -ENOMEM;
1569
1570 if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
1571 goto bad;
1572
1573 if ((flags & BIOSET_NEED_BVECS) &&
1574 biovec_init_pool(&bs->bvec_pool, pool_size))
1575 goto bad;
1576
1577 if (!(flags & BIOSET_NEED_RESCUER))
1578 return 0;
1579
1580 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1581 if (!bs->rescue_workqueue)
1582 goto bad;
1583
1584 return 0;
1585bad:
1586 bioset_exit(bs);
1587 return -ENOMEM;
1588}
1589EXPORT_SYMBOL(bioset_init);
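/*
 * Typical usage (illustrative sketch only; the structure and variable names
 * below are made up): a driver embeds the bio at the end of its own per-clone
 * structure and passes that offset as front_pad, e.g.
 *
 *	if (bioset_init(&md->bs, 16, offsetof(struct my_clone, bio),
 *			BIOSET_NEED_BVECS))
 *		return -ENOMEM;
 *
 * so every bio allocated from &md->bs carries a struct my_clone in front of
 * it, reachable via container_of().
 */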
1590
/*
 * Initialize and setup a new bio_set, based on the settings from
 * another bio_set.
 */
1595int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
1596{
1597 int flags;
1598
1599 flags = 0;
1600 if (src->bvec_pool.min_nr)
1601 flags |= BIOSET_NEED_BVECS;
1602 if (src->rescue_workqueue)
1603 flags |= BIOSET_NEED_RESCUER;
1604
1605 return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
1606}
1607EXPORT_SYMBOL(bioset_init_from_src);
1608
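/*
 * Boot-time setup: create the per-size biovec slab caches and the global
 * fs_bio_set, including its integrity pool.
 */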
1609static int __init init_bio(void)
1610{
1611 int i;
1612
1613 bio_integrity_init();
1614
1615 for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
1616 struct biovec_slab *bvs = bvec_slabs + i;
1617
1618 bvs->slab = kmem_cache_create(bvs->name,
1619 bvs->nr_vecs * sizeof(struct bio_vec), 0,
1620 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1621 }
1622
1623 if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
1624 panic("bio: can't allocate bios\n");
1625
1626 if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
1627 panic("bio: can't create integrity pool\n");
1628
1629 return 0;
1630}
1631subsys_initcall(init_bio);
1632