1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bio.h>
21#include <linux/blkdev.h>
22#include <linux/iocontext.h>
23#include <linux/slab.h>
24#include <linux/init.h>
25#include <linux/kernel.h>
26#include <linux/export.h>
27#include <linux/mempool.h>
28#include <linux/workqueue.h>
29#include <linux/cgroup.h>
30#include <scsi/sg.h>
31
32#include <trace/events/block.h>
33
34
35
36
37
38#define BIO_INLINE_VECS 4
39
40static mempool_t *bio_split_pool __read_mostly;
41
42
43
44
45
46
47#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
48static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
49 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
50};
51#undef BV
52
53
54
55
56
/* Exported bio_set; created by init_bio() with BIO_POOL_SIZE entries. */
struct bio_set *fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);
59
60
61
62
/*
 * One slot of the table of bio slab caches that bio_sets share by size.
 * A slot whose ->slab is NULL is free for reuse.
 */
struct bio_slab {
	struct kmem_cache *slab;	/* the cache, NULL when the slot is free */
	unsigned int slab_ref;		/* number of bio_sets using the cache */
	unsigned int slab_size;		/* object size the cache was created for */
	char name[8];			/* cache name, "bio-<slot index>" */
};

/* Serialises every access to the bio_slabs table and its counters. */
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
/* Number of slots handed out so far. */
static unsigned int bio_slab_nr;
/* Number of slots the bio_slabs allocation can hold. */
static unsigned int bio_slab_max;
72
73static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
74{
75 unsigned int sz = sizeof(struct bio) + extra_size;
76 struct kmem_cache *slab = NULL;
77 struct bio_slab *bslab, *new_bio_slabs;
78 unsigned int new_bio_slab_max;
79 unsigned int i, entry = -1;
80
81 mutex_lock(&bio_slab_lock);
82
83 i = 0;
84 while (i < bio_slab_nr) {
85 bslab = &bio_slabs[i];
86
87 if (!bslab->slab && entry == -1)
88 entry = i;
89 else if (bslab->slab_size == sz) {
90 slab = bslab->slab;
91 bslab->slab_ref++;
92 break;
93 }
94 i++;
95 }
96
97 if (slab)
98 goto out_unlock;
99
100 if (bio_slab_nr == bio_slab_max && entry == -1) {
101 new_bio_slab_max = bio_slab_max << 1;
102 new_bio_slabs = krealloc(bio_slabs,
103 new_bio_slab_max * sizeof(struct bio_slab),
104 GFP_KERNEL);
105 if (!new_bio_slabs)
106 goto out_unlock;
107 bio_slab_max = new_bio_slab_max;
108 bio_slabs = new_bio_slabs;
109 }
110 if (entry == -1)
111 entry = bio_slab_nr++;
112
113 bslab = &bio_slabs[entry];
114
115 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
116 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
117 if (!slab)
118 goto out_unlock;
119
120 printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
121 bslab->slab = slab;
122 bslab->slab_ref = 1;
123 bslab->slab_size = sz;
124out_unlock:
125 mutex_unlock(&bio_slab_lock);
126 return slab;
127}
128
129static void bio_put_slab(struct bio_set *bs)
130{
131 struct bio_slab *bslab = NULL;
132 unsigned int i;
133
134 mutex_lock(&bio_slab_lock);
135
136 for (i = 0; i < bio_slab_nr; i++) {
137 if (bs->bio_slab == bio_slabs[i].slab) {
138 bslab = &bio_slabs[i];
139 break;
140 }
141 }
142
143 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
144 goto out;
145
146 WARN_ON(!bslab->slab_ref);
147
148 if (--bslab->slab_ref)
149 goto out;
150
151 kmem_cache_destroy(bslab->slab);
152 bslab->slab = NULL;
153
154out:
155 mutex_unlock(&bio_slab_lock);
156}
157
158unsigned int bvec_nr_vecs(unsigned short idx)
159{
160 return bvec_slabs[idx].nr_vecs;
161}
162
163void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
164{
165 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
166
167 if (idx == BIOVEC_MAX_IDX)
168 mempool_free(bv, bs->bvec_pool);
169 else {
170 struct biovec_slab *bvs = bvec_slabs + idx;
171
172 kmem_cache_free(bvs->slab, bv);
173 }
174}
175
176struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
177 struct bio_set *bs)
178{
179 struct bio_vec *bvl;
180
181
182
183
184 switch (nr) {
185 case 1:
186 *idx = 0;
187 break;
188 case 2 ... 4:
189 *idx = 1;
190 break;
191 case 5 ... 16:
192 *idx = 2;
193 break;
194 case 17 ... 64:
195 *idx = 3;
196 break;
197 case 65 ... 128:
198 *idx = 4;
199 break;
200 case 129 ... BIO_MAX_PAGES:
201 *idx = 5;
202 break;
203 default:
204 return NULL;
205 }
206
207
208
209
210
211 if (*idx == BIOVEC_MAX_IDX) {
212fallback:
213 bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
214 } else {
215 struct biovec_slab *bvs = bvec_slabs + *idx;
216 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
217
218
219
220
221
222
223 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
224
225
226
227
228
229 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
230 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
231 *idx = BIOVEC_MAX_IDX;
232 goto fallback;
233 }
234 }
235
236 return bvl;
237}
238
/*
 * Release what hangs off @bio without freeing the bio itself: its task
 * association and, if present, its integrity payload.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
246
247static void bio_free(struct bio *bio)
248{
249 struct bio_set *bs = bio->bi_pool;
250 void *p;
251
252 __bio_free(bio);
253
254 if (bs) {
255 if (bio_has_allocated_vec(bio))
256 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
257
258
259
260
261 p = bio;
262 p -= bs->front_pad;
263
264 mempool_free(p, bs->bio_pool);
265 } else {
266
267 kfree(bio);
268 }
269}
270
271void bio_init(struct bio *bio)
272{
273 memset(bio, 0, sizeof(*bio));
274 bio->bi_flags = 1 << BIO_UPTODATE;
275 atomic_set(&bio->bi_cnt, 1);
276}
277EXPORT_SYMBOL(bio_init);
278
279
280
281
282
283
284
285
286
287
288
289void bio_reset(struct bio *bio)
290{
291 unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
292
293 __bio_free(bio);
294
295 memset(bio, 0, BIO_RESET_BYTES);
296 bio->bi_flags = flags|(1 << BIO_UPTODATE);
297}
298EXPORT_SYMBOL(bio_reset);
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
321{
322 unsigned front_pad;
323 unsigned inline_vecs;
324 unsigned long idx = BIO_POOL_NONE;
325 struct bio_vec *bvl = NULL;
326 struct bio *bio;
327 void *p;
328
329 if (!bs) {
330 if (nr_iovecs > UIO_MAXIOV)
331 return NULL;
332
333 p = kmalloc(sizeof(struct bio) +
334 nr_iovecs * sizeof(struct bio_vec),
335 gfp_mask);
336 front_pad = 0;
337 inline_vecs = nr_iovecs;
338 } else {
339 p = mempool_alloc(bs->bio_pool, gfp_mask);
340 front_pad = bs->front_pad;
341 inline_vecs = BIO_INLINE_VECS;
342 }
343
344 if (unlikely(!p))
345 return NULL;
346
347 bio = p + front_pad;
348 bio_init(bio);
349
350 if (nr_iovecs > inline_vecs) {
351 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
352 if (unlikely(!bvl))
353 goto err_free;
354 } else if (nr_iovecs) {
355 bvl = bio->bi_inline_vecs;
356 }
357
358 bio->bi_pool = bs;
359 bio->bi_flags |= idx << BIO_POOL_OFFSET;
360 bio->bi_max_vecs = nr_iovecs;
361 bio->bi_io_vec = bvl;
362 return bio;
363
364err_free:
365 mempool_free(p, bs->bio_pool);
366 return NULL;
367}
368EXPORT_SYMBOL(bio_alloc_bioset);
369
370void zero_fill_bio(struct bio *bio)
371{
372 unsigned long flags;
373 struct bio_vec *bv;
374 int i;
375
376 bio_for_each_segment(bv, bio, i) {
377 char *data = bvec_kmap_irq(bv, &flags);
378 memset(data, 0, bv->bv_len);
379 flush_dcache_page(bv->bv_page);
380 bvec_kunmap_irq(data, &flags);
381 }
382}
383EXPORT_SYMBOL(zero_fill_bio);
384
385
386
387
388
389
390
391
392
393void bio_put(struct bio *bio)
394{
395 BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
396
397
398
399
400 if (atomic_dec_and_test(&bio->bi_cnt))
401 bio_free(bio);
402}
403EXPORT_SYMBOL(bio_put);
404
405inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
406{
407 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
408 blk_recount_segments(q, bio);
409
410 return bio->bi_phys_segments;
411}
412EXPORT_SYMBOL(bio_phys_segments);
413
414
415
416
417
418
419
420
421
422
423void __bio_clone(struct bio *bio, struct bio *bio_src)
424{
425 memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
426 bio_src->bi_max_vecs * sizeof(struct bio_vec));
427
428
429
430
431
432 bio->bi_sector = bio_src->bi_sector;
433 bio->bi_bdev = bio_src->bi_bdev;
434 bio->bi_flags |= 1 << BIO_CLONED;
435 bio->bi_rw = bio_src->bi_rw;
436 bio->bi_vcnt = bio_src->bi_vcnt;
437 bio->bi_size = bio_src->bi_size;
438 bio->bi_idx = bio_src->bi_idx;
439}
440EXPORT_SYMBOL(__bio_clone);
441
442
443
444
445
446
447
448
449
450struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
451 struct bio_set *bs)
452{
453 struct bio *b;
454
455 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
456 if (!b)
457 return NULL;
458
459 __bio_clone(b, bio);
460
461 if (bio_integrity(bio)) {
462 int ret;
463
464 ret = bio_integrity_clone(b, bio, gfp_mask);
465
466 if (ret < 0) {
467 bio_put(b);
468 return NULL;
469 }
470 }
471
472 return b;
473}
474EXPORT_SYMBOL(bio_clone_bioset);
475
476
477
478
479
480
481
482
483
484
485int bio_get_nr_vecs(struct block_device *bdev)
486{
487 struct request_queue *q = bdev_get_queue(bdev);
488 int nr_pages;
489
490 nr_pages = min_t(unsigned,
491 queue_max_segments(q),
492 queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
493
494 return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
495
496}
497EXPORT_SYMBOL(bio_get_nr_vecs);
498
499static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
500 *page, unsigned int len, unsigned int offset,
501 unsigned short max_sectors)
502{
503 int retried_segments = 0;
504 struct bio_vec *bvec;
505
506
507
508
509 if (unlikely(bio_flagged(bio, BIO_CLONED)))
510 return 0;
511
512 if (((bio->bi_size + len) >> 9) > max_sectors)
513 return 0;
514
515
516
517
518
519
520 if (bio->bi_vcnt > 0) {
521 struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
522
523 if (page == prev->bv_page &&
524 offset == prev->bv_offset + prev->bv_len) {
525 unsigned int prev_bv_len = prev->bv_len;
526 prev->bv_len += len;
527
528 if (q->merge_bvec_fn) {
529 struct bvec_merge_data bvm = {
530
531
532
533
534 .bi_bdev = bio->bi_bdev,
535 .bi_sector = bio->bi_sector,
536 .bi_size = bio->bi_size - prev_bv_len,
537 .bi_rw = bio->bi_rw,
538 };
539
540 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
541 prev->bv_len -= len;
542 return 0;
543 }
544 }
545
546 goto done;
547 }
548 }
549
550 if (bio->bi_vcnt >= bio->bi_max_vecs)
551 return 0;
552
553
554
555
556
557
558 while (bio->bi_phys_segments >= queue_max_segments(q)) {
559
560 if (retried_segments)
561 return 0;
562
563 retried_segments = 1;
564 blk_recount_segments(q, bio);
565 }
566
567
568
569
570
571 bvec = &bio->bi_io_vec[bio->bi_vcnt];
572 bvec->bv_page = page;
573 bvec->bv_len = len;
574 bvec->bv_offset = offset;
575
576
577
578
579
580
581 if (q->merge_bvec_fn) {
582 struct bvec_merge_data bvm = {
583 .bi_bdev = bio->bi_bdev,
584 .bi_sector = bio->bi_sector,
585 .bi_size = bio->bi_size,
586 .bi_rw = bio->bi_rw,
587 };
588
589
590
591
592
593 if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
594 bvec->bv_page = NULL;
595 bvec->bv_len = 0;
596 bvec->bv_offset = 0;
597 return 0;
598 }
599 }
600
601
602 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
603 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
604
605 bio->bi_vcnt++;
606 bio->bi_phys_segments++;
607 done:
608 bio->bi_size += len;
609 return len;
610}
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
/*
 * Try to add a page to @bio using @q's hardware sector limit
 * (queue_max_hw_sectors()) as the size cap.  Returns @len on success, 0 on
 * failure.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	unsigned int hw_limit = queue_max_hw_sectors(q);

	return __bio_add_page(q, bio, page, len, offset, hw_limit);
}
EXPORT_SYMBOL(bio_add_pc_page);
634
635
636
637
638
639
640
641
642
643
644
645
646
647int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
648 unsigned int offset)
649{
650 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
651 return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
652}
653EXPORT_SYMBOL(bio_add_page);
654
/*
 * Bookkeeping attached to a bounce bio (via bi_private) so the data can be
 * copied back and the resources released when the I/O is done.
 */
struct bio_map_data {
	struct bio_vec *iovecs;		/* copy of the bio's vectors as built */
	struct sg_iovec *sgvecs;	/* copy of the caller's iovec array */
	int nr_sgvecs;			/* number of entries in sgvecs */
	int is_our_pages;		/* non-zero: pages get freed on uncopy */
};
661
662static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
663 struct sg_iovec *iov, int iov_count,
664 int is_our_pages)
665{
666 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
667 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
668 bmd->nr_sgvecs = iov_count;
669 bmd->is_our_pages = is_our_pages;
670 bio->bi_private = bmd;
671}
672
673static void bio_free_map_data(struct bio_map_data *bmd)
674{
675 kfree(bmd->iovecs);
676 kfree(bmd->sgvecs);
677 kfree(bmd);
678}
679
680static struct bio_map_data *bio_alloc_map_data(int nr_segs,
681 unsigned int iov_count,
682 gfp_t gfp_mask)
683{
684 struct bio_map_data *bmd;
685
686 if (iov_count > UIO_MAXIOV)
687 return NULL;
688
689 bmd = kmalloc(sizeof(*bmd), gfp_mask);
690 if (!bmd)
691 return NULL;
692
693 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
694 if (!bmd->iovecs) {
695 kfree(bmd);
696 return NULL;
697 }
698
699 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
700 if (bmd->sgvecs)
701 return bmd;
702
703 kfree(bmd->iovecs);
704 kfree(bmd);
705 return NULL;
706}
707
708static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
709 struct sg_iovec *iov, int iov_count,
710 int to_user, int from_user, int do_free_page)
711{
712 int ret = 0, i;
713 struct bio_vec *bvec;
714 int iov_idx = 0;
715 unsigned int iov_off = 0;
716
717 __bio_for_each_segment(bvec, bio, i, 0) {
718 char *bv_addr = page_address(bvec->bv_page);
719 unsigned int bv_len = iovecs[i].bv_len;
720
721 while (bv_len && iov_idx < iov_count) {
722 unsigned int bytes;
723 char __user *iov_addr;
724
725 bytes = min_t(unsigned int,
726 iov[iov_idx].iov_len - iov_off, bv_len);
727 iov_addr = iov[iov_idx].iov_base + iov_off;
728
729 if (!ret) {
730 if (to_user)
731 ret = copy_to_user(iov_addr, bv_addr,
732 bytes);
733
734 if (from_user)
735 ret = copy_from_user(bv_addr, iov_addr,
736 bytes);
737
738 if (ret)
739 ret = -EFAULT;
740 }
741
742 bv_len -= bytes;
743 bv_addr += bytes;
744 iov_addr += bytes;
745 iov_off += bytes;
746
747 if (iov[iov_idx].iov_len == iov_off) {
748 iov_idx++;
749 iov_off = 0;
750 }
751 }
752
753 if (do_free_page)
754 __free_page(bvec->bv_page);
755 }
756
757 return ret;
758}
759
760
761
762
763
764
765
766
767int bio_uncopy_user(struct bio *bio)
768{
769 struct bio_map_data *bmd = bio->bi_private;
770 int ret = 0;
771
772 if (!bio_flagged(bio, BIO_NULL_MAPPED))
773 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
774 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
775 0, bmd->is_our_pages);
776 bio_free_map_data(bmd);
777 bio_put(bio);
778 return ret;
779}
780EXPORT_SYMBOL(bio_uncopy_user);
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795struct bio *bio_copy_user_iov(struct request_queue *q,
796 struct rq_map_data *map_data,
797 struct sg_iovec *iov, int iov_count,
798 int write_to_vm, gfp_t gfp_mask)
799{
800 struct bio_map_data *bmd;
801 struct bio_vec *bvec;
802 struct page *page;
803 struct bio *bio;
804 int i, ret;
805 int nr_pages = 0;
806 unsigned int len = 0;
807 unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
808
809 for (i = 0; i < iov_count; i++) {
810 unsigned long uaddr;
811 unsigned long end;
812 unsigned long start;
813
814 uaddr = (unsigned long)iov[i].iov_base;
815 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
816 start = uaddr >> PAGE_SHIFT;
817
818
819
820
821 if (end < start)
822 return ERR_PTR(-EINVAL);
823
824 nr_pages += end - start;
825 len += iov[i].iov_len;
826 }
827
828 if (offset)
829 nr_pages++;
830
831 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
832 if (!bmd)
833 return ERR_PTR(-ENOMEM);
834
835 ret = -ENOMEM;
836 bio = bio_kmalloc(gfp_mask, nr_pages);
837 if (!bio)
838 goto out_bmd;
839
840 if (!write_to_vm)
841 bio->bi_rw |= REQ_WRITE;
842
843 ret = 0;
844
845 if (map_data) {
846 nr_pages = 1 << map_data->page_order;
847 i = map_data->offset / PAGE_SIZE;
848 }
849 while (len) {
850 unsigned int bytes = PAGE_SIZE;
851
852 bytes -= offset;
853
854 if (bytes > len)
855 bytes = len;
856
857 if (map_data) {
858 if (i == map_data->nr_entries * nr_pages) {
859 ret = -ENOMEM;
860 break;
861 }
862
863 page = map_data->pages[i / nr_pages];
864 page += (i % nr_pages);
865
866 i++;
867 } else {
868 page = alloc_page(q->bounce_gfp | gfp_mask);
869 if (!page) {
870 ret = -ENOMEM;
871 break;
872 }
873 }
874
875 if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
876 break;
877
878 len -= bytes;
879 offset = 0;
880 }
881
882 if (ret)
883 goto cleanup;
884
885
886
887
888 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
889 (map_data && map_data->from_user)) {
890 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
891 if (ret)
892 goto cleanup;
893 }
894
895 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
896 return bio;
897cleanup:
898 if (!map_data)
899 bio_for_each_segment(bvec, bio, i)
900 __free_page(bvec->bv_page);
901
902 bio_put(bio);
903out_bmd:
904 bio_free_map_data(bmd);
905 return ERR_PTR(ret);
906}
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
922 unsigned long uaddr, unsigned int len,
923 int write_to_vm, gfp_t gfp_mask)
924{
925 struct sg_iovec iov;
926
927 iov.iov_base = (void __user *)uaddr;
928 iov.iov_len = len;
929
930 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
931}
932EXPORT_SYMBOL(bio_copy_user);
933
934static struct bio *__bio_map_user_iov(struct request_queue *q,
935 struct block_device *bdev,
936 struct sg_iovec *iov, int iov_count,
937 int write_to_vm, gfp_t gfp_mask)
938{
939 int i, j;
940 int nr_pages = 0;
941 struct page **pages;
942 struct bio *bio;
943 int cur_page = 0;
944 int ret, offset;
945
946 for (i = 0; i < iov_count; i++) {
947 unsigned long uaddr = (unsigned long)iov[i].iov_base;
948 unsigned long len = iov[i].iov_len;
949 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
950 unsigned long start = uaddr >> PAGE_SHIFT;
951
952
953
954
955 if (end < start)
956 return ERR_PTR(-EINVAL);
957
958 nr_pages += end - start;
959
960
961
962 if (uaddr & queue_dma_alignment(q))
963 return ERR_PTR(-EINVAL);
964 }
965
966 if (!nr_pages)
967 return ERR_PTR(-EINVAL);
968
969 bio = bio_kmalloc(gfp_mask, nr_pages);
970 if (!bio)
971 return ERR_PTR(-ENOMEM);
972
973 ret = -ENOMEM;
974 pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
975 if (!pages)
976 goto out;
977
978 for (i = 0; i < iov_count; i++) {
979 unsigned long uaddr = (unsigned long)iov[i].iov_base;
980 unsigned long len = iov[i].iov_len;
981 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
982 unsigned long start = uaddr >> PAGE_SHIFT;
983 const int local_nr_pages = end - start;
984 const int page_limit = cur_page + local_nr_pages;
985
986 ret = get_user_pages_fast(uaddr, local_nr_pages,
987 write_to_vm, &pages[cur_page]);
988 if (ret < local_nr_pages) {
989 ret = -EFAULT;
990 goto out_unmap;
991 }
992
993 offset = uaddr & ~PAGE_MASK;
994 for (j = cur_page; j < page_limit; j++) {
995 unsigned int bytes = PAGE_SIZE - offset;
996
997 if (len <= 0)
998 break;
999
1000 if (bytes > len)
1001 bytes = len;
1002
1003
1004
1005
1006 if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
1007 bytes)
1008 break;
1009
1010 len -= bytes;
1011 offset = 0;
1012 }
1013
1014 cur_page = j;
1015
1016
1017
1018 while (j < page_limit)
1019 page_cache_release(pages[j++]);
1020 }
1021
1022 kfree(pages);
1023
1024
1025
1026
1027 if (!write_to_vm)
1028 bio->bi_rw |= REQ_WRITE;
1029
1030 bio->bi_bdev = bdev;
1031 bio->bi_flags |= (1 << BIO_USER_MAPPED);
1032 return bio;
1033
1034 out_unmap:
1035 for (i = 0; i < nr_pages; i++) {
1036 if(!pages[i])
1037 break;
1038 page_cache_release(pages[i]);
1039 }
1040 out:
1041 kfree(pages);
1042 bio_put(bio);
1043 return ERR_PTR(ret);
1044}
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
1059 unsigned long uaddr, unsigned int len, int write_to_vm,
1060 gfp_t gfp_mask)
1061{
1062 struct sg_iovec iov;
1063
1064 iov.iov_base = (void __user *)uaddr;
1065 iov.iov_len = len;
1066
1067 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
1068}
1069EXPORT_SYMBOL(bio_map_user);
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
1084 struct sg_iovec *iov, int iov_count,
1085 int write_to_vm, gfp_t gfp_mask)
1086{
1087 struct bio *bio;
1088
1089 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
1090 gfp_mask);
1091 if (IS_ERR(bio))
1092 return bio;
1093
1094
1095
1096
1097
1098
1099
1100 bio_get(bio);
1101
1102 return bio;
1103}
1104
1105static void __bio_unmap_user(struct bio *bio)
1106{
1107 struct bio_vec *bvec;
1108 int i;
1109
1110
1111
1112
1113 __bio_for_each_segment(bvec, bio, i, 0) {
1114 if (bio_data_dir(bio) == READ)
1115 set_page_dirty_lock(bvec->bv_page);
1116
1117 page_cache_release(bvec->bv_page);
1118 }
1119
1120 bio_put(bio);
1121}
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
/*
 * Undo bio_map_user()/bio_map_user_iov(): release the user pages and drop
 * both references held on @bio (one inside __bio_unmap_user(), one here).
 */
void bio_unmap_user(struct bio *bio)
{
	__bio_unmap_user(bio);

	/* second reference, taken by bio_map_user_iov() */
	bio_put(bio);
}
EXPORT_SYMBOL(bio_unmap_user);
1138
/*
 * Completion callback installed by __bio_map_kern(): drops a reference on
 * the bio and ignores @err.
 */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
1143
1144static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1145 unsigned int len, gfp_t gfp_mask)
1146{
1147 unsigned long kaddr = (unsigned long)data;
1148 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1149 unsigned long start = kaddr >> PAGE_SHIFT;
1150 const int nr_pages = end - start;
1151 int offset, i;
1152 struct bio *bio;
1153
1154 bio = bio_kmalloc(gfp_mask, nr_pages);
1155 if (!bio)
1156 return ERR_PTR(-ENOMEM);
1157
1158 offset = offset_in_page(kaddr);
1159 for (i = 0; i < nr_pages; i++) {
1160 unsigned int bytes = PAGE_SIZE - offset;
1161
1162 if (len <= 0)
1163 break;
1164
1165 if (bytes > len)
1166 bytes = len;
1167
1168 if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
1169 offset) < bytes)
1170 break;
1171
1172 data += bytes;
1173 len -= bytes;
1174 offset = 0;
1175 }
1176
1177 bio->bi_end_io = bio_map_kern_endio;
1178 return bio;
1179}
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1192 gfp_t gfp_mask)
1193{
1194 struct bio *bio;
1195
1196 bio = __bio_map_kern(q, data, len, gfp_mask);
1197 if (IS_ERR(bio))
1198 return bio;
1199
1200 if (bio->bi_size == len)
1201 return bio;
1202
1203
1204
1205
1206 bio_put(bio);
1207 return ERR_PTR(-EINVAL);
1208}
1209EXPORT_SYMBOL(bio_map_kern);
1210
1211static void bio_copy_kern_endio(struct bio *bio, int err)
1212{
1213 struct bio_vec *bvec;
1214 const int read = bio_data_dir(bio) == READ;
1215 struct bio_map_data *bmd = bio->bi_private;
1216 int i;
1217 char *p = bmd->sgvecs[0].iov_base;
1218
1219 __bio_for_each_segment(bvec, bio, i, 0) {
1220 char *addr = page_address(bvec->bv_page);
1221 int len = bmd->iovecs[i].bv_len;
1222
1223 if (read)
1224 memcpy(p, addr, len);
1225
1226 __free_page(bvec->bv_page);
1227 p += len;
1228 }
1229
1230 bio_free_map_data(bmd);
1231 bio_put(bio);
1232}
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1246 gfp_t gfp_mask, int reading)
1247{
1248 struct bio *bio;
1249 struct bio_vec *bvec;
1250 int i;
1251
1252 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
1253 if (IS_ERR(bio))
1254 return bio;
1255
1256 if (!reading) {
1257 void *p = data;
1258
1259 bio_for_each_segment(bvec, bio, i) {
1260 char *addr = page_address(bvec->bv_page);
1261
1262 memcpy(addr, p, bvec->bv_len);
1263 p += bvec->bv_len;
1264 }
1265 }
1266
1267 bio->bi_end_io = bio_copy_kern_endio;
1268
1269 return bio;
1270}
1271EXPORT_SYMBOL(bio_copy_kern);
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302void bio_set_pages_dirty(struct bio *bio)
1303{
1304 struct bio_vec *bvec = bio->bi_io_vec;
1305 int i;
1306
1307 for (i = 0; i < bio->bi_vcnt; i++) {
1308 struct page *page = bvec[i].bv_page;
1309
1310 if (page && !PageCompound(page))
1311 set_page_dirty_lock(page);
1312 }
1313}
1314
1315static void bio_release_pages(struct bio *bio)
1316{
1317 struct bio_vec *bvec = bio->bi_io_vec;
1318 int i;
1319
1320 for (i = 0; i < bio->bi_vcnt; i++) {
1321 struct page *page = bvec[i].bv_page;
1322
1323 if (page)
1324 put_page(page);
1325 }
1326}
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
static void bio_dirty_fn(struct work_struct *work);

/* Work item that runs bio_dirty_fn() over the bios queued below. */
static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
/* Protects bio_dirty_list. */
static DEFINE_SPINLOCK(bio_dirty_lock);
/* Bios waiting to be redirtied, singly linked through bi_private. */
static struct bio *bio_dirty_list;
1344
1345
1346
1347
1348static void bio_dirty_fn(struct work_struct *work)
1349{
1350 unsigned long flags;
1351 struct bio *bio;
1352
1353 spin_lock_irqsave(&bio_dirty_lock, flags);
1354 bio = bio_dirty_list;
1355 bio_dirty_list = NULL;
1356 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1357
1358 while (bio) {
1359 struct bio *next = bio->bi_private;
1360
1361 bio_set_pages_dirty(bio);
1362 bio_release_pages(bio);
1363 bio_put(bio);
1364 bio = next;
1365 }
1366}
1367
1368void bio_check_pages_dirty(struct bio *bio)
1369{
1370 struct bio_vec *bvec = bio->bi_io_vec;
1371 int nr_clean_pages = 0;
1372 int i;
1373
1374 for (i = 0; i < bio->bi_vcnt; i++) {
1375 struct page *page = bvec[i].bv_page;
1376
1377 if (PageDirty(page) || PageCompound(page)) {
1378 page_cache_release(page);
1379 bvec[i].bv_page = NULL;
1380 } else {
1381 nr_clean_pages++;
1382 }
1383 }
1384
1385 if (nr_clean_pages) {
1386 unsigned long flags;
1387
1388 spin_lock_irqsave(&bio_dirty_lock, flags);
1389 bio->bi_private = bio_dirty_list;
1390 bio_dirty_list = bio;
1391 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1392 schedule_work(&bio_dirty_work);
1393 } else {
1394 bio_put(bio);
1395 }
1396}
1397
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/* Call flush_dcache_page() on the page of every segment of @bi. */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bio_vec *seg;
	int n;

	bio_for_each_segment(seg, bi, n) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424void bio_endio(struct bio *bio, int error)
1425{
1426 if (error)
1427 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1428 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1429 error = -EIO;
1430
1431 if (bio->bi_end_io)
1432 bio->bi_end_io(bio, error);
1433}
1434EXPORT_SYMBOL(bio_endio);
1435
1436void bio_pair_release(struct bio_pair *bp)
1437{
1438 if (atomic_dec_and_test(&bp->cnt)) {
1439 struct bio *master = bp->bio1.bi_private;
1440
1441 bio_endio(master, bp->error);
1442 mempool_free(bp, bp->bio2.bi_private);
1443 }
1444}
1445EXPORT_SYMBOL(bio_pair_release);
1446
1447static void bio_pair_end_1(struct bio *bi, int err)
1448{
1449 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
1450
1451 if (err)
1452 bp->error = err;
1453
1454 bio_pair_release(bp);
1455}
1456
1457static void bio_pair_end_2(struct bio *bi, int err)
1458{
1459 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
1460
1461 if (err)
1462 bp->error = err;
1463
1464 bio_pair_release(bp);
1465}
1466
1467
1468
1469
1470struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1471{
1472 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
1473
1474 if (!bp)
1475 return bp;
1476
1477 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1478 bi->bi_sector + first_sectors);
1479
1480 BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
1481 BUG_ON(bi->bi_idx != 0);
1482 atomic_set(&bp->cnt, 3);
1483 bp->error = 0;
1484 bp->bio1 = *bi;
1485 bp->bio2 = *bi;
1486 bp->bio2.bi_sector += first_sectors;
1487 bp->bio2.bi_size -= first_sectors << 9;
1488 bp->bio1.bi_size = first_sectors << 9;
1489
1490 if (bi->bi_vcnt != 0) {
1491 bp->bv1 = bi->bi_io_vec[0];
1492 bp->bv2 = bi->bi_io_vec[0];
1493
1494 if (bio_is_rw(bi)) {
1495 bp->bv2.bv_offset += first_sectors << 9;
1496 bp->bv2.bv_len -= first_sectors << 9;
1497 bp->bv1.bv_len = first_sectors << 9;
1498 }
1499
1500 bp->bio1.bi_io_vec = &bp->bv1;
1501 bp->bio2.bi_io_vec = &bp->bv2;
1502
1503 bp->bio1.bi_max_vecs = 1;
1504 bp->bio2.bi_max_vecs = 1;
1505 }
1506
1507 bp->bio1.bi_end_io = bio_pair_end_1;
1508 bp->bio2.bi_end_io = bio_pair_end_2;
1509
1510 bp->bio1.bi_private = bi;
1511 bp->bio2.bi_private = bio_split_pool;
1512
1513 if (bio_integrity(bi))
1514 bio_integrity_split(bi, bp, first_sectors);
1515
1516 return bp;
1517}
1518EXPORT_SYMBOL(bio_split);
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530sector_t bio_sector_offset(struct bio *bio, unsigned short index,
1531 unsigned int offset)
1532{
1533 unsigned int sector_sz;
1534 struct bio_vec *bv;
1535 sector_t sectors;
1536 int i;
1537
1538 sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
1539 sectors = 0;
1540
1541 if (index >= bio->bi_idx)
1542 index = bio->bi_vcnt - 1;
1543
1544 __bio_for_each_segment(bv, bio, i, 0) {
1545 if (i == index) {
1546 if (offset > bv->bv_offset)
1547 sectors += (offset - bv->bv_offset) / sector_sz;
1548 break;
1549 }
1550
1551 sectors += bv->bv_len / sector_sz;
1552 }
1553
1554 return sectors;
1555}
1556EXPORT_SYMBOL(bio_sector_offset);
1557
1558
1559
1560
1561
1562static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1563{
1564 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1565
1566 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1567 if (!bs->bvec_pool)
1568 return -ENOMEM;
1569
1570 return 0;
1571}
1572
1573static void biovec_free_pools(struct bio_set *bs)
1574{
1575 mempool_destroy(bs->bvec_pool);
1576}
1577
1578void bioset_free(struct bio_set *bs)
1579{
1580 if (bs->bio_pool)
1581 mempool_destroy(bs->bio_pool);
1582
1583 bioset_integrity_free(bs);
1584 biovec_free_pools(bs);
1585 bio_put_slab(bs);
1586
1587 kfree(bs);
1588}
1589EXPORT_SYMBOL(bioset_free);
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1605{
1606 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1607 struct bio_set *bs;
1608
1609 bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1610 if (!bs)
1611 return NULL;
1612
1613 bs->front_pad = front_pad;
1614
1615 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1616 if (!bs->bio_slab) {
1617 kfree(bs);
1618 return NULL;
1619 }
1620
1621 bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
1622 if (!bs->bio_pool)
1623 goto bad;
1624
1625 if (!biovec_create_pools(bs, pool_size))
1626 return bs;
1627
1628bad:
1629 bioset_free(bs);
1630 return NULL;
1631}
1632EXPORT_SYMBOL(bioset_create);
1633
1634#ifdef CONFIG_BLK_CGROUP
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648int bio_associate_current(struct bio *bio)
1649{
1650 struct io_context *ioc;
1651 struct cgroup_subsys_state *css;
1652
1653 if (bio->bi_ioc)
1654 return -EBUSY;
1655
1656 ioc = current->io_context;
1657 if (!ioc)
1658 return -ENOENT;
1659
1660
1661 get_io_context_active(ioc);
1662 bio->bi_ioc = ioc;
1663
1664
1665 rcu_read_lock();
1666 css = task_subsys_state(current, blkio_subsys_id);
1667 if (css && css_tryget(css))
1668 bio->bi_css = css;
1669 rcu_read_unlock();
1670
1671 return 0;
1672}
1673
1674
1675
1676
1677
1678void bio_disassociate_task(struct bio *bio)
1679{
1680 if (bio->bi_ioc) {
1681 put_io_context(bio->bi_ioc);
1682 bio->bi_ioc = NULL;
1683 }
1684 if (bio->bi_css) {
1685 css_put(bio->bi_css);
1686 bio->bi_css = NULL;
1687 }
1688}
1689
1690#endif
1691
1692static void __init biovec_init_slabs(void)
1693{
1694 int i;
1695
1696 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
1697 int size;
1698 struct biovec_slab *bvs = bvec_slabs + i;
1699
1700 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1701 bvs->slab = NULL;
1702 continue;
1703 }
1704
1705 size = bvs->nr_vecs * sizeof(struct bio_vec);
1706 bvs->slab = kmem_cache_create(bvs->name, size, 0,
1707 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1708 }
1709}
1710
1711static int __init init_bio(void)
1712{
1713 bio_slab_max = 2;
1714 bio_slab_nr = 0;
1715 bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
1716 if (!bio_slabs)
1717 panic("bio: can't allocate bios\n");
1718
1719 bio_integrity_init();
1720 biovec_init_slabs();
1721
1722 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
1723 if (!fs_bio_set)
1724 panic("bio: can't allocate bios\n");
1725
1726 if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
1727 panic("bio: can't create integrity pool\n");
1728
1729 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
1730 sizeof(struct bio_pair));
1731 if (!bio_split_pool)
1732 panic("bio: can't create split pool\n");
1733
1734 return 0;
1735}
1736subsys_initcall(init_bio);
1737