1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bio.h>
21#include <linux/blkdev.h>
22#include <linux/slab.h>
23#include <linux/init.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/mempool.h>
27#include <linux/workqueue.h>
28
29#define BIO_POOL_SIZE 256
30
31static mempool_t *bio_pool;
32static kmem_cache_t *bio_slab;
33
34#define BIOVEC_NR_POOLS 6
35
36
37
38
39
40#define BIO_SPLIT_ENTRIES 8
41mempool_t *bio_split_pool;
42
43struct biovec_pool {
44 int nr_vecs;
45 char *name;
46 kmem_cache_t *slab;
47 mempool_t *pool;
48};
49
50
51
52
53
54
55
56#define BV(x) { .nr_vecs = x, .name = "biovec-" #x }
57static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
58 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
59};
60#undef BV
61
62static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, unsigned long *idx)
63{
64 struct biovec_pool *bp;
65 struct bio_vec *bvl;
66
67
68
69
70 switch (nr) {
71 case 1 : *idx = 0; break;
72 case 2 ... 4: *idx = 1; break;
73 case 5 ... 16: *idx = 2; break;
74 case 17 ... 64: *idx = 3; break;
75 case 65 ... 128: *idx = 4; break;
76 case 129 ... BIO_MAX_PAGES: *idx = 5; break;
77 default:
78 return NULL;
79 }
80
81
82
83 bp = bvec_array + *idx;
84
85 bvl = mempool_alloc(bp->pool, gfp_mask);
86 if (bvl)
87 memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
88 return bvl;
89}
90
91
92
93
94void bio_destructor(struct bio *bio)
95{
96 const int pool_idx = BIO_POOL_IDX(bio);
97 struct biovec_pool *bp = bvec_array + pool_idx;
98
99 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
100
101
102
103
104 if (!bio_flagged(bio, BIO_CLONED))
105 mempool_free(bio->bi_io_vec, bp->pool);
106
107 mempool_free(bio, bio_pool);
108}
109
110inline void bio_init(struct bio *bio)
111{
112 bio->bi_next = NULL;
113 bio->bi_flags = 1 << BIO_UPTODATE;
114 bio->bi_rw = 0;
115 bio->bi_vcnt = 0;
116 bio->bi_idx = 0;
117 bio->bi_phys_segments = 0;
118 bio->bi_hw_segments = 0;
119 bio->bi_size = 0;
120 bio->bi_max_vecs = 0;
121 bio->bi_end_io = NULL;
122 atomic_set(&bio->bi_cnt, 1);
123 bio->bi_private = NULL;
124}
125
126
127
128
129
130
131
132
133
134
135
136struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
137{
138 struct bio_vec *bvl = NULL;
139 unsigned long idx;
140 struct bio *bio;
141
142 bio = mempool_alloc(bio_pool, gfp_mask);
143 if (unlikely(!bio))
144 goto out;
145
146 bio_init(bio);
147
148 if (unlikely(!nr_iovecs))
149 goto noiovec;
150
151 bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx);
152 if (bvl) {
153 bio->bi_flags |= idx << BIO_POOL_OFFSET;
154 bio->bi_max_vecs = bvec_array[idx].nr_vecs;
155noiovec:
156 bio->bi_io_vec = bvl;
157 bio->bi_destructor = bio_destructor;
158out:
159 return bio;
160 }
161
162 mempool_free(bio, bio_pool);
163 bio = NULL;
164 goto out;
165}
166
167
168
169
170
171
172
173
174
175void bio_put(struct bio *bio)
176{
177 BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
178
179
180
181
182 if (atomic_dec_and_test(&bio->bi_cnt)) {
183 bio->bi_next = NULL;
184 bio->bi_destructor(bio);
185 }
186}
187
188inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
189{
190 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
191 blk_recount_segments(q, bio);
192
193 return bio->bi_phys_segments;
194}
195
196inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
197{
198 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
199 blk_recount_segments(q, bio);
200
201 return bio->bi_hw_segments;
202}
203
204
205
206
207
208
209
210
211
212
213inline void __bio_clone(struct bio *bio, struct bio *bio_src)
214{
215 bio->bi_io_vec = bio_src->bi_io_vec;
216
217 bio->bi_sector = bio_src->bi_sector;
218 bio->bi_bdev = bio_src->bi_bdev;
219 bio->bi_flags |= 1 << BIO_CLONED;
220 bio->bi_rw = bio_src->bi_rw;
221
222
223
224
225
226 bio->bi_vcnt = bio_src->bi_vcnt;
227 bio->bi_idx = bio_src->bi_idx;
228 if (bio_flagged(bio, BIO_SEG_VALID)) {
229 bio->bi_phys_segments = bio_src->bi_phys_segments;
230 bio->bi_hw_segments = bio_src->bi_hw_segments;
231 bio->bi_flags |= (1 << BIO_SEG_VALID);
232 }
233 bio->bi_size = bio_src->bi_size;
234
235
236
237
238
239
240 bio->bi_max_vecs = 0;
241 bio->bi_flags &= (BIO_POOL_MASK - 1);
242}
243
244
245
246
247
248
249
250
251struct bio *bio_clone(struct bio *bio, int gfp_mask)
252{
253 struct bio *b = bio_alloc(gfp_mask, 0);
254
255 if (b)
256 __bio_clone(b, bio);
257
258 return b;
259}
260
261
262
263
264
265
266
267
268
269
270int bio_get_nr_vecs(struct block_device *bdev)
271{
272 request_queue_t *q = bdev_get_queue(bdev);
273 int nr_pages;
274
275 nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
276 if (nr_pages > q->max_phys_segments)
277 nr_pages = q->max_phys_segments;
278 if (nr_pages > q->max_hw_segments)
279 nr_pages = q->max_hw_segments;
280
281 return nr_pages;
282}
283
284
285
286
287
288
289
290
291
292
293
294
295int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
296 unsigned int offset)
297{
298 request_queue_t *q = bdev_get_queue(bio->bi_bdev);
299 int retried_segments = 0;
300 struct bio_vec *bvec;
301
302
303
304
305 if (unlikely(bio_flagged(bio, BIO_CLONED)))
306 return 0;
307
308 if (bio->bi_vcnt >= bio->bi_max_vecs)
309 return 0;
310
311 if (((bio->bi_size + len) >> 9) > q->max_sectors)
312 return 0;
313
314
315
316
317
318
319 while (bio_phys_segments(q, bio) >= q->max_phys_segments
320 || bio_hw_segments(q, bio) >= q->max_hw_segments) {
321
322 if (retried_segments)
323 return 0;
324
325 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
326 retried_segments = 1;
327 }
328
329
330
331
332
333 bvec = &bio->bi_io_vec[bio->bi_vcnt];
334 bvec->bv_page = page;
335 bvec->bv_len = len;
336 bvec->bv_offset = offset;
337
338
339
340
341
342
343 if (q->merge_bvec_fn) {
344
345
346
347
348 if (q->merge_bvec_fn(q, bio, bvec) < len) {
349 bvec->bv_page = NULL;
350 bvec->bv_len = 0;
351 bvec->bv_offset = 0;
352 return 0;
353 }
354 }
355
356 bio->bi_vcnt++;
357 bio->bi_phys_segments++;
358 bio->bi_hw_segments++;
359 bio->bi_size += len;
360 return len;
361}
362
363static struct bio *__bio_map_user(struct block_device *bdev,
364 unsigned long uaddr, unsigned int len,
365 int write_to_vm)
366{
367 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
368 unsigned long start = uaddr >> PAGE_SHIFT;
369 const int nr_pages = end - start;
370 request_queue_t *q = bdev_get_queue(bdev);
371 int ret, offset, i;
372 struct page **pages;
373 struct bio *bio;
374
375
376
377
378
379 if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
380 return NULL;
381
382 bio = bio_alloc(GFP_KERNEL, nr_pages);
383 if (!bio)
384 return NULL;
385
386 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
387 if (!pages)
388 goto out;
389
390 down_read(¤t->mm->mmap_sem);
391 ret = get_user_pages(current, current->mm, uaddr, nr_pages,
392 write_to_vm, 0, pages, NULL);
393 up_read(¤t->mm->mmap_sem);
394
395 if (ret < nr_pages)
396 goto out;
397
398 bio->bi_bdev = bdev;
399
400 offset = uaddr & ~PAGE_MASK;
401 for (i = 0; i < nr_pages; i++) {
402 unsigned int bytes = PAGE_SIZE - offset;
403
404 if (len <= 0)
405 break;
406
407 if (bytes > len)
408 bytes = len;
409
410
411
412
413 if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
414 break;
415
416 len -= bytes;
417 offset = 0;
418 }
419
420
421
422
423 while (i < nr_pages)
424 page_cache_release(pages[i++]);
425
426 kfree(pages);
427
428
429
430
431 if (!write_to_vm)
432 bio->bi_rw |= (1 << BIO_RW);
433
434 blk_queue_bounce(q, &bio);
435 return bio;
436out:
437 kfree(pages);
438 bio_put(bio);
439 return NULL;
440}
441
442
443
444
445
446
447
448
449
450
451
452struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
453 unsigned int len, int write_to_vm)
454{
455 struct bio *bio;
456
457 bio = __bio_map_user(bdev, uaddr, len, write_to_vm);
458
459 if (bio) {
460
461
462
463
464
465
466 bio_get(bio);
467
468 if (bio->bi_size < len) {
469 bio_endio(bio, bio->bi_size, 0);
470 bio_unmap_user(bio, 0);
471 return NULL;
472 }
473 }
474
475 return bio;
476}
477
478static void __bio_unmap_user(struct bio *bio, int write_to_vm)
479{
480 struct bio_vec *bvec;
481 int i;
482
483
484
485
486 if (bio->bi_private) {
487
488
489
490 BUG_ON(!bio_flagged(bio, BIO_BOUNCED));
491
492 bio = bio->bi_private;
493 }
494
495
496
497
498 __bio_for_each_segment(bvec, bio, i, 0) {
499 if (write_to_vm)
500 set_page_dirty_lock(bvec->bv_page);
501
502 page_cache_release(bvec->bv_page);
503 }
504
505 bio_put(bio);
506}
507
508
509
510
511
512
513
514
515
516
517
518
/**
 * bio_unmap_user - undo bio_map_user()
 * @bio:	bio returned by bio_map_user()
 * @write_to_vm: non-zero if the device wrote into the user pages
 *
 * Releases the mapped pages through __bio_unmap_user(), which drops one
 * reference, then drops a further reference on @bio itself. The second
 * put pairs with the bio_get() done in bio_map_user().
 */
void bio_unmap_user(struct bio *bio, int write_to_vm)
{
	__bio_unmap_user(bio, write_to_vm);

	bio_put(bio);
}
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554void bio_set_pages_dirty(struct bio *bio)
555{
556 struct bio_vec *bvec = bio->bi_io_vec;
557 int i;
558
559 for (i = 0; i < bio->bi_vcnt; i++) {
560 struct page *page = bvec[i].bv_page;
561
562 if (page && !PageCompound(page))
563 set_page_dirty_lock(page);
564 }
565}
566
567static void bio_release_pages(struct bio *bio)
568{
569 struct bio_vec *bvec = bio->bi_io_vec;
570 int i;
571
572 for (i = 0; i < bio->bi_vcnt; i++) {
573 struct page *page = bvec[i].bv_page;
574
575 if (page)
576 put_page(page);
577 }
578}
579
580
581
582
583
584
585
586
587
588
589
590
591static void bio_dirty_fn(void *data);
592
593static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL);
594static spinlock_t bio_dirty_lock = SPIN_LOCK_UNLOCKED;
595static struct bio *bio_dirty_list = NULL;
596
597
598
599
600static void bio_dirty_fn(void *data)
601{
602 unsigned long flags;
603 struct bio *bio;
604
605 spin_lock_irqsave(&bio_dirty_lock, flags);
606 bio = bio_dirty_list;
607 bio_dirty_list = NULL;
608 spin_unlock_irqrestore(&bio_dirty_lock, flags);
609
610 while (bio) {
611 struct bio *next = bio->bi_private;
612
613 bio_set_pages_dirty(bio);
614 bio_release_pages(bio);
615 bio_put(bio);
616 bio = next;
617 }
618}
619
620void bio_check_pages_dirty(struct bio *bio)
621{
622 struct bio_vec *bvec = bio->bi_io_vec;
623 int nr_clean_pages = 0;
624 int i;
625
626 for (i = 0; i < bio->bi_vcnt; i++) {
627 struct page *page = bvec[i].bv_page;
628
629 if (PageDirty(page) || PageCompound(page)) {
630 page_cache_release(page);
631 bvec[i].bv_page = NULL;
632 } else {
633 nr_clean_pages++;
634 }
635 }
636
637 if (nr_clean_pages) {
638 unsigned long flags;
639
640 spin_lock_irqsave(&bio_dirty_lock, flags);
641 bio->bi_private = bio_dirty_list;
642 bio_dirty_list = bio;
643 spin_unlock_irqrestore(&bio_dirty_lock, flags);
644 schedule_work(&bio_dirty_work);
645 } else {
646 bio_put(bio);
647 }
648}
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
666{
667 if (error)
668 clear_bit(BIO_UPTODATE, &bio->bi_flags);
669
670 if (unlikely(bytes_done > bio->bi_size)) {
671 printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
672 bytes_done, bio->bi_size);
673 bytes_done = bio->bi_size;
674 }
675
676 bio->bi_size -= bytes_done;
677 bio->bi_sector += (bytes_done >> 9);
678
679 if (bio->bi_end_io)
680 bio->bi_end_io(bio, bytes_done, error);
681}
682
683void bio_pair_release(struct bio_pair *bp)
684{
685 if (atomic_dec_and_test(&bp->cnt)) {
686 struct bio *master = bp->bio1.bi_private;
687
688 bio_endio(master, master->bi_size, bp->error);
689 mempool_free(bp, bp->bio2.bi_private);
690 }
691}
692
693static int bio_pair_end_1(struct bio * bi, unsigned int done, int err)
694{
695 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
696
697 if (bi->bi_size)
698 return 1;
699 if (err)
700 bp->error = err;
701
702 bio_pair_release(bp);
703 return 0;
704}
705
706static int bio_pair_end_2(struct bio * bi, unsigned int done, int err)
707{
708 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
709
710 if (bi->bi_size)
711 return 1;
712 if (err)
713 bp->error = err;
714
715 bio_pair_release(bp);
716 return 0;
717}
718
719
720
721
722
723struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
724{
725 struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
726
727 if (!bp)
728 return bp;
729
730 BUG_ON(bi->bi_vcnt != 1);
731 BUG_ON(bi->bi_idx != 0);
732 atomic_set(&bp->cnt, 3);
733 bp->error = 0;
734 bp->bio1 = *bi;
735 bp->bio2 = *bi;
736 bp->bio2.bi_sector += first_sectors;
737 bp->bio2.bi_size -= first_sectors << 9;
738 bp->bio1.bi_size = first_sectors << 9;
739
740 bp->bv1 = bi->bi_io_vec[0];
741 bp->bv2 = bi->bi_io_vec[0];
742 bp->bv2.bv_offset += first_sectors << 9;
743 bp->bv2.bv_len -= first_sectors << 9;
744 bp->bv1.bv_len = first_sectors << 9;
745
746 bp->bio1.bi_io_vec = &bp->bv1;
747 bp->bio2.bi_io_vec = &bp->bv2;
748
749 bp->bio1.bi_end_io = bio_pair_end_1;
750 bp->bio2.bi_end_io = bio_pair_end_2;
751
752 bp->bio1.bi_private = bi;
753 bp->bio2.bi_private = pool;
754
755 return bp;
756}
757
758static void *bio_pair_alloc(int gfp_flags, void *data)
759{
760 return kmalloc(sizeof(struct bio_pair), gfp_flags);
761}
762
/*
 * bio_pair_free - element destructor for bio_split_pool
 * @bp:		element obtained from bio_pair_alloc()
 * @data:	unused
 */
static void bio_pair_free(void *bp, void *data)
{
	kfree(bp);
}
767
768static void __init biovec_init_pools(void)
769{
770 int i, size, megabytes, pool_entries = BIO_POOL_SIZE;
771 int scale = BIOVEC_NR_POOLS;
772
773 megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);
774
775
776
777
778 if (megabytes <= 16)
779 scale = 0;
780 else if (megabytes <= 32)
781 scale = 1;
782 else if (megabytes <= 64)
783 scale = 2;
784 else if (megabytes <= 96)
785 scale = 3;
786 else if (megabytes <= 128)
787 scale = 4;
788
789
790
791
792 pool_entries = megabytes * 2;
793 if (pool_entries > 256)
794 pool_entries = 256;
795
796 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
797 struct biovec_pool *bp = bvec_array + i;
798
799 size = bp->nr_vecs * sizeof(struct bio_vec);
800
801 bp->slab = kmem_cache_create(bp->name, size, 0,
802 SLAB_HWCACHE_ALIGN, NULL, NULL);
803 if (!bp->slab)
804 panic("biovec: can't init slab cache\n");
805
806 if (i >= scale)
807 pool_entries >>= 1;
808
809 bp->pool = mempool_create(pool_entries, mempool_alloc_slab,
810 mempool_free_slab, bp->slab);
811 if (!bp->pool)
812 panic("biovec: can't init mempool\n");
813 }
814}
815
816static int __init init_bio(void)
817{
818 bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
819 SLAB_HWCACHE_ALIGN, NULL, NULL);
820 if (!bio_slab)
821 panic("bio: can't create slab cache\n");
822 bio_pool = mempool_create(BIO_POOL_SIZE, mempool_alloc_slab, mempool_free_slab, bio_slab);
823 if (!bio_pool)
824 panic("bio: can't create mempool\n");
825
826 biovec_init_pools();
827
828 bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, bio_pair_alloc, bio_pair_free, NULL);
829 if (!bio_split_pool)
830 panic("bio: can't create split pool\n");
831
832 return 0;
833}
834
835subsys_initcall(init_bio);
836
/* allocation, initialisation and reference counting */
EXPORT_SYMBOL(bio_alloc);
EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(bio_put);

/* completion */
EXPORT_SYMBOL(bio_endio);

/* cloning */
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);

/* segment accounting and building up a bio */
EXPORT_SYMBOL(bio_phys_segments);
EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_get_nr_vecs);
EXPORT_SYMBOL(bio_add_page);

/* mapping user memory */
EXPORT_SYMBOL(bio_map_user);
EXPORT_SYMBOL(bio_unmap_user);

/* splitting */
EXPORT_SYMBOL(bio_split);
EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split_pool);
852