1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/mm.h>
20#include <linux/bio.h>
21#include <linux/blk.h>
22#include <linux/slab.h>
23#include <linux/iobuf.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/mempool.h>
27
/* Size passed to mempool_create() for bio_pool in init_bio(). */
#define BIO_POOL_SIZE 256

/* mempool and backing slab cache for struct bio; both are set up in init_bio(). */
static mempool_t *bio_pool;
static kmem_cache_t *bio_slab;

/* Number of entries in bvec_array[], i.e. of distinct bio_vec array sizes. */
#define BIOVEC_NR_POOLS 6
34
/*
 * Describes one size class of bio_vec arrays.  The first two fields are
 * filled in statically by the BV() initializer below; slab and pool are
 * created at boot by biovec_init_pools().
 */
struct biovec_pool {
	int nr_vecs;		/* number of struct bio_vec per array in this class */
	char *name;		/* name given to kmem_cache_create() */
	kmem_cache_t *slab;	/* slab cache backing the mempool */
	mempool_t *pool;	/* mempool the arrays are allocated from */
};
41
42
43
44
45
46
47
48#define BV(x) { x, "biovec-" #x }
49static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
50 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
51};
52#undef BV
53
/*
 * mempool allocation callback: @data is the slab cache that was handed to
 * mempool_create(), and the element is taken straight from that cache.
 */
static void *slab_pool_alloc(int gfp_mask, void *data)
{
	void *cache = data;

	return kmem_cache_alloc(cache, gfp_mask);
}
58
/*
 * mempool free callback: returns @ptr to the slab cache passed as @data
 * (the same cache slab_pool_alloc() allocates from).
 */
static void slab_pool_free(void *ptr, void *data)
{
	void *cache = data;

	kmem_cache_free(cache, ptr);
}
63
64static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, int *idx)
65{
66 struct biovec_pool *bp;
67 struct bio_vec *bvl;
68
69
70
71
72 switch (nr) {
73 case 1 : *idx = 0; break;
74 case 2 ... 4: *idx = 1; break;
75 case 5 ... 16: *idx = 2; break;
76 case 17 ... 64: *idx = 3; break;
77 case 65 ... 128: *idx = 4; break;
78 case 129 ... BIO_MAX_PAGES: *idx = 5; break;
79 default:
80 return NULL;
81 }
82
83
84
85 bp = bvec_array + *idx;
86
87 bvl = mempool_alloc(bp->pool, gfp_mask);
88 if (bvl)
89 memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
90 return bvl;
91}
92
93
94
95
96void bio_destructor(struct bio *bio)
97{
98 struct biovec_pool *bp = bvec_array + bio->bi_max;
99
100 BIO_BUG_ON(bio->bi_max >= BIOVEC_NR_POOLS);
101
102
103
104 if (!bio_flagged(bio, BIO_CLONED))
105 mempool_free(bio->bi_io_vec, bp->pool);
106
107 mempool_free(bio, bio_pool);
108}
109
110inline void bio_init(struct bio *bio)
111{
112 bio->bi_next = NULL;
113 bio->bi_flags = 1 << BIO_UPTODATE;
114 bio->bi_rw = 0;
115 bio->bi_vcnt = 0;
116 bio->bi_idx = 0;
117 bio->bi_phys_segments = 0;
118 bio->bi_hw_segments = 0;
119 bio->bi_size = 0;
120 bio->bi_end_io = NULL;
121 atomic_set(&bio->bi_cnt, 1);
122}
123
124
125
126
127
128
129
130
131
132
133
134struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
135{
136 struct bio *bio;
137 struct bio_vec *bvl = NULL;
138 int pf_flags = current->flags;
139
140 current->flags |= PF_NOWARN;
141 bio = mempool_alloc(bio_pool, gfp_mask);
142 if (unlikely(!bio))
143 goto out;
144
145 if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask,nr_iovecs,&bio->bi_max))) {
146 bio_init(bio);
147 bio->bi_destructor = bio_destructor;
148 bio->bi_io_vec = bvl;
149 goto out;
150 }
151
152 mempool_free(bio, bio_pool);
153 bio = NULL;
154out:
155 current->flags = pf_flags;
156 return bio;
157}
158
159
160
161
162
163
164
165
166
167void bio_put(struct bio *bio)
168{
169 BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
170
171
172
173
174 if (atomic_dec_and_test(&bio->bi_cnt)) {
175 bio->bi_next = NULL;
176 bio->bi_destructor(bio);
177 }
178}
179
180inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
181{
182 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
183 blk_recount_segments(q, bio);
184
185 return bio->bi_phys_segments;
186}
187
188inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
189{
190 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
191 blk_recount_segments(q, bio);
192
193 return bio->bi_hw_segments;
194}
195
196
197
198
199
200
201
202
203
204
205inline void __bio_clone(struct bio *bio, struct bio *bio_src)
206{
207 bio->bi_io_vec = bio_src->bi_io_vec;
208
209 bio->bi_sector = bio_src->bi_sector;
210 bio->bi_bdev = bio_src->bi_bdev;
211 bio->bi_flags |= 1 << BIO_CLONED;
212 bio->bi_rw = bio_src->bi_rw;
213
214
215
216
217
218 bio->bi_vcnt = bio_src->bi_vcnt;
219 bio->bi_idx = bio_src->bi_idx;
220 if (bio_flagged(bio, BIO_SEG_VALID)) {
221 bio->bi_phys_segments = bio_src->bi_phys_segments;
222 bio->bi_hw_segments = bio_src->bi_hw_segments;
223 bio->bi_flags |= (1 << BIO_SEG_VALID);
224 }
225 bio->bi_size = bio_src->bi_size;
226 bio->bi_max = bio_src->bi_max;
227}
228
229
230
231
232
233
234
235
236struct bio *bio_clone(struct bio *bio, int gfp_mask)
237{
238 struct bio *b = bio_alloc(gfp_mask, 0);
239
240 if (b)
241 __bio_clone(b, bio);
242
243 return b;
244}
245
246
247
248
249
250
251
252
253
254
255
256struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy)
257{
258 struct bio *b = bio_alloc(gfp_mask, bio->bi_vcnt);
259 unsigned long flags = 0;
260 struct bio_vec *bv;
261 int i;
262
263 if (unlikely(!b))
264 return NULL;
265
266
267
268
269 __bio_for_each_segment(bv, bio, i, 0) {
270 struct bio_vec *bbv = &b->bi_io_vec[i];
271 char *vfrom, *vto;
272
273 bbv->bv_page = alloc_page(gfp_mask);
274 if (bbv->bv_page == NULL)
275 goto oom;
276
277 bbv->bv_len = bv->bv_len;
278 bbv->bv_offset = bv->bv_offset;
279
280
281
282
283
284 if (!copy)
285 continue;
286
287 if (gfp_mask & __GFP_WAIT) {
288 vfrom = kmap(bv->bv_page);
289 vto = kmap(bbv->bv_page);
290 } else {
291 local_irq_save(flags);
292 vfrom = kmap_atomic(bv->bv_page, KM_BIO_SRC_IRQ);
293 vto = kmap_atomic(bbv->bv_page, KM_BIO_DST_IRQ);
294 }
295
296 memcpy(vto + bbv->bv_offset, vfrom + bv->bv_offset, bv->bv_len);
297 if (gfp_mask & __GFP_WAIT) {
298 kunmap(bbv->bv_page);
299 kunmap(bv->bv_page);
300 } else {
301 kunmap_atomic(vto, KM_BIO_DST_IRQ);
302 kunmap_atomic(vfrom, KM_BIO_SRC_IRQ);
303 local_irq_restore(flags);
304 }
305 }
306
307 b->bi_sector = bio->bi_sector;
308 b->bi_bdev = bio->bi_bdev;
309 b->bi_rw = bio->bi_rw;
310
311 b->bi_vcnt = bio->bi_vcnt;
312 b->bi_size = bio->bi_size;
313
314 return b;
315
316oom:
317 while (--i >= 0)
318 __free_page(b->bi_io_vec[i].bv_page);
319
320 mempool_free(b, bio_pool);
321 return NULL;
322}
323
324
325
326
327
328
329
330
331
332
333
334
335int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
336 unsigned int offset)
337{
338 request_queue_t *q = bdev_get_queue(bio->bi_bdev);
339 int fail_segments = 0, retried_segments = 0;
340 struct bio_vec *bvec;
341
342
343
344
345 if (unlikely(bio_flagged(bio, BIO_CLONED)))
346 return 1;
347
348
349
350
351 BUG_ON(bio->bi_max > BIOVEC_NR_POOLS);
352
353 if (bio->bi_vcnt >= bvec_array[bio->bi_max].nr_vecs)
354 return 1;
355
356 if (((bio->bi_size + len) >> 9) > q->max_sectors)
357 return 1;
358
359
360
361
362
363retry_segments:
364 if (bio_phys_segments(q, bio) >= q->max_phys_segments
365 || bio_hw_segments(q, bio) >= q->max_hw_segments)
366 fail_segments = 1;
367
368 if (fail_segments) {
369 if (retried_segments)
370 return 1;
371
372 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
373 retried_segments = 1;
374 goto retry_segments;
375 }
376
377
378
379
380
381 bvec = &bio->bi_io_vec[bio->bi_vcnt];
382 bvec->bv_page = page;
383 bvec->bv_len = len;
384 bvec->bv_offset = offset;
385
386
387
388
389
390
391 if (q->merge_bvec_fn && q->merge_bvec_fn(q, bio, bvec)) {
392 bvec->bv_page = NULL;
393 bvec->bv_len = 0;
394 bvec->bv_offset = 0;
395 return 1;
396 }
397
398 bio->bi_vcnt++;
399 bio->bi_phys_segments++;
400 bio->bi_hw_segments++;
401 bio->bi_size += len;
402 return 0;
403}
404
405static int bio_end_io_kio(struct bio *bio, unsigned int bytes_done, int error)
406{
407 struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
408
409 if (bio->bi_size)
410 return 1;
411
412 end_kio_request(kio, error);
413 bio_put(bio);
414 return 0;
415}
416
417
418
419
420
421
422
423
424
425
426
427
428
429void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector)
430{
431 int i, offset, size, err, map_i, total_nr_pages, nr_pages;
432 struct bio *bio;
433
434 err = 0;
435 if ((rw & WRITE) && bdev_read_only(bdev)) {
436 printk("ll_rw_bio: WRITE to ro device %s\n", bdevname(bdev));
437 err = -EPERM;
438 goto out;
439 }
440
441 if (!kio->nr_pages) {
442 err = -EINVAL;
443 goto out;
444 }
445
446
447
448
449
450 total_nr_pages = kio->nr_pages;
451 offset = kio->offset & ~PAGE_MASK;
452 size = kio->length;
453
454 atomic_set(&kio->io_count, 1);
455
456 map_i = 0;
457
458next_chunk:
459 nr_pages = BIO_MAX_PAGES;
460 if (nr_pages > total_nr_pages)
461 nr_pages = total_nr_pages;
462
463 atomic_inc(&kio->io_count);
464
465
466
467
468 if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) {
469 err = -ENOMEM;
470 goto out;
471 }
472
473 bio->bi_sector = sector;
474 bio->bi_bdev = bdev;
475 bio->bi_idx = 0;
476 bio->bi_end_io = bio_end_io_kio;
477 bio->bi_private = kio;
478
479 for (i = 0; i < nr_pages; i++, map_i++) {
480 int nbytes = PAGE_SIZE - offset;
481
482 if (nbytes > size)
483 nbytes = size;
484
485 BUG_ON(kio->maplist[map_i] == NULL);
486
487
488
489
490
491 if (bio_add_page(bio, kio->maplist[map_i], nbytes, offset))
492 break;
493
494
495
496
497 offset = 0;
498
499 sector += nbytes >> 9;
500 size -= nbytes;
501 total_nr_pages--;
502 kio->offset += nbytes;
503 }
504
505 submit_bio(rw, bio);
506
507 if (total_nr_pages)
508 goto next_chunk;
509
510 if (size) {
511 printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length);
512 BUG();
513 }
514
515out:
516 if (err)
517 kio->errno = err;
518
519
520
521
522
523
524 end_kio_request(kio, !err);
525}
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541int bio_endio(struct bio *bio, unsigned int bytes_done, int error)
542{
543 if (error)
544 clear_bit(BIO_UPTODATE, &bio->bi_flags);
545
546 if (unlikely(bytes_done > bio->bi_size)) {
547 printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
548 bytes_done, bio->bi_size);
549 bytes_done = bio->bi_size;
550 }
551
552 bio->bi_size -= bytes_done;
553 return bio->bi_end_io(bio, bytes_done, error);
554}
555
556static void __init biovec_init_pools(void)
557{
558 int i, size, megabytes, pool_entries = BIO_POOL_SIZE;
559 int scale = BIOVEC_NR_POOLS;
560
561 megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);
562
563
564
565
566 if (megabytes <= 16)
567 scale = 0;
568 else if (megabytes <= 32)
569 scale = 1;
570 else if (megabytes <= 64)
571 scale = 2;
572 else if (megabytes <= 96)
573 scale = 3;
574 else if (megabytes <= 128)
575 scale = 4;
576
577
578
579
580 pool_entries = megabytes * 2;
581 if (pool_entries > 256)
582 pool_entries = 256;
583
584 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
585 struct biovec_pool *bp = bvec_array + i;
586
587 size = bp->nr_vecs * sizeof(struct bio_vec);
588
589 bp->slab = kmem_cache_create(bp->name, size, 0,
590 SLAB_HWCACHE_ALIGN, NULL, NULL);
591 if (!bp->slab)
592 panic("biovec: can't init slab cache\n");
593
594 if (i >= scale)
595 pool_entries >>= 1;
596
597 bp->pool = mempool_create(pool_entries, slab_pool_alloc,
598 slab_pool_free, bp->slab);
599 if (!bp->pool)
600 panic("biovec: can't init mempool\n");
601
602 printk("biovec pool[%d]: %3d bvecs: %3d entries (%d bytes)\n",
603 i, bp->nr_vecs, pool_entries,
604 size);
605 }
606}
607
608static int __init init_bio(void)
609{
610 bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
611 SLAB_HWCACHE_ALIGN, NULL, NULL);
612 if (!bio_slab)
613 panic("bio: can't create slab cache\n");
614 bio_pool = mempool_create(BIO_POOL_SIZE, slab_pool_alloc, slab_pool_free, bio_slab);
615 if (!bio_pool)
616 panic("bio: can't create mempool\n");
617
618 printk("BIO: pool of %d setup, %ZuKb (%Zd bytes/bio)\n", BIO_POOL_SIZE, BIO_POOL_SIZE * sizeof(struct bio) >> 10, sizeof(struct bio));
619
620 biovec_init_pools();
621
622 return 0;
623}
624
/* Register init_bio() as this file's initialisation function. */
module_init(init_bio);

/* Symbols made available to other kernel code and modules. */
EXPORT_SYMBOL(bio_alloc);
EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(ll_rw_kio);
EXPORT_SYMBOL(bio_endio);
EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(bio_copy);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
638