linux-bk/fs/bio.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 * GNU General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 *
  17 */
  18#include <linux/mm.h>
  19#include <linux/swap.h>
  20#include <linux/bio.h>
  21#include <linux/blkdev.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/kernel.h>
  25#include <linux/module.h>
  26#include <linux/mempool.h>
  27#include <linux/workqueue.h>
  28
  29#define BIO_POOL_SIZE 256
  30
  31static mempool_t *bio_pool;
  32static kmem_cache_t *bio_slab;
  33
  34#define BIOVEC_NR_POOLS 6
  35
  36/*
  37 * a small number of entries is fine, not going to be performance critical.
  38 * basically we just need to survive
  39 */
  40#define BIO_SPLIT_ENTRIES 8     
  41mempool_t *bio_split_pool;
  42
  43struct biovec_pool {
  44        int nr_vecs;
  45        char *name; 
  46        kmem_cache_t *slab;
  47        mempool_t *pool;
  48};
  49
  50/*
  51 * if you change this list, also change bvec_alloc or things will
  52 * break badly! cannot be bigger than what you can fit into an
  53 * unsigned short
  54 */
  55
  56#define BV(x) { .nr_vecs = x, .name = "biovec-" #x }
  57static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
  58        BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
  59};
  60#undef BV
  61
  62static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, unsigned long *idx)
  63{
  64        struct biovec_pool *bp;
  65        struct bio_vec *bvl;
  66
  67        /*
  68         * see comment near bvec_array define!
  69         */
  70        switch (nr) {
  71                case   1        : *idx = 0; break;
  72                case   2 ...   4: *idx = 1; break;
  73                case   5 ...  16: *idx = 2; break;
  74                case  17 ...  64: *idx = 3; break;
  75                case  65 ... 128: *idx = 4; break;
  76                case 129 ... BIO_MAX_PAGES: *idx = 5; break;
  77                default:
  78                        return NULL;
  79        }
  80        /*
  81         * idx now points to the pool we want to allocate from
  82         */
  83        bp = bvec_array + *idx;
  84
  85        bvl = mempool_alloc(bp->pool, gfp_mask);
  86        if (bvl)
  87                memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
  88        return bvl;
  89}
  90
  91/*
  92 * default destructor for a bio allocated with bio_alloc()
  93 */
  94void bio_destructor(struct bio *bio)
  95{
  96        const int pool_idx = BIO_POOL_IDX(bio);
  97        struct biovec_pool *bp = bvec_array + pool_idx;
  98
  99        BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 100
 101        /*
 102         * cloned bio doesn't own the veclist
 103         */
 104        if (!bio_flagged(bio, BIO_CLONED))
 105                mempool_free(bio->bi_io_vec, bp->pool);
 106
 107        mempool_free(bio, bio_pool);
 108}
 109
 110inline void bio_init(struct bio *bio)
 111{
 112        bio->bi_next = NULL;
 113        bio->bi_flags = 1 << BIO_UPTODATE;
 114        bio->bi_rw = 0;
 115        bio->bi_vcnt = 0;
 116        bio->bi_idx = 0;
 117        bio->bi_phys_segments = 0;
 118        bio->bi_hw_segments = 0;
 119        bio->bi_size = 0;
 120        bio->bi_max_vecs = 0;
 121        bio->bi_end_io = NULL;
 122        atomic_set(&bio->bi_cnt, 1);
 123        bio->bi_private = NULL;
 124}
 125
 126/**
 127 * bio_alloc - allocate a bio for I/O
 128 * @gfp_mask:   the GFP_ mask given to the slab allocator
 129 * @nr_iovecs:  number of iovecs to pre-allocate
 130 *
 131 * Description:
 132 *   bio_alloc will first try it's on mempool to satisfy the allocation.
 133 *   If %__GFP_WAIT is set then we will block on the internal pool waiting
 134 *   for a &struct bio to become free.
 135 **/
 136struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
 137{
 138        struct bio_vec *bvl = NULL;
 139        unsigned long idx;
 140        struct bio *bio;
 141
 142        bio = mempool_alloc(bio_pool, gfp_mask);
 143        if (unlikely(!bio))
 144                goto out;
 145
 146        bio_init(bio);
 147
 148        if (unlikely(!nr_iovecs))
 149                goto noiovec;
 150
 151        bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx);
 152        if (bvl) {
 153                bio->bi_flags |= idx << BIO_POOL_OFFSET;
 154                bio->bi_max_vecs = bvec_array[idx].nr_vecs;
 155noiovec:
 156                bio->bi_io_vec = bvl;
 157                bio->bi_destructor = bio_destructor;
 158out:
 159                return bio;
 160        }
 161
 162        mempool_free(bio, bio_pool);
 163        bio = NULL;
 164        goto out;
 165}
 166
 167/**
 168 * bio_put - release a reference to a bio
 169 * @bio:   bio to release reference to
 170 *
 171 * Description:
 172 *   Put a reference to a &struct bio, either one you have gotten with
 173 *   bio_alloc or bio_get. The last put of a bio will free it.
 174 **/
 175void bio_put(struct bio *bio)
 176{
 177        BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
 178
 179        /*
 180         * last put frees it
 181         */
 182        if (atomic_dec_and_test(&bio->bi_cnt)) {
 183                bio->bi_next = NULL;
 184                bio->bi_destructor(bio);
 185        }
 186}
 187
 188inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
 189{
 190        if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 191                blk_recount_segments(q, bio);
 192
 193        return bio->bi_phys_segments;
 194}
 195
 196inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
 197{
 198        if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 199                blk_recount_segments(q, bio);
 200
 201        return bio->bi_hw_segments;
 202}
 203
 204/**
 205 *      __bio_clone     -       clone a bio
 206 *      @bio: destination bio
 207 *      @bio_src: bio to clone
 208 *
 209 *      Clone a &bio. Caller will own the returned bio, but not
 210 *      the actual data it points to. Reference count of returned
 211 *      bio will be one.
 212 */
 213inline void __bio_clone(struct bio *bio, struct bio *bio_src)
 214{
 215        bio->bi_io_vec = bio_src->bi_io_vec;
 216
 217        bio->bi_sector = bio_src->bi_sector;
 218        bio->bi_bdev = bio_src->bi_bdev;
 219        bio->bi_flags |= 1 << BIO_CLONED;
 220        bio->bi_rw = bio_src->bi_rw;
 221
 222        /*
 223         * notes -- maybe just leave bi_idx alone. assume identical mapping
 224         * for the clone
 225         */
 226        bio->bi_vcnt = bio_src->bi_vcnt;
 227        bio->bi_idx = bio_src->bi_idx;
 228        if (bio_flagged(bio, BIO_SEG_VALID)) {
 229                bio->bi_phys_segments = bio_src->bi_phys_segments;
 230                bio->bi_hw_segments = bio_src->bi_hw_segments;
 231                bio->bi_flags |= (1 << BIO_SEG_VALID);
 232        }
 233        bio->bi_size = bio_src->bi_size;
 234
 235        /*
 236         * cloned bio does not own the bio_vec, so users cannot fiddle with
 237         * it. clear bi_max_vecs and clear the BIO_POOL_BITS to make this
 238         * apparent
 239         */
 240        bio->bi_max_vecs = 0;
 241        bio->bi_flags &= (BIO_POOL_MASK - 1);
 242}
 243
 244/**
 245 *      bio_clone       -       clone a bio
 246 *      @bio: bio to clone
 247 *      @gfp_mask: allocation priority
 248 *
 249 *      Like __bio_clone, only also allocates the returned bio
 250 */
 251struct bio *bio_clone(struct bio *bio, int gfp_mask)
 252{
 253        struct bio *b = bio_alloc(gfp_mask, 0);
 254
 255        if (b)
 256                __bio_clone(b, bio);
 257
 258        return b;
 259}
 260
 261/**
 262 *      bio_get_nr_vecs         - return approx number of vecs
 263 *      @bdev:  I/O target
 264 *
 265 *      Return the approximate number of pages we can send to this target.
 266 *      There's no guarantee that you will be able to fit this number of pages
 267 *      into a bio, it does not account for dynamic restrictions that vary
 268 *      on offset.
 269 */
 270int bio_get_nr_vecs(struct block_device *bdev)
 271{
 272        request_queue_t *q = bdev_get_queue(bdev);
 273        int nr_pages;
 274
 275        nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 276        if (nr_pages > q->max_phys_segments)
 277                nr_pages = q->max_phys_segments;
 278        if (nr_pages > q->max_hw_segments)
 279                nr_pages = q->max_hw_segments;
 280
 281        return nr_pages;
 282}
 283
 284/**
 285 *      bio_add_page    -       attempt to add page to bio
 286 *      @bio: destination bio
 287 *      @page: page to add
 288 *      @len: vec entry length
 289 *      @offset: vec entry offset
 290 *
 291 *      Attempt to add a page to the bio_vec maplist. This can fail for a
 292 *      number of reasons, such as the bio being full or target block
 293 *      device limitations.
 294 */
 295int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 296                 unsigned int offset)
 297{
 298        request_queue_t *q = bdev_get_queue(bio->bi_bdev);
 299        int retried_segments = 0;
 300        struct bio_vec *bvec;
 301
 302        /*
 303         * cloned bio must not modify vec list
 304         */
 305        if (unlikely(bio_flagged(bio, BIO_CLONED)))
 306                return 0;
 307
 308        if (bio->bi_vcnt >= bio->bi_max_vecs)
 309                return 0;
 310
 311        if (((bio->bi_size + len) >> 9) > q->max_sectors)
 312                return 0;
 313
 314        /*
 315         * we might lose a segment or two here, but rather that than
 316         * make this too complex.
 317         */
 318
 319        while (bio_phys_segments(q, bio) >= q->max_phys_segments
 320            || bio_hw_segments(q, bio) >= q->max_hw_segments) {
 321
 322                if (retried_segments)
 323                        return 0;
 324
 325                bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 326                retried_segments = 1;
 327        }
 328
 329        /*
 330         * setup the new entry, we might clear it again later if we
 331         * cannot add the page
 332         */
 333        bvec = &bio->bi_io_vec[bio->bi_vcnt];
 334        bvec->bv_page = page;
 335        bvec->bv_len = len;
 336        bvec->bv_offset = offset;
 337
 338        /*
 339         * if queue has other restrictions (eg varying max sector size
 340         * depending on offset), it can specify a merge_bvec_fn in the
 341         * queue to get further control
 342         */
 343        if (q->merge_bvec_fn) {
 344                /*
 345                 * merge_bvec_fn() returns number of bytes it can accept
 346                 * at this offset
 347                 */
 348                if (q->merge_bvec_fn(q, bio, bvec) < len) {
 349                        bvec->bv_page = NULL;
 350                        bvec->bv_len = 0;
 351                        bvec->bv_offset = 0;
 352                        return 0;
 353                }
 354        }
 355
 356        bio->bi_vcnt++;
 357        bio->bi_phys_segments++;
 358        bio->bi_hw_segments++;
 359        bio->bi_size += len;
 360        return len;
 361}
 362
 363static struct bio *__bio_map_user(struct block_device *bdev,
 364                                  unsigned long uaddr, unsigned int len,
 365                                  int write_to_vm)
 366{
 367        unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 368        unsigned long start = uaddr >> PAGE_SHIFT;
 369        const int nr_pages = end - start;
 370        request_queue_t *q = bdev_get_queue(bdev);
 371        int ret, offset, i;
 372        struct page **pages;
 373        struct bio *bio;
 374
 375        /*
 376         * transfer and buffer must be aligned to at least hardsector
 377         * size for now, in the future we can relax this restriction
 378         */
 379        if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
 380                return NULL;
 381
 382        bio = bio_alloc(GFP_KERNEL, nr_pages);
 383        if (!bio)
 384                return NULL;
 385
 386        pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
 387        if (!pages)
 388                goto out;
 389
 390        down_read(&current->mm->mmap_sem);
 391        ret = get_user_pages(current, current->mm, uaddr, nr_pages,
 392                                                write_to_vm, 0, pages, NULL);
 393        up_read(&current->mm->mmap_sem);
 394
 395        if (ret < nr_pages)
 396                goto out;
 397
 398        bio->bi_bdev = bdev;
 399
 400        offset = uaddr & ~PAGE_MASK;
 401        for (i = 0; i < nr_pages; i++) {
 402                unsigned int bytes = PAGE_SIZE - offset;
 403
 404                if (len <= 0)
 405                        break;
 406
 407                if (bytes > len)
 408                        bytes = len;
 409
 410                /*
 411                 * sorry...
 412                 */
 413                if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
 414                        break;
 415
 416                len -= bytes;
 417                offset = 0;
 418        }
 419
 420        /*
 421         * release the pages we didn't map into the bio, if any
 422         */
 423        while (i < nr_pages)
 424                page_cache_release(pages[i++]);
 425
 426        kfree(pages);
 427
 428        /*
 429         * set data direction, and check if mapped pages need bouncing
 430         */
 431        if (!write_to_vm)
 432                bio->bi_rw |= (1 << BIO_RW);
 433
 434        blk_queue_bounce(q, &bio);
 435        return bio;
 436out:
 437        kfree(pages);
 438        bio_put(bio);
 439        return NULL;
 440}
 441
 442/**
 443 *      bio_map_user    -       map user address into bio
 444 *      @bdev: destination block device
 445 *      @uaddr: start of user address
 446 *      @len: length in bytes
 447 *      @write_to_vm: bool indicating writing to pages or not
 448 *
 449 *      Map the user space address into a bio suitable for io to a block
 450 *      device.
 451 */
 452struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
 453                         unsigned int len, int write_to_vm)
 454{
 455        struct bio *bio;
 456
 457        bio = __bio_map_user(bdev, uaddr, len, write_to_vm);
 458
 459        if (bio) {
 460                /*
 461                 * subtle -- if __bio_map_user() ended up bouncing a bio,
 462                 * it would normally disappear when its bi_end_io is run.
 463                 * however, we need it for the unmap, so grab an extra
 464                 * reference to it
 465                 */
 466                bio_get(bio);
 467
 468                if (bio->bi_size < len) {
 469                        bio_endio(bio, bio->bi_size, 0);
 470                        bio_unmap_user(bio, 0);
 471                        return NULL;
 472                }
 473        }
 474
 475        return bio;
 476}
 477
 478static void __bio_unmap_user(struct bio *bio, int write_to_vm)
 479{
 480        struct bio_vec *bvec;
 481        int i;
 482
 483        /*
 484         * find original bio if it was bounced
 485         */
 486        if (bio->bi_private) {
 487                /*
 488                 * someone stole our bio, must not happen
 489                 */
 490                BUG_ON(!bio_flagged(bio, BIO_BOUNCED));
 491        
 492                bio = bio->bi_private;
 493        }
 494
 495        /*
 496         * make sure we dirty pages we wrote to
 497         */
 498        __bio_for_each_segment(bvec, bio, i, 0) {
 499                if (write_to_vm)
 500                        set_page_dirty_lock(bvec->bv_page);
 501
 502                page_cache_release(bvec->bv_page);
 503        }
 504
 505        bio_put(bio);
 506}
 507
 /**
  *      bio_unmap_user  -       unmap a bio
  *      @bio:           the bio being unmapped
  *      @write_to_vm:   bool indicating whether pages were written to
  *
  *      Unmap a bio previously mapped by bio_map_user(). The @write_to_vm
  *      must be the same as passed into bio_map_user(). Must be called with
  *      a process context.
  *
  *      bio_unmap_user() may sleep.
  */
 void bio_unmap_user(struct bio *bio, int write_to_vm)
 {
         /* releases the pages and drops one bio reference */
         __bio_unmap_user(bio, write_to_vm);

         /* matches the extra bio_get() done in bio_map_user() */
         bio_put(bio);
 }
 524
 525/*
 526 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 527 * for performing direct-IO in BIOs.
 528 *
 529 * The problem is that we cannot run set_page_dirty() from interrupt context
 530 * because the required locks are not interrupt-safe.  So what we can do is to
 531 * mark the pages dirty _before_ performing IO.  And in interrupt context,
 532 * check that the pages are still dirty.   If so, fine.  If not, redirty them
 533 * in process context.
 534 *
 535 * We special-case compound pages here: normally this means reads into hugetlb
 536 * pages.  The logic in here doesn't really work right for compound pages
 537 * because the VM does not uniformly chase down the head page in all cases.
 538 * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
 539 * handle them at all.  So we skip compound pages here at an early stage.
 540 *
 541 * Note that this code is very hard to test under normal circumstances because
 542 * direct-io pins the pages with get_user_pages().  This makes
 543 * is_page_cache_freeable return false, and the VM will not clean the pages.
 544 * But other code (eg, pdflush) could clean the pages if they are mapped
 545 * pagecache.
 546 *
 547 * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
 548 * deferred bio dirtying paths.
 549 */
 550
 551/*
 552 * bio_set_pages_dirty() will mark all the bio's pages as dirty.
 553 */
 554void bio_set_pages_dirty(struct bio *bio)
 555{
 556        struct bio_vec *bvec = bio->bi_io_vec;
 557        int i;
 558
 559        for (i = 0; i < bio->bi_vcnt; i++) {
 560                struct page *page = bvec[i].bv_page;
 561
 562                if (page && !PageCompound(page))
 563                        set_page_dirty_lock(page);
 564        }
 565}
 566
 567static void bio_release_pages(struct bio *bio)
 568{
 569        struct bio_vec *bvec = bio->bi_io_vec;
 570        int i;
 571
 572        for (i = 0; i < bio->bi_vcnt; i++) {
 573                struct page *page = bvec[i].bv_page;
 574
 575                if (page)
 576                        put_page(page);
 577        }
 578}
 579
 580/*
 581 * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
 582 * If they are, then fine.  If, however, some pages are clean then they must
 583 * have been written out during the direct-IO read.  So we take another ref on
 584 * the BIO and the offending pages and re-dirty the pages in process context.
 585 *
 586 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 587 * here on.  It will run one page_cache_release() against each page and will
 588 * run one bio_put() against the BIO.
 589 */
 590
 591static void bio_dirty_fn(void *data);
 592
 593static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL);
 594static spinlock_t bio_dirty_lock = SPIN_LOCK_UNLOCKED;
 595static struct bio *bio_dirty_list = NULL;
 596
 597/*
 598 * This runs in process context
 599 */
 600static void bio_dirty_fn(void *data)
 601{
 602        unsigned long flags;
 603        struct bio *bio;
 604
 605        spin_lock_irqsave(&bio_dirty_lock, flags);
 606        bio = bio_dirty_list;
 607        bio_dirty_list = NULL;
 608        spin_unlock_irqrestore(&bio_dirty_lock, flags);
 609
 610        while (bio) {
 611                struct bio *next = bio->bi_private;
 612
 613                bio_set_pages_dirty(bio);
 614                bio_release_pages(bio);
 615                bio_put(bio);
 616                bio = next;
 617        }
 618}
 619
 620void bio_check_pages_dirty(struct bio *bio)
 621{
 622        struct bio_vec *bvec = bio->bi_io_vec;
 623        int nr_clean_pages = 0;
 624        int i;
 625
 626        for (i = 0; i < bio->bi_vcnt; i++) {
 627                struct page *page = bvec[i].bv_page;
 628
 629                if (PageDirty(page) || PageCompound(page)) {
 630                        page_cache_release(page);
 631                        bvec[i].bv_page = NULL;
 632                } else {
 633                        nr_clean_pages++;
 634                }
 635        }
 636
 637        if (nr_clean_pages) {
 638                unsigned long flags;
 639
 640                spin_lock_irqsave(&bio_dirty_lock, flags);
 641                bio->bi_private = bio_dirty_list;
 642                bio_dirty_list = bio;
 643                spin_unlock_irqrestore(&bio_dirty_lock, flags);
 644                schedule_work(&bio_dirty_work);
 645        } else {
 646                bio_put(bio);
 647        }
 648}
 649
 650/**
 651 * bio_endio - end I/O on a bio
 652 * @bio:        bio
 653 * @bytes_done: number of bytes completed
 654 * @error:      error, if any
 655 *
 656 * Description:
 657 *   bio_endio() will end I/O on @bytes_done number of bytes. This may be
 658 *   just a partial part of the bio, or it may be the whole bio. bio_endio()
 659 *   is the preferred way to end I/O on a bio, it takes care of decrementing
 660 *   bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, and
 661 *   and one of the established -Exxxx (-EIO, for instance) error values in
 662 *   case something went wrong. Noone should call bi_end_io() directly on
 663 *   a bio unless they own it and thus know that it has an end_io function.
 664 **/
 665void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
 666{
 667        if (error)
 668                clear_bit(BIO_UPTODATE, &bio->bi_flags);
 669
 670        if (unlikely(bytes_done > bio->bi_size)) {
 671                printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
 672                                                bytes_done, bio->bi_size);
 673                bytes_done = bio->bi_size;
 674        }
 675
 676        bio->bi_size -= bytes_done;
 677        bio->bi_sector += (bytes_done >> 9);
 678
 679        if (bio->bi_end_io)
 680                bio->bi_end_io(bio, bytes_done, error);
 681}
 682
 683void bio_pair_release(struct bio_pair *bp)
 684{
 685        if (atomic_dec_and_test(&bp->cnt)) {
 686                struct bio *master = bp->bio1.bi_private;
 687
 688                bio_endio(master, master->bi_size, bp->error);
 689                mempool_free(bp, bp->bio2.bi_private);
 690        }
 691}
 692
 693static int bio_pair_end_1(struct bio * bi, unsigned int done, int err)
 694{
 695        struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
 696
 697        if (bi->bi_size)
 698                return 1;
 699        if (err)
 700                bp->error = err;
 701
 702        bio_pair_release(bp);
 703        return 0;
 704}
 705
 706static int bio_pair_end_2(struct bio * bi, unsigned int done, int err)
 707{
 708        struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
 709
 710        if (bi->bi_size)
 711                return 1;
 712        if (err)
 713                bp->error = err;
 714
 715        bio_pair_release(bp);
 716        return 0;
 717}
 718
 719/*
 720 * split a bio - only worry about a bio with a single page
 721 * in it's iovec
 722 */
 723struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 724{
 725        struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
 726
 727        if (!bp)
 728                return bp;
 729
 730        BUG_ON(bi->bi_vcnt != 1);
 731        BUG_ON(bi->bi_idx != 0);
 732        atomic_set(&bp->cnt, 3);
 733        bp->error = 0;
 734        bp->bio1 = *bi;
 735        bp->bio2 = *bi;
 736        bp->bio2.bi_sector += first_sectors;
 737        bp->bio2.bi_size -= first_sectors << 9;
 738        bp->bio1.bi_size = first_sectors << 9;
 739
 740        bp->bv1 = bi->bi_io_vec[0];
 741        bp->bv2 = bi->bi_io_vec[0];
 742        bp->bv2.bv_offset += first_sectors << 9;
 743        bp->bv2.bv_len -= first_sectors << 9;
 744        bp->bv1.bv_len = first_sectors << 9;
 745
 746        bp->bio1.bi_io_vec = &bp->bv1;
 747        bp->bio2.bi_io_vec = &bp->bv2;
 748
 749        bp->bio1.bi_end_io = bio_pair_end_1;
 750        bp->bio2.bi_end_io = bio_pair_end_2;
 751
 752        bp->bio1.bi_private = bi;
 753        bp->bio2.bi_private = pool;
 754
 755        return bp;
 756}
 757
 758static void *bio_pair_alloc(int gfp_flags, void *data)
 759{
 760        return kmalloc(sizeof(struct bio_pair), gfp_flags);
 761}
 762
 /* mempool free callback for bio_split_pool; @data is unused */
 static void bio_pair_free(void *bp, void *data)
 {
         kfree(bp);
 }
 767
 768static void __init biovec_init_pools(void)
 769{
 770        int i, size, megabytes, pool_entries = BIO_POOL_SIZE;
 771        int scale = BIOVEC_NR_POOLS;
 772
 773        megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);
 774
 775        /*
 776         * find out where to start scaling
 777         */
 778        if (megabytes <= 16)
 779                scale = 0;
 780        else if (megabytes <= 32)
 781                scale = 1;
 782        else if (megabytes <= 64)
 783                scale = 2;
 784        else if (megabytes <= 96)
 785                scale = 3;
 786        else if (megabytes <= 128)
 787                scale = 4;
 788
 789        /*
 790         * scale number of entries
 791         */
 792        pool_entries = megabytes * 2;
 793        if (pool_entries > 256)
 794                pool_entries = 256;
 795
 796        for (i = 0; i < BIOVEC_NR_POOLS; i++) {
 797                struct biovec_pool *bp = bvec_array + i;
 798
 799                size = bp->nr_vecs * sizeof(struct bio_vec);
 800
 801                bp->slab = kmem_cache_create(bp->name, size, 0,
 802                                                SLAB_HWCACHE_ALIGN, NULL, NULL);
 803                if (!bp->slab)
 804                        panic("biovec: can't init slab cache\n");
 805
 806                if (i >= scale)
 807                        pool_entries >>= 1;
 808
 809                bp->pool = mempool_create(pool_entries, mempool_alloc_slab,
 810                                        mempool_free_slab, bp->slab);
 811                if (!bp->pool)
 812                        panic("biovec: can't init mempool\n");
 813        }
 814}
 815
 816static int __init init_bio(void)
 817{
 818        bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
 819                                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 820        if (!bio_slab)
 821                panic("bio: can't create slab cache\n");
 822        bio_pool = mempool_create(BIO_POOL_SIZE, mempool_alloc_slab, mempool_free_slab, bio_slab);
 823        if (!bio_pool)
 824                panic("bio: can't create mempool\n");
 825
 826        biovec_init_pools();
 827
 828        bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, bio_pair_alloc, bio_pair_free, NULL);
 829        if (!bio_split_pool)
 830                panic("bio: can't create split pool\n");
 831
 832        return 0;
 833}
 834
 835subsys_initcall(init_bio);
 836
 /* allocation, initialisation and lifetime */
 EXPORT_SYMBOL(bio_alloc);
 EXPORT_SYMBOL(bio_init);
 EXPORT_SYMBOL(bio_put);
 EXPORT_SYMBOL(bio_endio);

 /* cloning */
 EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);

 /* segment accounting and filling a bio */
 EXPORT_SYMBOL(bio_phys_segments);
 EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);

 /* mapping user memory */
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);

 /* splitting */
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
 EXPORT_SYMBOL(bio_split_pool);
 852
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.