linux-old/drivers/block/ll_rw_blk.c
<<
>>
Prefs
   1/*
   2 *  linux/drivers/block/ll_rw_blk.c
   3 *
   4 * Copyright (C) 1991, 1992 Linus Torvalds
   5 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
   6 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
   7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
   8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
   9 */
  10
  11/*
  12 * This handles all read/write requests to block devices
  13 */
  14#include <linux/sched.h>
  15#include <linux/kernel.h>
  16#include <linux/kernel_stat.h>
  17#include <linux/errno.h>
  18#include <linux/string.h>
  19#include <linux/config.h>
  20#include <linux/locks.h>
  21#include <linux/mm.h>
  22#include <linux/swap.h>
  23#include <linux/init.h>
  24#include <linux/smp_lock.h>
  25#include <linux/completion.h>
  26#include <linux/bootmem.h>
  27
  28#include <asm/system.h>
  29#include <asm/io.h>
  30#include <linux/blk.h>
  31#include <linux/highmem.h>
  32#include <linux/slab.h>
  33#include <linux/module.h>
  34
  35/*
  36 * MAC Floppy IWM hooks
  37 */
  38
  39#ifdef CONFIG_MAC_FLOPPY_IWM
  40extern int mac_floppy_init(void);
  41#endif
  42
  43/*
  44 * For the allocated request tables
  45 */
  46static kmem_cache_t *request_cachep;
  47
  48/*
  49 * The "disk" task queue is used to start the actual requests
  50 * after a plug
  51 */
  52DECLARE_TASK_QUEUE(tq_disk);
  53
  54/*
  55 * Protect the request list against multiple users..
  56 *
  57 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
  58 * from the IRQ event side, and almost 100% SMP threaded from the syscall
  59 * side (we still have protect against block device array operations, and
  60 * the do_request() side is casually still unsafe. The kernel lock protects
  61 * this part currently.).
  62 *
  63 * there is a fair chance that things will work just OK if these functions
  64 * are called with no global kernel lock held ...
  65 */
  66spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
  67
  68/* This specifies how many sectors to read ahead on the disk. */
  69
  70int read_ahead[MAX_BLKDEV];
  71
  72/* blk_dev_struct is:
  73 *      *request_fn
  74 *      *current_request
  75 */
  76struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
  77
  78/*
  79 * blk_size contains the size of all block-devices in units of 1024 byte
  80 * sectors:
  81 *
  82 * blk_size[MAJOR][MINOR]
  83 *
  84 * if (!blk_size[MAJOR]) then no minor size checking is done.
  85 */
  86int * blk_size[MAX_BLKDEV];
  87
  88/*
  89 * blksize_size contains the size of all block-devices:
  90 *
  91 * blksize_size[MAJOR][MINOR]
  92 *
  93 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
  94 */
  95int * blksize_size[MAX_BLKDEV];
  96
  97/*
  98 * hardsect_size contains the size of the hardware sector of a device.
  99 *
 100 * hardsect_size[MAJOR][MINOR]
 101 *
 102 * if (!hardsect_size[MAJOR])
 103 *              then 512 bytes is assumed.
 104 * else
 105 *              sector_size is hardsect_size[MAJOR][MINOR]
 106 * This is currently set by some scsi devices and read by the msdos fs driver.
 107 * Other uses may appear later.
 108 */
 109int * hardsect_size[MAX_BLKDEV];
 110
 111/*
 112 * The following tunes the read-ahead algorithm in mm/filemap.c
 113 */
 114int * max_readahead[MAX_BLKDEV];
 115
 116/*
 117 * Max number of sectors per request
 118 */
 119int * max_sectors[MAX_BLKDEV];
 120
 121unsigned long blk_max_low_pfn, blk_max_pfn;
 122int blk_nohighio = 0;
 123
 124static inline int get_max_sectors(kdev_t dev)
 125{
 126        if (!max_sectors[MAJOR(dev)])
 127                return MAX_SECTORS;
 128        return max_sectors[MAJOR(dev)][MINOR(dev)];
 129}
 130
 131inline request_queue_t *blk_get_queue(kdev_t dev)
 132{
 133        struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
 134
 135        if (bdev->queue)
 136                return bdev->queue(dev);
 137        else
 138                return &blk_dev[MAJOR(dev)].request_queue;
 139}
 140
 141static int __blk_cleanup_queue(struct request_list *list)
 142{
 143        struct list_head *head = &list->free;
 144        struct request *rq;
 145        int i = 0;
 146
 147        while (!list_empty(head)) {
 148                rq = list_entry(head->next, struct request, queue);
 149                list_del(&rq->queue);
 150                kmem_cache_free(request_cachep, rq);
 151                i++;
 152        };
 153
 154        if (i != list->count)
 155                printk("request list leak!\n");
 156
 157        list->count = 0;
 158        return i;
 159}
 160
 161/**
 162 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
 163 * @q:    the request queue to be released
 164 *
 165 * Description:
 166 *     blk_cleanup_queue is the pair to blk_init_queue().  It should
 167 *     be called when a request queue is being released; typically
 168 *     when a block device is being de-registered.  Currently, its
 169 *     primary task it to free all the &struct request structures that
 170 *     were allocated to the queue.
 171 * Caveat: 
 172 *     Hopefully the low level driver will have finished any
 173 *     outstanding requests first...
 174 **/
 175void blk_cleanup_queue(request_queue_t * q)
 176{
 177        int count = q->nr_requests;
 178
 179        count -= __blk_cleanup_queue(&q->rq);
 180
 181        if (count)
 182                printk("blk_cleanup_queue: leaked requests (%d)\n", count);
 183        if (atomic_read(&q->nr_sectors))
 184                printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
 185
 186        memset(q, 0, sizeof(*q));
 187}
 188
 189/**
 190 * blk_queue_headactive - indicate whether head of request queue may be active
 191 * @q:       The queue which this applies to.
 192 * @active:  A flag indication where the head of the queue is active.
 193 *
 194 * Description:
 195 *    The driver for a block device may choose to leave the currently active
 196 *    request on the request queue, removing it only when it has completed.
 197 *    The queue handling routines assume this by default for safety reasons
 198 *    and will not involve the head of the request queue in any merging or
 199 *    reordering of requests when the queue is unplugged (and thus may be
 200 *    working on this particular request).
 201 *
 202 *    If a driver removes requests from the queue before processing them, then
 203 *    it may indicate that it does so, there by allowing the head of the queue
 204 *    to be involved in merging and reordering.  This is done be calling
 205 *    blk_queue_headactive() with an @active flag of %0.
 206 *
 207 *    If a driver processes several requests at once, it must remove them (or
 208 *    at least all but one of them) from the request queue.
 209 *
 210 *    When a queue is plugged the head will be assumed to be inactive.
 211 **/
 212 
 213void blk_queue_headactive(request_queue_t * q, int active)
 214{
 215        q->head_active = active;
 216}
 217
 218/**
 219 * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
 220 * @q:       The queue which this applies to.
 221 * @active:  A flag indication if you want sector throttling on
 222 *
 223 * Description:
 224 * The sector throttling code allows us to put a limit on the number of
 225 * sectors pending io to the disk at a given time, sending @active nonzero
 226 * indicates you will call blk_started_sectors and blk_finished_sectors in
 227 * addition to calling blk_started_io and blk_finished_io in order to
 228 * keep track of the number of sectors in flight.
 229 **/
 230 
 231void blk_queue_throttle_sectors(request_queue_t * q, int active)
 232{
 233        q->can_throttle = active;
 234}
 235
 236/**
 237 * blk_queue_make_request - define an alternate make_request function for a device
 238 * @q:  the request queue for the device to be affected
 239 * @mfn: the alternate make_request function
 240 *
 241 * Description:
 242 *    The normal way for &struct buffer_heads to be passed to a device
 243 *    driver is for them to be collected into requests on a request
 244 *    queue, and then to allow the device driver to select requests
 245 *    off that queue when it is ready.  This works well for many block
 246 *    devices. However some block devices (typically virtual devices
 247 *    such as md or lvm) do not benefit from the processing on the
 248 *    request queue, and are served best by having the requests passed
 249 *    directly to them.  This can be achieved by providing a function
 250 *    to blk_queue_make_request().
 251 *
 252 * Caveat:
 253 *    The driver that does this *must* be able to deal appropriately
 254 *    with buffers in "highmemory", either by calling bh_kmap() to get
 255 *    a kernel mapping, to by calling create_bounce() to create a
 256 *    buffer in normal memory.
 257 **/
 258
 259void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 260{
 261        q->make_request_fn = mfn;
 262}
 263
 264/**
 265 * blk_queue_bounce_limit - set bounce buffer limit for queue
 266 * @q:  the request queue for the device
 267 * @dma_addr:   bus address limit
 268 *
 269 * Description:
 270 *    Different hardware can have different requirements as to what pages
 271 *    it can do I/O directly to. A low level driver can call
 272 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
 273 *    buffers for doing I/O to pages residing above @page. By default
 274 *    the block layer sets this to the highest numbered "low" memory page.
 275 **/
 276void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
 277{
 278        unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
 279        unsigned long mb = dma_addr >> 20;
 280        static request_queue_t *old_q;
 281
 282        /*
 283         * keep this for debugging for now...
 284         */
 285        if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
 286                old_q = q;
 287                printk("blk: queue %p, ", q);
 288                if (dma_addr == BLK_BOUNCE_ANY)
 289                        printk("no I/O memory limit\n");
 290                else
 291                        printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
 292                               (long long) dma_addr);
 293        }
 294
 295        q->bounce_pfn = bounce_pfn;
 296}
 297
 298
 299/*
 300 * can we merge the two segments, or do we need to start a new one?
 301 */
 302inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
 303{
 304        /*
 305         * if bh and nxt are contigous and don't cross a 4g boundary, it's ok
 306         */
 307        if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
 308                return 1;
 309
 310        return 0;
 311}
 312
 313static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
 314{
 315        if (req->nr_segments < max_segments) {
 316                req->nr_segments++;
 317                return 1;
 318        }
 319        return 0;
 320}
 321
 322static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
 323                            struct buffer_head *bh, int max_segments)
 324{
 325        if (blk_seg_merge_ok(req->bhtail, bh))
 326                return 1;
 327
 328        return ll_new_segment(q, req, max_segments);
 329}
 330
 331static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
 332                             struct buffer_head *bh, int max_segments)
 333{
 334        if (blk_seg_merge_ok(bh, req->bh))
 335                return 1;
 336
 337        return ll_new_segment(q, req, max_segments);
 338}
 339
 340static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
 341                                struct request *next, int max_segments)
 342{
 343        int total_segments = req->nr_segments + next->nr_segments;
 344
 345        if (blk_seg_merge_ok(req->bhtail, next->bh))
 346                total_segments--;
 347
 348        if (total_segments > max_segments)
 349                return 0;
 350
 351        req->nr_segments = total_segments;
 352        return 1;
 353}
 354
 355/*
 356 * "plug" the device if there are no outstanding requests: this will
 357 * force the transfer to start only after we have put all the requests
 358 * on the list.
 359 *
 360 * This is called with interrupts off and no requests on the queue.
 361 * (and with the request spinlock acquired)
 362 */
 363static void generic_plug_device(request_queue_t *q, kdev_t dev)
 364{
 365        /*
 366         * no need to replug device
 367         */
 368        if (!list_empty(&q->queue_head) || q->plugged)
 369                return;
 370
 371        q->plugged = 1;
 372        queue_task(&q->plug_tq, &tq_disk);
 373}
 374
 375/*
 376 * remove the plug and let it rip..
 377 */
 378static inline void __generic_unplug_device(request_queue_t *q)
 379{
 380        if (q->plugged) {
 381                q->plugged = 0;
 382                if (!list_empty(&q->queue_head))
 383                        q->request_fn(q);
 384        }
 385}
 386
 387void generic_unplug_device(void *data)
 388{
 389        request_queue_t *q = (request_queue_t *) data;
 390        unsigned long flags;
 391
 392        spin_lock_irqsave(&io_request_lock, flags);
 393        __generic_unplug_device(q);
 394        spin_unlock_irqrestore(&io_request_lock, flags);
 395}
 396
 397/** blk_grow_request_list
 398 *  @q: The &request_queue_t
 399 *  @nr_requests: how many requests are desired
 400 *
 401 * More free requests are added to the queue's free lists, bringing
 402 * the total number of requests to @nr_requests.
 403 *
 404 * The requests are added equally to the request queue's read
 405 * and write freelists.
 406 *
 407 * This function can sleep.
 408 *
 409 * Returns the (new) number of requests which the queue has available.
 410 */
 411int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
 412{
 413        unsigned long flags;
 414        /* Several broken drivers assume that this function doesn't sleep,
 415         * this causes system hangs during boot.
 416         * As a temporary fix, make the function non-blocking.
 417         */
 418        spin_lock_irqsave(&io_request_lock, flags);
 419        while (q->nr_requests < nr_requests) {
 420                struct request *rq;
 421
 422                rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
 423                if (rq == NULL)
 424                        break;
 425                memset(rq, 0, sizeof(*rq));
 426                rq->rq_status = RQ_INACTIVE;
 427                list_add(&rq->queue, &q->rq.free);
 428                q->rq.count++;
 429
 430                q->nr_requests++;
 431        }
 432
 433        /*
 434         * Wakeup waiters after both one quarter of the
 435         * max-in-fligh queue and one quarter of the requests
 436         * are available again.
 437         */
 438
 439        q->batch_requests = q->nr_requests / 4;
 440        if (q->batch_requests > 32)
 441                q->batch_requests = 32;
 442        q->batch_sectors = max_queue_sectors / 4;
 443 
 444        q->max_queue_sectors = max_queue_sectors;
 445 
 446        BUG_ON(!q->batch_sectors);
 447        atomic_set(&q->nr_sectors, 0);
 448
 449        spin_unlock_irqrestore(&io_request_lock, flags);
 450        return q->nr_requests;
 451}
 452
 453static void blk_init_free_list(request_queue_t *q)
 454{
 455        struct sysinfo si;
 456        int megs;               /* Total memory, in megabytes */
 457        int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
 458  
 459        INIT_LIST_HEAD(&q->rq.free);
 460        q->rq.count = 0;
 461        q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
 462        q->nr_requests = 0;
 463
 464        si_meminfo(&si);
 465        megs = si.totalram >> (20 - PAGE_SHIFT);
 466        nr_requests = MAX_NR_REQUESTS;
 467        if (megs < 30) {
 468                nr_requests /= 2;
 469                max_queue_sectors /= 2;
 470        }
 471        /* notice early if anybody screwed the defaults */
 472        BUG_ON(!nr_requests);
 473        BUG_ON(!max_queue_sectors);
 474 
 475        blk_grow_request_list(q, nr_requests, max_queue_sectors);
 476
 477        init_waitqueue_head(&q->wait_for_requests);
 478
 479        spin_lock_init(&q->queue_lock);
 480}
 481
 482static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
 483
 484/**
 485 * blk_init_queue  - prepare a request queue for use with a block device
 486 * @q:    The &request_queue_t to be initialised
 487 * @rfn:  The function to be called to process requests that have been
 488 *        placed on the queue.
 489 *
 490 * Description:
 491 *    If a block device wishes to use the standard request handling procedures,
 492 *    which sorts requests and coalesces adjacent requests, then it must
 493 *    call blk_init_queue().  The function @rfn will be called when there
 494 *    are requests on the queue that need to be processed.  If the device
 495 *    supports plugging, then @rfn may not be called immediately when requests
 496 *    are available on the queue, but may be called at some time later instead.
 497 *    Plugged queues are generally unplugged when a buffer belonging to one
 498 *    of the requests on the queue is needed, or due to memory pressure.
 499 *
 500 *    @rfn is not required, or even expected, to remove all requests off the
 501 *    queue, but only as many as it can handle at a time.  If it does leave
 502 *    requests on the queue, it is responsible for arranging that the requests
 503 *    get dealt with eventually.
 504 *
 505 *    A global spin lock $io_request_lock must be held while manipulating the
 506 *    requests on the request queue.
 507 *
 508 *    The request on the head of the queue is by default assumed to be
 509 *    potentially active, and it is not considered for re-ordering or merging
 510 *    whenever the given queue is unplugged. This behaviour can be changed with
 511 *    blk_queue_headactive().
 512 *
 513 * Note:
 514 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 515 *    when the block device is deactivated (such as at module unload).
 516 **/
 517void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
 518{
 519        INIT_LIST_HEAD(&q->queue_head);
 520        elevator_init(&q->elevator, ELEVATOR_LINUS);
 521        blk_init_free_list(q);
 522        q->request_fn           = rfn;
 523        q->back_merge_fn        = ll_back_merge_fn;
 524        q->front_merge_fn       = ll_front_merge_fn;
 525        q->merge_requests_fn    = ll_merge_requests_fn;
 526        q->make_request_fn      = __make_request;
 527        q->plug_tq.sync         = 0;
 528        q->plug_tq.routine      = &generic_unplug_device;
 529        q->plug_tq.data         = q;
 530        q->plugged              = 0;
 531        q->can_throttle         = 0;
 532
 533        /*
 534         * These booleans describe the queue properties.  We set the
 535         * default (and most common) values here.  Other drivers can
 536         * use the appropriate functions to alter the queue properties.
 537         * as appropriate.
 538         */
 539        q->plug_device_fn       = generic_plug_device;
 540        q->head_active          = 1;
 541
 542        blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 543}
 544
 545#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
 546/*
 547 * Get a free request. io_request_lock must be held and interrupts
 548 * disabled on the way in.  Returns NULL if there are no free requests.
 549 */
 550static struct request *get_request(request_queue_t *q, int rw)
 551{
 552        struct request *rq = NULL;
 553        struct request_list *rl = &q->rq;
 554
 555        if (blk_oversized_queue(q)) {
 556                int rlim = q->nr_requests >> 5;
 557
 558                if (rlim < 4)
 559                        rlim = 4;
 560
 561                /*
 562                 * if its a write, or we have more than a handful of reads
 563                 * pending, bail out
 564                 */
 565                if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
 566                        return NULL;
 567                if (blk_oversized_queue_reads(q))
 568                        return NULL;
 569        }
 570        
 571        if (!list_empty(&rl->free)) {
 572                rq = blkdev_free_rq(&rl->free);
 573                list_del(&rq->queue);
 574                rl->count--;
 575                rl->pending[rw]++;
 576                rq->rq_status = RQ_ACTIVE;
 577                rq->cmd = rw;
 578                rq->special = NULL;
 579                rq->q = q;
 580        }
 581
 582        return rq;
 583}
 584
 585/*
 586 * Here's the request allocation design, low latency version:
 587 *
 588 * 1: Blocking on request exhaustion is a key part of I/O throttling.
 589 * 
 590 * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
 591 *    attempt to ensure that all requesters sleep for a similar duration.  Hence
 592 *    no stealing requests when there are other processes waiting.
 593 *
 594 * There used to be more here, attempting to allow a process to send in a
 595 * number of requests once it has woken up.  But, there's no way to 
 596 * tell if a process has just been woken up, or if it is a new process
 597 * coming in to steal requests from the waiters.  So, we give up and force
 598 * everyone to wait fairly.
 599 * 
 600 * So here's what we do:
 601 * 
 602 *    a) A READA requester fails if free_requests < batch_requests
 603 * 
 604 *       We don't want READA requests to prevent sleepers from ever
 605 *       waking.  Note that READA is used extremely rarely - a few
 606 *       filesystems use it for directory readahead.
 607 * 
 608 *  When a process wants a new request:
 609 * 
 610 *    b) If free_requests == 0, the requester sleeps in FIFO manner, and
 611 *       the queue full condition is set.  The full condition is not
 612 *       cleared until there are no longer any waiters.  Once the full
 613 *       condition is set, all new io must wait, hopefully for a very
 614 *       short period of time.
 615 * 
 616 *  When a request is released:
 617 * 
 618 *    c) If free_requests < batch_requests, do nothing.
 619 * 
 620 *    d) If free_requests >= batch_requests, wake up a single waiter.
 621 *
 622 *   As each waiter gets a request, he wakes another waiter.  We do this
 623 *   to prevent a race where an unplug might get run before a request makes
 624 *   it's way onto the queue.  The result is a cascade of wakeups, so delaying
 625 *   the initial wakeup until we've got batch_requests available helps avoid
 626 *   wakeups where there aren't any requests available yet.
 627 */
 628
 629static struct request *__get_request_wait(request_queue_t *q, int rw)
 630{
 631        register struct request *rq;
 632        DECLARE_WAITQUEUE(wait, current);
 633
 634        add_wait_queue_exclusive(&q->wait_for_requests, &wait);
 635
 636        do {
 637                set_current_state(TASK_UNINTERRUPTIBLE);
 638                spin_lock_irq(&io_request_lock);
 639                if (blk_oversized_queue(q) || q->rq.count == 0) {
 640                        __generic_unplug_device(q);
 641                        spin_unlock_irq(&io_request_lock);
 642                        schedule();
 643                        spin_lock_irq(&io_request_lock);
 644                }
 645                rq = get_request(q, rw);
 646                spin_unlock_irq(&io_request_lock);
 647        } while (rq == NULL);
 648        remove_wait_queue(&q->wait_for_requests, &wait);
 649        current->state = TASK_RUNNING;
 650
 651        return rq;
 652}
 653
 654static void get_request_wait_wakeup(request_queue_t *q, int rw)
 655{
 656        /*
 657         * avoid losing an unplug if a second __get_request_wait did the
 658         * generic_unplug_device while our __get_request_wait was running
 659         * w/o the queue_lock held and w/ our request out of the queue.
 660         */     
 661        if (waitqueue_active(&q->wait_for_requests))
 662                wake_up(&q->wait_for_requests);
 663}
 664
 665/* RO fail safe mechanism */
 666
 667static long ro_bits[MAX_BLKDEV][8];
 668
 669int is_read_only(kdev_t dev)
 670{
 671        int minor,major;
 672
 673        major = MAJOR(dev);
 674        minor = MINOR(dev);
 675        if (major < 0 || major >= MAX_BLKDEV) return 0;
 676        return ro_bits[major][minor >> 5] & (1 << (minor & 31));
 677}
 678
 679void set_device_ro(kdev_t dev,int flag)
 680{
 681        int minor,major;
 682
 683        major = MAJOR(dev);
 684        minor = MINOR(dev);
 685        if (major < 0 || major >= MAX_BLKDEV) return;
 686        if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
 687        else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
 688}
 689
 690inline void drive_stat_acct (kdev_t dev, int rw,
 691                                unsigned long nr_sectors, int new_io)
 692{
 693        unsigned int major = MAJOR(dev);
 694        unsigned int index;
 695
 696        index = disk_index(dev);
 697        if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
 698                return;
 699
 700        kstat.dk_drive[major][index] += new_io;
 701        if (rw == READ) {
 702                kstat.dk_drive_rio[major][index] += new_io;
 703                kstat.dk_drive_rblk[major][index] += nr_sectors;
 704        } else if (rw == WRITE) {
 705                kstat.dk_drive_wio[major][index] += new_io;
 706                kstat.dk_drive_wblk[major][index] += nr_sectors;
 707        } else
 708                printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 709}
 710
 711#ifdef CONFIG_BLK_STATS
 712/*
 713 * Return up to two hd_structs on which to do IO accounting for a given
 714 * request.
 715 *
 716 * On a partitioned device, we want to account both against the partition
 717 * and against the whole disk.
 718 */
 719static void locate_hd_struct(struct request *req, 
 720                             struct hd_struct **hd1,
 721                             struct hd_struct **hd2)
 722{
 723        struct gendisk *gd;
 724
 725        *hd1 = NULL;
 726        *hd2 = NULL;
 727        
 728        gd = get_gendisk(req->rq_dev);
 729        if (gd && gd->part) {
 730                /* Mask out the partition bits: account for the entire disk */
 731                int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
 732                int whole_minor = devnr << gd->minor_shift;
 733
 734                *hd1 = &gd->part[whole_minor];
 735                if (whole_minor != MINOR(req->rq_dev))
 736                        *hd2= &gd->part[MINOR(req->rq_dev)];
 737        }
 738}
 739
 740/*
 741 * Round off the performance stats on an hd_struct.
 742 *
 743 * The average IO queue length and utilisation statistics are maintained
 744 * by observing the current state of the queue length and the amount of
 745 * time it has been in this state for.
 746 * Normally, that accounting is done on IO completion, but that can result
 747 * in more than a second's worth of IO being accounted for within any one
 748 * second, leading to >100% utilisation.  To deal with that, we do a
 749 * round-off before returning the results when reading /proc/partitions,
 750 * accounting immediately for all queue usage up to the current jiffies and
 751 * restarting the counters again.
 752 */
 753void disk_round_stats(struct hd_struct *hd)
 754{
 755        unsigned long now = jiffies;
 756        
 757        hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
 758        hd->last_queue_change = now;
 759
 760        if (hd->ios_in_flight)
 761                hd->io_ticks += (now - hd->last_idle_time);
 762        hd->last_idle_time = now;       
 763}
 764
 765static inline void down_ios(struct hd_struct *hd)
 766{
 767        disk_round_stats(hd);   
 768        --hd->ios_in_flight;
 769}
 770
 771static inline void up_ios(struct hd_struct *hd)
 772{
 773        disk_round_stats(hd);
 774        ++hd->ios_in_flight;
 775}
 776
 777static void account_io_start(struct hd_struct *hd, struct request *req,
 778                             int merge, int sectors)
 779{
 780        switch (req->cmd) {
 781        case READ:
 782                if (merge)
 783                        hd->rd_merges++;
 784                hd->rd_sectors += sectors;
 785                break;
 786        case WRITE:
 787                if (merge)
 788                        hd->wr_merges++;
 789                hd->wr_sectors += sectors;
 790                break;
 791        }
 792        if (!merge)
 793                up_ios(hd);
 794}
 795
 796static void account_io_end(struct hd_struct *hd, struct request *req)
 797{
 798        unsigned long duration = jiffies - req->start_time;
 799        switch (req->cmd) {
 800        case READ:
 801                hd->rd_ticks += duration;
 802                hd->rd_ios++;
 803                break;
 804        case WRITE:
 805                hd->wr_ticks += duration;
 806                hd->wr_ios++;
 807                break;
 808        }
 809        down_ios(hd);
 810}
 811
 812void req_new_io(struct request *req, int merge, int sectors)
 813{
 814        struct hd_struct *hd1, *hd2;
 815
 816        locate_hd_struct(req, &hd1, &hd2);
 817        if (hd1)
 818                account_io_start(hd1, req, merge, sectors);
 819        if (hd2)
 820                account_io_start(hd2, req, merge, sectors);
 821}
 822
 823void req_merged_io(struct request *req)
 824{
 825        struct hd_struct *hd1, *hd2;
 826
 827        locate_hd_struct(req, &hd1, &hd2);
 828        if (hd1)
 829                down_ios(hd1);
 830        if (hd2)        
 831                down_ios(hd2);
 832}
 833
 834void req_finished_io(struct request *req)
 835{
 836        struct hd_struct *hd1, *hd2;
 837
 838        locate_hd_struct(req, &hd1, &hd2);
 839        if (hd1)
 840                account_io_end(hd1, req);
 841        if (hd2)        
 842                account_io_end(hd2, req);
 843}
 844EXPORT_SYMBOL(req_finished_io);
 845#endif /* CONFIG_BLK_STATS */
 846
 847/*
 848 * add-request adds a request to the linked list.
 849 * io_request_lock is held and interrupts disabled, as we muck with the
 850 * request queue list.
 851 *
 852 * By this point, req->cmd is always either READ/WRITE, never READA,
 853 * which is important for drive_stat_acct() above.
 854 */
 855static inline void add_request(request_queue_t * q, struct request * req,
 856                               struct list_head *insert_here)
 857{
 858        drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
 859
 860        if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
 861                spin_unlock_irq(&io_request_lock);
 862                BUG();
 863        }
 864
 865        /*
 866         * elevator indicated where it wants this request to be
 867         * inserted at elevator_merge time
 868         */
 869        list_add(&req->queue, insert_here);
 870}
 871
 872/*
 873 * Must be called with io_request_lock held and interrupts disabled
 874 */
 875void blkdev_release_request(struct request *req)
 876{
 877        request_queue_t *q = req->q;
 878
 879        req->rq_status = RQ_INACTIVE;
 880        req->q = NULL;
 881
 882        /*
 883         * Request may not have originated from ll_rw_blk. if not,
 884         * assume it has free buffers and check waiters
 885         */
 886        if (q) {
 887                struct request_list *rl = &q->rq;
 888                int oversized_batch = 0;
 889
 890                if (q->can_throttle)
 891                        oversized_batch = blk_oversized_queue_batch(q);
 892                rl->count++;
 893                /*
 894                 * paranoia check
 895                 */
 896                if (req->cmd == READ || req->cmd == WRITE)
 897                        rl->pending[req->cmd]--;
 898                if (rl->pending[READ] > q->nr_requests)
 899                        printk("blk: reads: %u\n", rl->pending[READ]);
 900                if (rl->pending[WRITE] > q->nr_requests)
 901                        printk("blk: writes: %u\n", rl->pending[WRITE]);
 902                if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
 903                        printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
 904                list_add(&req->queue, &rl->free);
 905                if (rl->count >= q->batch_requests && !oversized_batch) {
 906                        smp_mb();
 907                        if (waitqueue_active(&q->wait_for_requests))
 908                                wake_up(&q->wait_for_requests);
 909                }
 910        }
 911}
 912
 913/*
 914 * Has to be called with the request spinlock acquired
 915 */
 916static void attempt_merge(request_queue_t * q,
 917                          struct request *req,
 918                          int max_sectors,
 919                          int max_segments)
 920{
 921        struct request *next;
 922  
 923        next = blkdev_next_request(req);
 924        if (req->sector + req->nr_sectors != next->sector)
 925                return;
 926        if (req->cmd != next->cmd
 927            || req->rq_dev != next->rq_dev
 928            || req->nr_sectors + next->nr_sectors > max_sectors
 929            || next->waiting)
 930                return;
 931        /*
 932         * If we are not allowed to merge these requests, then
 933         * return.  If we are allowed to merge, then the count
 934         * will have been updated to the appropriate number,
 935         * and we shouldn't do it here too.
 936         */
 937        if (!q->merge_requests_fn(q, req, next, max_segments))
 938                return;
 939
 940        q->elevator.elevator_merge_req_fn(req, next);
 941        req->bhtail->b_reqnext = next->bh;
 942        req->bhtail = next->bhtail;
 943        req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
 944        list_del(&next->queue);
 945
 946        /* One last thing: we have removed a request, so we now have one
 947           less expected IO to complete for accounting purposes. */
 948        req_merged_io(req);
 949
 950        blkdev_release_request(next);
 951}
 952
 953static inline void attempt_back_merge(request_queue_t * q,
 954                                      struct request *req,
 955                                      int max_sectors,
 956                                      int max_segments)
 957{
 958        if (&req->queue == q->queue_head.prev)
 959                return;
 960        attempt_merge(q, req, max_sectors, max_segments);
 961}
 962
 963static inline void attempt_front_merge(request_queue_t * q,
 964                                       struct list_head * head,
 965                                       struct request *req,
 966                                       int max_sectors,
 967                                       int max_segments)
 968{
 969        struct list_head * prev;
 970
 971        prev = req->queue.prev;
 972        if (head == prev)
 973                return;
 974        attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
 975}
 976
/*
 * Queue the buffer head @bh for I/O of type @rw on request queue @q.
 *
 * READA is turned into READ (the read-ahead special case is compiled out
 * by the "#if 0" below, so rw_ahead stays 0).  @bh may be replaced by
 * whatever blk_queue_bounce() returns.  With io_request_lock held, the
 * elevator is asked whether @bh can be merged at the back or the front of
 * an already queued request.  If it cannot, a request is obtained
 * (sleeping in __get_request_wait() with the lock dropped when none is
 * free), filled in from @bh and linked in with add_request().
 *
 * Returns 0 on every path.  An unsupported @rw hits BUG() and the buffer
 * is completed through bh->b_end_io().
 */
 977static int __make_request(request_queue_t * q, int rw,
 978                                  struct buffer_head * bh)
 979{
 980        unsigned int sector, count, sync;
 981        int max_segments = MAX_SEGMENTS;
 982        struct request * req, *freereq = NULL;
 983        int rw_ahead, max_sectors, el_ret;
 984        struct list_head *head, *insert_here;
 985        int latency;
 986        elevator_t *elevator = &q->elevator;
 987        int should_wake = 0;
 988
            /* size of this buffer in 512-byte sectors, and where it starts */
 989        count = bh->b_size >> 9;
 990        sector = bh->b_rsector;
            /* BH_Sync is consumed here; if it was set the queue is unplugged at "out" */
 991        sync = test_and_clear_bit(BH_Sync, &bh->b_state);
 992
 993        rw_ahead = 0;   /* normal case; gets changed below for READA */
 994        switch (rw) {
 995                case READA:
 996#if 0   /* bread() misinterprets failed READA attempts as IO errors on SMP */
 997                        rw_ahead = 1;
 998#endif
 999                        rw = READ;      /* drop into READ */
1000                case READ:
1001                case WRITE:
1002                        latency = elevator_request_latency(elevator, rw);
1003                        break;
1004                default:
1005                        BUG();
1006                        goto end_io;
1007        }
1008
1009        /* We'd better have a real physical mapping!
1010           Check this bit only if the buffer was dirty and just locked
1011           down by us so at this point flushpage will block and
1012           won't clear the mapped bit under us. */
1013        if (!buffer_mapped(bh))
1014                BUG();
1015
1016        /*
1017         * Temporary solution - in 2.5 this will be done by the lowlevel
1018         * driver. Create a bounce buffer if the buffer data points into
1019         * high memory - keep the original buffer otherwise.
1020         */
1021        bh = blk_queue_bounce(q, rw, bh);
1022
1023/* look for a free request. */
1024        /*
1025         * Try to coalesce the new request with old requests
1026         */
1027        max_sectors = get_max_sectors(bh->b_rdev);
1028
1029        req = NULL;
1030        head = &q->queue_head;
1031        /*
1032         * Now we acquire the request spinlock, we have to be mega careful
1033         * not to schedule or do something nonatomic
1034         */
1035        spin_lock_irq(&io_request_lock);
1036
            /*
             * Also re-entered after sleeping for a free request: head has been
             * reset to the list head and the merge scan is redone before the
             * request obtained while sleeping (freereq) is used.
             */
1037again:
            /* default insertion point: the tail of the queue */
1038        insert_here = head->prev;
1039
1040        if (list_empty(head)) {
1041                q->plug_device_fn(q, bh->b_rdev); /* is atomic */
1042                goto get_rq;
1043        } else if (q->head_active && !q->plugged)
                    /*
                     * start the elevator scan one entry further in
                     * NOTE(review): presumably the head request is owned by
                     * the driver at this point - confirm
                     */
1044                head = head->next;
1045
1046        el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
1047        switch (el_ret) {
1048
1049                case ELEVATOR_BACK_MERGE:
                            /* queue refused the merge: queue a new request right after req */
1050                        if (!q->back_merge_fn(q, req, bh, max_segments)) {
1051                                insert_here = &req->queue;
1052                                break;
1053                        }
                            /* append bh to req's buffer chain and grow the request */
1054                        req->bhtail->b_reqnext = bh;
1055                        req->bhtail = bh;
1056                        req->nr_sectors = req->hard_nr_sectors += count;
1057                        blk_started_io(count);
1058                        blk_started_sectors(req, count);
1059                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1060                        req_new_io(req, 1, count);
1061                        attempt_back_merge(q, req, max_sectors, max_segments);
1062                        goto out;
1063
1064                case ELEVATOR_FRONT_MERGE:
                            /* queue refused the merge: queue a new request right before req */
1065                        if (!q->front_merge_fn(q, req, bh, max_segments)) {
1066                                insert_here = req->queue.prev;
1067                                break;
1068                        }
                            /* bh becomes the new first buffer of req */
1069                        bh->b_reqnext = req->bh;
1070                        req->bh = bh;
1071                        /*
1072                         * may not be valid, but queues not having bounce
1073                         * enabled for highmem pages must not look at
1074                         * ->buffer anyway
1075                         */
1076                        req->buffer = bh->b_data;
1077                        req->current_nr_sectors = req->hard_cur_sectors = count;
1078                        req->sector = req->hard_sector = sector;
1079                        req->nr_sectors = req->hard_nr_sectors += count;
1080                        blk_started_io(count);
1081                        blk_started_sectors(req, count);
1082                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1083                        req_new_io(req, 1, count);
1084                        attempt_front_merge(q, head, req, max_sectors, max_segments);
1085                        goto out;
1086
1087                /*
1088                 * elevator says don't/can't merge. get new request
1089                 */
1090                case ELEVATOR_NO_MERGE:
1091                        /*
1092                         * use elevator hints as to where to insert the
1093                         * request. if no hints, just add it to the back
1094                         * of the queue
1095                         */
1096                        if (req)
1097                                insert_here = &req->queue;
1098                        break;
1099
1100                default:
1101                        printk("elevator returned crap (%d)\n", el_ret);
1102                        BUG();
1103        }
1104                
1105get_rq:
            /* use the request obtained while sleeping on an earlier pass, if any */
1106        if (freereq) {
1107                req = freereq;
1108                freereq = NULL;
1109        } else {
1110                /*
1111                 * See description above __get_request_wait()
1112                 */
1113                if (rw_ahead) {
1114                        if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
1115                                spin_unlock_irq(&io_request_lock);
1116                                goto end_io;
1117                        }
1118                        req = get_request(q, rw);
1119                        if (req == NULL)
1120                                BUG();
1121                } else {
1122                        req = get_request(q, rw);
1123                        if (req == NULL) {
                                    /*
                                     * no free request: drop the lock, wait for one,
                                     * then retake the lock and redo the merge scan
                                     */
1124                                spin_unlock_irq(&io_request_lock);
1125                                freereq = __get_request_wait(q, rw);
1126                                head = &q->queue_head;
1127                                spin_lock_irq(&io_request_lock);
1128                                should_wake = 1;
1129                                goto again;
1130                        }
1131                }
1132        }
1133
1134/* fill up the request-info, and add it to the queue */
1135        req->elevator_sequence = latency;
1136        req->cmd = rw;
1137        req->errors = 0;
1138        req->hard_sector = req->sector = sector;
1139        req->hard_nr_sectors = req->nr_sectors = count;
1140        req->current_nr_sectors = req->hard_cur_sectors = count;
1141        req->nr_segments = 1; /* Always 1 for a new request. */
1142        req->nr_hw_segments = 1; /* Always 1 for a new request. */
1143        req->buffer = bh->b_data;
1144        req->waiting = NULL;
1145        req->bh = bh;
1146        req->bhtail = bh;
1147        req->rq_dev = bh->b_rdev;
1148        req->start_time = jiffies;
1149        req_new_io(req, 0, count);
1150        blk_started_io(count);
1151        blk_started_sectors(req, count);
1152        add_request(q, req, insert_here);
1153out:
            /*
             * freereq is still set here only when we slept for a request and
             * then merged the buffer instead of using it: give it back.
             */
1154        if (freereq)
1155                blkdev_release_request(freereq);
            /* should_wake is set only on the path that slept for a request */
1156        if (should_wake)
1157                get_request_wait_wakeup(q, rw);
1158        if (sync)
1159                __generic_unplug_device(q);
1160        spin_unlock_irq(&io_request_lock);
1161        return 0;
            /* reached without io_request_lock held; completes the buffer directly */
1162end_io:
1163        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1164        return 0;
1165}
1166
1167/**
1168 * generic_make_request: hand a buffer head to it's device driver for I/O
1169 * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
1170 * @bh:  The buffer head describing the location in memory and on the device.
1171 *
1172 * generic_make_request() is used to make I/O requests of block
1173 * devices. It is passed a &struct buffer_head and a &rw value.  The
1174 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
1175 * %READA value means that a read is required, but that the driver is
1176 * free to fail the request if, for example, it cannot get needed
1177 * resources immediately.
1178 *
1179 * generic_make_request() does not return any status.  The
1180 * success/failure status of the request, along with notification of
1181 * completion, is delivered asynchronously through the bh->b_end_io
1182 * function described (one day) else where.
1183 *
1184 * The caller of generic_make_request must make sure that b_page,
1185 * b_addr, b_size are set to describe the memory buffer, that b_rdev
1186 * and b_rsector are set to describe the device address, and the
1187 * b_end_io and optionally b_private are set to describe how
1188 * completion notification should be signaled.  BH_Mapped should also
1189 * be set (to confirm that b_dev and b_blocknr are valid).
1190 *
1191 * generic_make_request and the drivers it calls may use b_reqnext,
1192 * and may change b_rdev and b_rsector.  So the values of these fields
1193 * should NOT be depended on after the call to generic_make_request.
1194 * Because of this, the caller should record the device address
1195 * information in b_dev and b_blocknr.
1196 *
1197 * Apart from those fields mentioned above, no other fields, and in
1198 * particular, no other flags, are changed by generic_make_request or
1199 * any lower level drivers.
1200 * */
1201void generic_make_request (int rw, struct buffer_head * bh)
1202{
1203        int major = MAJOR(bh->b_rdev);
1204        int minorsize = 0;
1205        request_queue_t *q;
1206
1207        if (!bh->b_end_io)
1208                BUG();
1209
1210        /* Test device size, when known. */
1211        if (blk_size[major])
1212                minorsize = blk_size[major][MINOR(bh->b_rdev)];
1213        if (minorsize) {
1214                unsigned long maxsector = (minorsize << 1) + 1;
1215                unsigned long sector = bh->b_rsector;
1216                unsigned int count = bh->b_size >> 9;
1217
1218                if (maxsector < count || maxsector - count < sector) {
1219                        /* Yecch */
1220                        bh->b_state &= ~(1 << BH_Dirty);
1221
1222                        /* This may well happen - the kernel calls bread()
1223                           without checking the size of the device, e.g.,
1224                           when mounting a device. */
1225                        printk(KERN_INFO
1226                               "attempt to access beyond end of device\n");
1227                        printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
1228                               kdevname(bh->b_rdev), rw,
1229                               (sector + count)>>1, minorsize);
1230
1231                        bh->b_end_io(bh, 0);
1232                        return;
1233                }
1234        }
1235
1236        /*
1237         * Resolve the mapping until finished. (drivers are
1238         * still free to implement/resolve their own stacking
1239         * by explicitly returning 0)
1240         */
1241        /* NOTE: we don't repeat the blk_size check for each new device.
1242         * Stacking drivers are expected to know what they are doing.
1243         */
1244        do {
1245                q = blk_get_queue(bh->b_rdev);
1246                if (!q) {
1247                        printk(KERN_ERR
1248                               "generic_make_request: Trying to access "
1249                               "nonexistent block-device %s (%ld)\n",
1250                               kdevname(bh->b_rdev), bh->b_rsector);
1251                        buffer_IO_error(bh);
1252                        break;
1253                }
1254        } while (q->make_request_fn(q, rw, bh));
1255}
1256
1257
1258/**
1259 * submit_bh: submit a buffer_head to the block device later for I/O
1260 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1261 * @bh: The &struct buffer_head which describes the I/O
1262 *
1263 * submit_bh() is very similar in purpose to generic_make_request(), and
1264 * uses that function to do most of the work.
1265 *
1266 * The extra functionality provided by submit_bh is to determine
1267 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
1268 * This is is appropriate for IO requests that come from the buffer
1269 * cache and page cache which (currently) always use aligned blocks.
1270 */
1271void submit_bh(int rw, struct buffer_head * bh)
1272{
1273        int count = bh->b_size >> 9;
1274
1275        if (!test_bit(BH_Lock, &bh->b_state))
1276                BUG();
1277
1278        set_bit(BH_Req, &bh->b_state);
1279        set_bit(BH_Launder, &bh->b_state);
1280
1281        /*
1282         * First step, 'identity mapping' - RAID or LVM might
1283         * further remap this.
1284         */
1285        bh->b_rdev = bh->b_dev;
1286        bh->b_rsector = bh->b_blocknr * count;
1287
1288        get_bh(bh);
1289        generic_make_request(rw, bh);
1290
1291        /* fix race condition with wait_on_buffer() */
1292        smp_mb(); /* spin_unlock may have inclusive semantics */
1293        if (waitqueue_active(&bh->b_wait))
1294                wake_up(&bh->b_wait);
1295
1296        put_bh(bh);
1297        switch (rw) {
1298                case WRITE:
1299                        kstat.pgpgout += count;
1300                        break;
1301                default:
1302                        kstat.pgpgin += count;
1303                        break;
1304        }
1305}
1306
1307/**
1308 * ll_rw_block: low-level access to block devices
1309 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
1310 * @nr: number of &struct buffer_heads in the array
1311 * @bhs: array of pointers to &struct buffer_head
1312 *
1313 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
1314 * and requests an I/O operation on them, either a %READ or a %WRITE.
1315 * The third %READA option is described in the documentation for
1316 * generic_make_request() which ll_rw_block() calls.
1317 *
1318 * This function provides extra functionality that is not in
1319 * generic_make_request() that is relevant to buffers in the buffer
1320 * cache or page cache.  In particular it drops any buffer that it
1321 * cannot get a lock on (with the BH_Lock state bit), any buffer that
1322 * appears to be clean when doing a write request, and any buffer that
1323 * appears to be up-to-date when doing read request.  Further it marks
1324 * as clean buffers that are processed for writing (the buffer cache
1325 * wont assume that they are actually clean until the buffer gets
1326 * unlocked).
1327 *
1328 * ll_rw_block sets b_end_io to simple completion handler that marks
1329 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
1330 * any waiters.  As client that needs a more interesting completion
1331 * routine should call submit_bh() (or generic_make_request())
1332 * directly.
1333 *
1334 * Caveat:
1335 *  All of the buffers must be for the same device, and must also be
1336 *  of the current approved size for the device.  */
1337
1338void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1339{
1340        unsigned int major;
1341        int correct_size;
1342        int i;
1343
1344        if (!nr)
1345                return;
1346
1347        major = MAJOR(bhs[0]->b_dev);
1348
1349        /* Determine correct block size for this device. */
1350        correct_size = get_hardsect_size(bhs[0]->b_dev);
1351
1352        /* Verify requested block sizes. */
1353        for (i = 0; i < nr; i++) {
1354                struct buffer_head *bh = bhs[i];
1355                if (bh->b_size % correct_size) {
1356                        printk(KERN_NOTICE "ll_rw_block: device %s: "
1357                               "only %d-char blocks implemented (%u)\n",
1358                               kdevname(bhs[0]->b_dev),
1359                               correct_size, bh->b_size);
1360                        goto sorry;
1361                }
1362        }
1363
1364        if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1365                printk(KERN_NOTICE "Can't write to read-only device %s\n",
1366                       kdevname(bhs[0]->b_dev));
1367                goto sorry;
1368        }
1369
1370        for (i = 0; i < nr; i++) {
1371                struct buffer_head *bh = bhs[i];
1372
1373                /* Only one thread can actually submit the I/O. */
1374                if (test_and_set_bit(BH_Lock, &bh->b_state))
1375                        continue;
1376
1377                /* We have the buffer lock */
1378                atomic_inc(&bh->b_count);
1379                bh->b_end_io = end_buffer_io_sync;
1380
1381                switch(rw) {
1382                case WRITE:
1383                        if (!atomic_set_buffer_clean(bh))
1384                                /* Hmmph! Nothing to write */
1385                                goto end_io;
1386                        __mark_buffer_clean(bh);
1387                        break;
1388
1389                case READA:
1390                case READ:
1391                        if (buffer_uptodate(bh))
1392                                /* Hmmph! Already have it */
1393                                goto end_io;
1394                        break;
1395                default:
1396                        BUG();
1397        end_io:
1398                        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1399                        continue;
1400                }
1401
1402                submit_bh(rw, bh);
1403        }
1404        return;
1405
1406sorry:
1407        /* Make sure we don't get infinite dirty retries.. */
1408        for (i = 0; i < nr; i++)
1409                mark_buffer_clean(bhs[i]);
1410}
1411
1412#ifdef CONFIG_STRAM_SWAP
1413extern int stram_device_init (void);
1414#endif
1415
1416
1417/**
1418 * end_that_request_first - end I/O on one buffer.
1419 * @req:      the request being processed
1420 * @uptodate: 0 for I/O error
1421 * @name:     the name printed for an I/O error
1422 *
1423 * Description:
1424 *     Ends I/O on the first buffer attached to @req, and sets it up
1425 *     for the next buffer_head (if any) in the cluster.
1426 *     
1427 * Return:
1428 *     0 - we are done with this request, call end_that_request_last()
1429 *     1 - still buffers pending for this request
1430 *
1431 * Caveat: 
1432 *     Drivers implementing their own end_request handling must call
1433 *     blk_finished_io() appropriately.
1434 **/
1435
1436int end_that_request_first (struct request *req, int uptodate, char *name)
1437{
1438        struct buffer_head * bh;
1439        int nsect;
1440
1441        req->errors = 0;
1442        if (!uptodate)
1443                printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1444                        kdevname(req->rq_dev), name, req->sector);
1445
1446        if ((bh = req->bh) != NULL) {
1447                nsect = bh->b_size >> 9;
1448                blk_finished_io(nsect);
1449                blk_finished_sectors(req, nsect);
1450                req->bh = bh->b_reqnext;
1451                bh->b_reqnext = NULL;
1452                bh->b_end_io(bh, uptodate);
1453                if ((bh = req->bh) != NULL) {
1454                        req->hard_sector += nsect;
1455                        req->hard_nr_sectors -= nsect;
1456                        req->sector = req->hard_sector;
1457                        req->nr_sectors = req->hard_nr_sectors;
1458
1459                        req->current_nr_sectors = bh->b_size >> 9;
1460                        req->hard_cur_sectors = req->current_nr_sectors;
1461                        if (req->nr_sectors < req->current_nr_sectors) {
1462                                req->nr_sectors = req->current_nr_sectors;
1463                                printk("end_request: buffer-list destroyed\n");
1464                        }
1465                        req->buffer = bh->b_data;
1466                        return 1;
1467                }
1468        }
1469        return 0;
1470}
1471
1472void end_that_request_last(struct request *req)
1473{
1474        struct completion *waiting = req->waiting;
1475
1476        req_finished_io(req);
1477        blkdev_release_request(req);
1478        if (waiting)
1479                complete(waiting);
1480}
1481
1482int __init blk_dev_init(void)
1483{
1484        struct blk_dev_struct *dev;
1485
1486        request_cachep = kmem_cache_create("blkdev_requests",
1487                                           sizeof(struct request),
1488                                           0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1489
1490        if (!request_cachep)
1491                panic("Can't create request pool slab cache\n");
1492
1493        for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1494                dev->queue = NULL;
1495
1496        memset(ro_bits,0,sizeof(ro_bits));
1497        memset(max_readahead, 0, sizeof(max_readahead));
1498        memset(max_sectors, 0, sizeof(max_sectors));
1499
1500        blk_max_low_pfn = max_low_pfn - 1;
1501        blk_max_pfn = max_pfn - 1;
1502
1503#ifdef CONFIG_AMIGA_Z2RAM
1504        z2_init();
1505#endif
1506#ifdef CONFIG_STRAM_SWAP
1507        stram_device_init();
1508#endif
1509#ifdef CONFIG_ISP16_CDI
1510        isp16_init();
1511#endif
1512#ifdef CONFIG_BLK_DEV_PS2
1513        ps2esdi_init();
1514#endif
1515#ifdef CONFIG_BLK_DEV_XD
1516        xd_init();
1517#endif
1518#ifdef CONFIG_BLK_DEV_MFM
1519        mfm_init();
1520#endif
1521#ifdef CONFIG_PARIDE
1522        { extern void paride_init(void); paride_init(); };
1523#endif
1524#ifdef CONFIG_MAC_FLOPPY
1525        swim3_init();
1526#endif
1527#ifdef CONFIG_BLK_DEV_SWIM_IOP
1528        swimiop_init();
1529#endif
1530#ifdef CONFIG_AMIGA_FLOPPY
1531        amiga_floppy_init();
1532#endif
1533#ifdef CONFIG_ATARI_FLOPPY
1534        atari_floppy_init();
1535#endif
1536#ifdef CONFIG_BLK_DEV_FD
1537        floppy_init();
1538#else
1539#if defined(__i386__)   /* Do we even need this? */
1540        outb_p(0xc, 0x3f2);
1541#endif
1542#endif
1543#ifdef CONFIG_CDU31A
1544        cdu31a_init();
1545#endif
1546#ifdef CONFIG_ATARI_ACSI
1547        acsi_init();
1548#endif
1549#ifdef CONFIG_MCD
1550        mcd_init();
1551#endif
1552#ifdef CONFIG_MCDX
1553        mcdx_init();
1554#endif
1555#ifdef CONFIG_SBPCD
1556        sbpcd_init();
1557#endif
1558#ifdef CONFIG_AZTCD
1559        aztcd_init();
1560#endif
1561#ifdef CONFIG_CDU535
1562        sony535_init();
1563#endif
1564#ifdef CONFIG_GSCD
1565        gscd_init();
1566#endif
1567#ifdef CONFIG_CM206
1568        cm206_init();
1569#endif
1570#ifdef CONFIG_OPTCD
1571        optcd_init();
1572#endif
1573#ifdef CONFIG_SJCD
1574        sjcd_init();
1575#endif
1576#ifdef CONFIG_APBLOCK
1577        ap_init();
1578#endif
1579#ifdef CONFIG_DDV
1580        ddv_init();
1581#endif
1582#ifdef CONFIG_MDISK
1583        mdisk_init();
1584#endif
1585#ifdef CONFIG_DASD
1586        dasd_init();
1587#endif
1588#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
1589        tapeblock_init();
1590#endif
1591#ifdef CONFIG_BLK_DEV_XPRAM
1592        xpram_init();
1593#endif
1594
1595#ifdef CONFIG_SUN_JSFLASH
1596        jsfd_init();
1597#endif
1598        return 0;
1599};
1600
/*
 * Block layer functions and variables exported with EXPORT_SYMBOL().
 */
1601EXPORT_SYMBOL(io_request_lock);
1602EXPORT_SYMBOL(end_that_request_first);
1603EXPORT_SYMBOL(end_that_request_last);
1604EXPORT_SYMBOL(blk_grow_request_list);
1605EXPORT_SYMBOL(blk_init_queue);
1606EXPORT_SYMBOL(blk_get_queue);
1607EXPORT_SYMBOL(blk_cleanup_queue);
1608EXPORT_SYMBOL(blk_queue_headactive);
1609EXPORT_SYMBOL(blk_queue_throttle_sectors);
1610EXPORT_SYMBOL(blk_queue_make_request);
1611EXPORT_SYMBOL(generic_make_request);
1612EXPORT_SYMBOL(blkdev_release_request);
1613EXPORT_SYMBOL(generic_unplug_device);
1614EXPORT_SYMBOL(blk_queue_bounce_limit);
1615EXPORT_SYMBOL(blk_max_low_pfn);
1616EXPORT_SYMBOL(blk_max_pfn);
1617EXPORT_SYMBOL(blk_seg_merge_ok);
1618EXPORT_SYMBOL(blk_nohighio);
1619
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.