linux-old/drivers/block/ll_rw_blk.c
<<
>>
Prefs
   1/*
   2 *  linux/drivers/block/ll_rw_blk.c
   3 *
   4 * Copyright (C) 1991, 1992 Linus Torvalds
   5 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
   6 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
   7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
   8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
   9 */
  10
  11/*
  12 * This handles all read/write requests to block devices
  13 */
  14#include <linux/sched.h>
  15#include <linux/kernel.h>
  16#include <linux/kernel_stat.h>
  17#include <linux/errno.h>
  18#include <linux/string.h>
  19#include <linux/config.h>
  20#include <linux/locks.h>
  21#include <linux/mm.h>
  22#include <linux/swap.h>
  23#include <linux/init.h>
  24#include <linux/smp_lock.h>
  25#include <linux/completion.h>
  26#include <linux/bootmem.h>
  27
  28#include <asm/system.h>
  29#include <asm/io.h>
  30#include <linux/blk.h>
  31#include <linux/highmem.h>
  32#include <linux/slab.h>
  33#include <linux/module.h>
  34
  35/*
  36 * MAC Floppy IWM hooks
  37 */
  38
  39#ifdef CONFIG_MAC_FLOPPY_IWM
  40extern int mac_floppy_init(void);
  41#endif
  42
  43/*
  44 * For the allocated request tables
  45 */
  46static kmem_cache_t *request_cachep;
  47
  48/*
  49 * The "disk" task queue is used to start the actual requests
  50 * after a plug
  51 */
  52DECLARE_TASK_QUEUE(tq_disk);
  53
  54/*
  55 * Protect the request list against multiple users..
  56 *
  57 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
  58 * from the IRQ event side, and almost 100% SMP threaded from the syscall
   59 * side (we still have to protect against block device array operations, and
  60 * the do_request() side is casually still unsafe. The kernel lock protects
  61 * this part currently.).
  62 *
  63 * there is a fair chance that things will work just OK if these functions
  64 * are called with no global kernel lock held ...
  65 */
  66spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
  67
  68/* This specifies how many sectors to read ahead on the disk. */
  69
  70int read_ahead[MAX_BLKDEV];
  71
  72/* blk_dev_struct is:
  73 *      *request_fn
  74 *      *current_request
  75 */
  76struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
  77
  78/*
  79 * blk_size contains the size of all block-devices in units of 1024 byte
  80 * sectors:
  81 *
  82 * blk_size[MAJOR][MINOR]
  83 *
  84 * if (!blk_size[MAJOR]) then no minor size checking is done.
  85 */
  86int * blk_size[MAX_BLKDEV];
  87
  88/*
  89 * blksize_size contains the size of all block-devices:
  90 *
  91 * blksize_size[MAJOR][MINOR]
  92 *
  93 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
  94 */
  95int * blksize_size[MAX_BLKDEV];
  96
  97/*
  98 * hardsect_size contains the size of the hardware sector of a device.
  99 *
 100 * hardsect_size[MAJOR][MINOR]
 101 *
 102 * if (!hardsect_size[MAJOR])
 103 *              then 512 bytes is assumed.
 104 * else
 105 *              sector_size is hardsect_size[MAJOR][MINOR]
 106 * This is currently set by some scsi devices and read by the msdos fs driver.
 107 * Other uses may appear later.
 108 */
 109int * hardsect_size[MAX_BLKDEV];
 110
 111/*
 112 * The following tunes the read-ahead algorithm in mm/filemap.c
 113 */
 114int * max_readahead[MAX_BLKDEV];
 115
 116/*
 117 * Max number of sectors per request
 118 */
 119int * max_sectors[MAX_BLKDEV];
 120
 121unsigned long blk_max_low_pfn, blk_max_pfn;
 122int blk_nohighio = 0;
 123
 124int block_dump = 0;
 125
 126static struct timer_list writeback_timer;
 127
 128static inline int get_max_sectors(kdev_t dev)
 129{
 130        if (!max_sectors[MAJOR(dev)])
 131                return MAX_SECTORS;
 132        return max_sectors[MAJOR(dev)][MINOR(dev)];
 133}
 134
 135static inline request_queue_t *__blk_get_queue(kdev_t dev)
 136{
 137        struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
 138
 139        if (bdev->queue)
 140                return bdev->queue(dev);
 141        else
 142                return &blk_dev[MAJOR(dev)].request_queue;
 143}
 144
 145request_queue_t *blk_get_queue(kdev_t dev)
 146{
 147        return __blk_get_queue(dev);
 148}
 149
 150static int __blk_cleanup_queue(struct request_list *list)
 151{
 152        struct list_head *head = &list->free;
 153        struct request *rq;
 154        int i = 0;
 155
 156        while (!list_empty(head)) {
 157                rq = list_entry(head->next, struct request, queue);
 158                list_del(&rq->queue);
 159                kmem_cache_free(request_cachep, rq);
 160                i++;
 161        };
 162
 163        if (i != list->count)
 164                printk("request list leak!\n");
 165
 166        list->count = 0;
 167        return i;
 168}
 169
 170/**
 171 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
 172 * @q:    the request queue to be released
 173 *
 174 * Description:
 175 *     blk_cleanup_queue is the pair to blk_init_queue().  It should
 176 *     be called when a request queue is being released; typically
 177 *     when a block device is being de-registered.  Currently, its
  178 *     primary task is to free all the &struct request structures that
 179 *     were allocated to the queue.
 180 * Caveat: 
 181 *     Hopefully the low level driver will have finished any
 182 *     outstanding requests first...
 183 **/
 184void blk_cleanup_queue(request_queue_t * q)
 185{
 186        int count = q->nr_requests;
 187
 188        count -= __blk_cleanup_queue(&q->rq);
 189
 190        if (count)
 191                printk("blk_cleanup_queue: leaked requests (%d)\n", count);
 192        if (atomic_read(&q->nr_sectors))
 193                printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
 194
 195        memset(q, 0, sizeof(*q));
 196}
 197
 198/**
 199 * blk_queue_headactive - indicate whether head of request queue may be active
 200 * @q:       The queue which this applies to.
  201 * @active:  A flag indicating whether the head of the queue is active.
 202 *
 203 * Description:
 204 *    The driver for a block device may choose to leave the currently active
 205 *    request on the request queue, removing it only when it has completed.
 206 *    The queue handling routines assume this by default for safety reasons
 207 *    and will not involve the head of the request queue in any merging or
 208 *    reordering of requests when the queue is unplugged (and thus may be
 209 *    working on this particular request).
 210 *
 211 *    If a driver removes requests from the queue before processing them, then
  212 *    it may indicate that it does so, thereby allowing the head of the queue
  213 *    to be involved in merging and reordering.  This is done by calling
 214 *    blk_queue_headactive() with an @active flag of %0.
 215 *
 216 *    If a driver processes several requests at once, it must remove them (or
 217 *    at least all but one of them) from the request queue.
 218 *
 219 *    When a queue is plugged the head will be assumed to be inactive.
 220 **/
 221 
 222void blk_queue_headactive(request_queue_t * q, int active)
 223{
 224        q->head_active = active;
 225}
 226
 227/**
 228 * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
 229 * @q:       The queue which this applies to.
  230 * @active:  A flag indicating whether you want sector throttling on
 231 *
 232 * Description:
 233 * The sector throttling code allows us to put a limit on the number of
 234 * sectors pending io to the disk at a given time, sending @active nonzero
 235 * indicates you will call blk_started_sectors and blk_finished_sectors in
 236 * addition to calling blk_started_io and blk_finished_io in order to
 237 * keep track of the number of sectors in flight.
 238 **/
 239 
 240void blk_queue_throttle_sectors(request_queue_t * q, int active)
 241{
 242        q->can_throttle = active;
 243}
 244
 245/**
 246 * blk_queue_make_request - define an alternate make_request function for a device
 247 * @q:  the request queue for the device to be affected
 248 * @mfn: the alternate make_request function
 249 *
 250 * Description:
 251 *    The normal way for &struct buffer_heads to be passed to a device
 252 *    driver is for them to be collected into requests on a request
 253 *    queue, and then to allow the device driver to select requests
 254 *    off that queue when it is ready.  This works well for many block
 255 *    devices. However some block devices (typically virtual devices
 256 *    such as md or lvm) do not benefit from the processing on the
 257 *    request queue, and are served best by having the requests passed
 258 *    directly to them.  This can be achieved by providing a function
 259 *    to blk_queue_make_request().
 260 *
 261 * Caveat:
 262 *    The driver that does this *must* be able to deal appropriately
 263 *    with buffers in "highmemory", either by calling bh_kmap() to get
  264 *    a kernel mapping, or by calling create_bounce() to create a
 265 *    buffer in normal memory.
 266 **/
 267
 268void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 269{
 270        q->make_request_fn = mfn;
 271}
 272
 273/**
 274 * blk_queue_bounce_limit - set bounce buffer limit for queue
 275 * @q:  the request queue for the device
 276 * @dma_addr:   bus address limit
 277 *
 278 * Description:
 279 *    Different hardware can have different requirements as to what pages
 280 *    it can do I/O directly to. A low level driver can call
 281 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
  282 *    buffers for doing I/O to pages residing above @dma_addr. By default
 283 *    the block layer sets this to the highest numbered "low" memory page.
 284 **/
 285void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
 286{
 287        unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
 288        unsigned long mb = dma_addr >> 20;
 289        static request_queue_t *old_q;
 290
 291        /*
 292         * keep this for debugging for now...
 293         */
 294        if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
 295                old_q = q;
 296                printk("blk: queue %p, ", q);
 297                if (dma_addr == BLK_BOUNCE_ANY)
 298                        printk("no I/O memory limit\n");
 299                else
 300                        printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
 301                               (long long) dma_addr);
 302        }
 303
 304        q->bounce_pfn = bounce_pfn;
 305}
 306
 307
 308/*
 309 * can we merge the two segments, or do we need to start a new one?
 310 */
 311static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
 312{
 313        /*
 314         * if bh and nxt are contigous and don't cross a 4g boundary, it's ok
 315         */
 316        if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
 317                return 1;
 318
 319        return 0;
 320}
 321
 322int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
 323{
 324        return __blk_seg_merge_ok(bh, nxt);
 325}
 326
 327static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
 328{
 329        if (req->nr_segments < max_segments) {
 330                req->nr_segments++;
 331                return 1;
 332        }
 333        return 0;
 334}
 335
 336static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
 337                            struct buffer_head *bh, int max_segments)
 338{
 339        if (__blk_seg_merge_ok(req->bhtail, bh))
 340                return 1;
 341
 342        return ll_new_segment(q, req, max_segments);
 343}
 344
 345static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
 346                             struct buffer_head *bh, int max_segments)
 347{
 348        if (__blk_seg_merge_ok(bh, req->bh))
 349                return 1;
 350
 351        return ll_new_segment(q, req, max_segments);
 352}
 353
 354static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
 355                                struct request *next, int max_segments)
 356{
 357        int total_segments = req->nr_segments + next->nr_segments;
 358
 359        if (__blk_seg_merge_ok(req->bhtail, next->bh))
 360                total_segments--;
 361
 362        if (total_segments > max_segments)
 363                return 0;
 364
 365        req->nr_segments = total_segments;
 366        return 1;
 367}
 368
 369/*
 370 * "plug" the device if there are no outstanding requests: this will
 371 * force the transfer to start only after we have put all the requests
 372 * on the list.
 373 *
 374 * This is called with interrupts off and no requests on the queue.
 375 * (and with the request spinlock acquired)
 376 */
 377static void generic_plug_device(request_queue_t *q, kdev_t dev)
 378{
 379        /*
 380         * no need to replug device
 381         */
 382        if (!list_empty(&q->queue_head) || q->plugged)
 383                return;
 384
 385        q->plugged = 1;
 386        queue_task(&q->plug_tq, &tq_disk);
 387}
 388
 389/*
 390 * remove the plug and let it rip..
 391 */
 392static inline void __generic_unplug_device(request_queue_t *q)
 393{
 394        if (q->plugged) {
 395                q->plugged = 0;
 396                if (!list_empty(&q->queue_head))
 397                        q->request_fn(q);
 398        }
 399}
 400
 401void generic_unplug_device(void *data)
 402{
 403        request_queue_t *q = (request_queue_t *) data;
 404        unsigned long flags;
 405
 406        spin_lock_irqsave(&io_request_lock, flags);
 407        __generic_unplug_device(q);
 408        spin_unlock_irqrestore(&io_request_lock, flags);
 409}
 410
 411/** blk_grow_request_list
 412 *  @q: The &request_queue_t
 413 *  @nr_requests: how many requests are desired
 414 *
 415 * More free requests are added to the queue's free lists, bringing
 416 * the total number of requests to @nr_requests.
 417 *
  418 * The requests are added to the queue's single free list; the batch
  419 * wakeup thresholds are recomputed from @max_queue_sectors.
  420 *
  421 * This function does not sleep (SLAB_ATOMIC under io_request_lock).
 422 *
 423 * Returns the (new) number of requests which the queue has available.
 424 */
 425int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
 426{
 427        unsigned long flags;
 428        /* Several broken drivers assume that this function doesn't sleep,
 429         * this causes system hangs during boot.
 430         * As a temporary fix, make the function non-blocking.
 431         */
 432        spin_lock_irqsave(&io_request_lock, flags);
 433        while (q->nr_requests < nr_requests) {
 434                struct request *rq;
 435
 436                rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
 437                if (rq == NULL)
 438                        break;
 439                memset(rq, 0, sizeof(*rq));
 440                rq->rq_status = RQ_INACTIVE;
 441                list_add(&rq->queue, &q->rq.free);
 442                q->rq.count++;
 443
 444                q->nr_requests++;
 445        }
 446
 447        /*
 448         * Wakeup waiters after both one quarter of the
 449         * max-in-fligh queue and one quarter of the requests
 450         * are available again.
 451         */
 452
 453        q->batch_requests = q->nr_requests / 4;
 454        if (q->batch_requests > 32)
 455                q->batch_requests = 32;
 456        q->batch_sectors = max_queue_sectors / 4;
 457 
 458        q->max_queue_sectors = max_queue_sectors;
 459 
 460        BUG_ON(!q->batch_sectors);
 461        atomic_set(&q->nr_sectors, 0);
 462
 463        spin_unlock_irqrestore(&io_request_lock, flags);
 464        return q->nr_requests;
 465}
 466
 467static void blk_init_free_list(request_queue_t *q)
 468{
 469        struct sysinfo si;
 470        int megs;               /* Total memory, in megabytes */
 471        int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
 472  
 473        INIT_LIST_HEAD(&q->rq.free);
 474        q->rq.count = 0;
 475        q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
 476        q->nr_requests = 0;
 477
 478        si_meminfo(&si);
 479        megs = si.totalram >> (20 - PAGE_SHIFT);
 480        nr_requests = MAX_NR_REQUESTS;
 481        if (megs < 30) {
 482                nr_requests /= 2;
 483                max_queue_sectors /= 2;
 484        }
 485        /* notice early if anybody screwed the defaults */
 486        BUG_ON(!nr_requests);
 487        BUG_ON(!max_queue_sectors);
 488 
 489        blk_grow_request_list(q, nr_requests, max_queue_sectors);
 490
 491        init_waitqueue_head(&q->wait_for_requests);
 492
 493        spin_lock_init(&q->queue_lock);
 494}
 495
 496static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
 497
 498/**
 499 * blk_init_queue  - prepare a request queue for use with a block device
 500 * @q:    The &request_queue_t to be initialised
 501 * @rfn:  The function to be called to process requests that have been
 502 *        placed on the queue.
 503 *
 504 * Description:
 505 *    If a block device wishes to use the standard request handling procedures,
 506 *    which sorts requests and coalesces adjacent requests, then it must
 507 *    call blk_init_queue().  The function @rfn will be called when there
 508 *    are requests on the queue that need to be processed.  If the device
 509 *    supports plugging, then @rfn may not be called immediately when requests
 510 *    are available on the queue, but may be called at some time later instead.
 511 *    Plugged queues are generally unplugged when a buffer belonging to one
 512 *    of the requests on the queue is needed, or due to memory pressure.
 513 *
 514 *    @rfn is not required, or even expected, to remove all requests off the
 515 *    queue, but only as many as it can handle at a time.  If it does leave
 516 *    requests on the queue, it is responsible for arranging that the requests
 517 *    get dealt with eventually.
 518 *
 519 *    A global spin lock $io_request_lock must be held while manipulating the
 520 *    requests on the request queue.
 521 *
 522 *    The request on the head of the queue is by default assumed to be
 523 *    potentially active, and it is not considered for re-ordering or merging
 524 *    whenever the given queue is unplugged. This behaviour can be changed with
 525 *    blk_queue_headactive().
 526 *
 527 * Note:
 528 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 529 *    when the block device is deactivated (such as at module unload).
 530 **/
 531void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
 532{
 533        INIT_LIST_HEAD(&q->queue_head);
 534        elevator_init(&q->elevator, ELEVATOR_LINUS);
 535        blk_init_free_list(q);
 536        q->request_fn           = rfn;
 537        q->back_merge_fn        = ll_back_merge_fn;
 538        q->front_merge_fn       = ll_front_merge_fn;
 539        q->merge_requests_fn    = ll_merge_requests_fn;
 540        q->make_request_fn      = __make_request;
 541        q->plug_tq.sync         = 0;
 542        q->plug_tq.routine      = &generic_unplug_device;
 543        q->plug_tq.data         = q;
 544        q->plugged              = 0;
 545        q->can_throttle         = 0;
 546
 547        /*
 548         * These booleans describe the queue properties.  We set the
 549         * default (and most common) values here.  Other drivers can
 550         * use the appropriate functions to alter the queue properties.
 551         * as appropriate.
 552         */
 553        q->plug_device_fn       = generic_plug_device;
 554        q->head_active          = 1;
 555
 556        blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 557}
 558
 559#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
 560/*
 561 * Get a free request. io_request_lock must be held and interrupts
 562 * disabled on the way in.  Returns NULL if there are no free requests.
 563 */
 564static struct request *get_request(request_queue_t *q, int rw)
 565{
 566        struct request *rq = NULL;
 567        struct request_list *rl = &q->rq;
 568
 569        if (blk_oversized_queue(q)) {
 570                int rlim = q->nr_requests >> 5;
 571
 572                if (rlim < 4)
 573                        rlim = 4;
 574
 575                /*
 576                 * if its a write, or we have more than a handful of reads
 577                 * pending, bail out
 578                 */
 579                if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
 580                        return NULL;
 581                if (blk_oversized_queue_reads(q))
 582                        return NULL;
 583        }
 584        
 585        if (!list_empty(&rl->free)) {
 586                rq = blkdev_free_rq(&rl->free);
 587                list_del(&rq->queue);
 588                rl->count--;
 589                rl->pending[rw]++;
 590                rq->rq_status = RQ_ACTIVE;
 591                rq->cmd = rw;
 592                rq->special = NULL;
 593                rq->q = q;
 594        }
 595
 596        return rq;
 597}
 598
 599/*
 600 * Here's the request allocation design, low latency version:
 601 *
 602 * 1: Blocking on request exhaustion is a key part of I/O throttling.
 603 * 
 604 * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
 605 *    attempt to ensure that all requesters sleep for a similar duration.  Hence
 606 *    no stealing requests when there are other processes waiting.
 607 *
 608 * There used to be more here, attempting to allow a process to send in a
 609 * number of requests once it has woken up.  But, there's no way to 
 610 * tell if a process has just been woken up, or if it is a new process
 611 * coming in to steal requests from the waiters.  So, we give up and force
 612 * everyone to wait fairly.
 613 * 
 614 * So here's what we do:
 615 * 
 616 *    a) A READA requester fails if free_requests < batch_requests
 617 * 
 618 *       We don't want READA requests to prevent sleepers from ever
 619 *       waking.  Note that READA is used extremely rarely - a few
 620 *       filesystems use it for directory readahead.
 621 * 
 622 *  When a process wants a new request:
 623 * 
 624 *    b) If free_requests == 0, the requester sleeps in FIFO manner, and
 625 *       the queue full condition is set.  The full condition is not
 626 *       cleared until there are no longer any waiters.  Once the full
 627 *       condition is set, all new io must wait, hopefully for a very
 628 *       short period of time.
 629 * 
 630 *  When a request is released:
 631 * 
 632 *    c) If free_requests < batch_requests, do nothing.
 633 * 
 634 *    d) If free_requests >= batch_requests, wake up a single waiter.
 635 *
 636 *   As each waiter gets a request, he wakes another waiter.  We do this
 637 *   to prevent a race where an unplug might get run before a request makes
  638 *   its way onto the queue.  The result is a cascade of wakeups, so delaying
 639 *   the initial wakeup until we've got batch_requests available helps avoid
 640 *   wakeups where there aren't any requests available yet.
 641 */
 642
 643static struct request *__get_request_wait(request_queue_t *q, int rw)
 644{
 645        register struct request *rq;
 646        DECLARE_WAITQUEUE(wait, current);
 647
 648        add_wait_queue_exclusive(&q->wait_for_requests, &wait);
 649
 650        do {
 651                set_current_state(TASK_UNINTERRUPTIBLE);
 652                spin_lock_irq(&io_request_lock);
 653                if (blk_oversized_queue(q) || q->rq.count == 0) {
 654                        __generic_unplug_device(q);
 655                        spin_unlock_irq(&io_request_lock);
 656                        schedule();
 657                        spin_lock_irq(&io_request_lock);
 658                }
 659                rq = get_request(q, rw);
 660                spin_unlock_irq(&io_request_lock);
 661        } while (rq == NULL);
 662        remove_wait_queue(&q->wait_for_requests, &wait);
 663        current->state = TASK_RUNNING;
 664
 665        return rq;
 666}
 667
 668static void get_request_wait_wakeup(request_queue_t *q, int rw)
 669{
 670        /*
 671         * avoid losing an unplug if a second __get_request_wait did the
 672         * generic_unplug_device while our __get_request_wait was running
 673         * w/o the queue_lock held and w/ our request out of the queue.
 674         */     
 675        if (waitqueue_active(&q->wait_for_requests))
 676                wake_up(&q->wait_for_requests);
 677}
 678
 679/* RO fail safe mechanism */
 680
 681static long ro_bits[MAX_BLKDEV][8];
 682
 683int is_read_only(kdev_t dev)
 684{
 685        int minor,major;
 686
 687        major = MAJOR(dev);
 688        minor = MINOR(dev);
 689        if (major < 0 || major >= MAX_BLKDEV) return 0;
 690        return ro_bits[major][minor >> 5] & (1 << (minor & 31));
 691}
 692
 693void set_device_ro(kdev_t dev,int flag)
 694{
 695        int minor,major;
 696
 697        major = MAJOR(dev);
 698        minor = MINOR(dev);
 699        if (major < 0 || major >= MAX_BLKDEV) return;
 700        if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
 701        else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
 702}
 703
 704inline void drive_stat_acct (kdev_t dev, int rw,
 705                                unsigned long nr_sectors, int new_io)
 706{
 707        unsigned int major = MAJOR(dev);
 708        unsigned int index;
 709
 710        index = disk_index(dev);
 711        if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
 712                return;
 713
 714        kstat.dk_drive[major][index] += new_io;
 715        if (rw == READ) {
 716                kstat.dk_drive_rio[major][index] += new_io;
 717                kstat.dk_drive_rblk[major][index] += nr_sectors;
 718        } else if (rw == WRITE) {
 719                kstat.dk_drive_wio[major][index] += new_io;
 720                kstat.dk_drive_wblk[major][index] += nr_sectors;
 721        } else
 722                printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 723}
 724
 725#ifdef CONFIG_BLK_STATS
 726/*
 727 * Return up to two hd_structs on which to do IO accounting for a given
 728 * request.
 729 *
 730 * On a partitioned device, we want to account both against the partition
 731 * and against the whole disk.
 732 */
 733static void locate_hd_struct(struct request *req, 
 734                             struct hd_struct **hd1,
 735                             struct hd_struct **hd2)
 736{
 737        struct gendisk *gd;
 738
 739        *hd1 = NULL;
 740        *hd2 = NULL;
 741        
 742        gd = get_gendisk(req->rq_dev);
 743        if (gd && gd->part) {
 744                /* Mask out the partition bits: account for the entire disk */
 745                int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
 746                int whole_minor = devnr << gd->minor_shift;
 747
 748                *hd1 = &gd->part[whole_minor];
 749                if (whole_minor != MINOR(req->rq_dev))
 750                        *hd2= &gd->part[MINOR(req->rq_dev)];
 751        }
 752}
 753
 754/*
 755 * Round off the performance stats on an hd_struct.
 756 *
 757 * The average IO queue length and utilisation statistics are maintained
 758 * by observing the current state of the queue length and the amount of
 759 * time it has been in this state for.
 760 * Normally, that accounting is done on IO completion, but that can result
 761 * in more than a second's worth of IO being accounted for within any one
 762 * second, leading to >100% utilisation.  To deal with that, we do a
 763 * round-off before returning the results when reading /proc/partitions,
 764 * accounting immediately for all queue usage up to the current jiffies and
 765 * restarting the counters again.
 766 */
 767void disk_round_stats(struct hd_struct *hd)
 768{
 769        unsigned long now = jiffies;
 770        
 771        hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
 772        hd->last_queue_change = now;
 773
 774        if (hd->ios_in_flight)
 775                hd->io_ticks += (now - hd->last_idle_time);
 776        hd->last_idle_time = now;       
 777}
 778
 779static inline void down_ios(struct hd_struct *hd)
 780{
 781        disk_round_stats(hd);   
 782        --hd->ios_in_flight;
 783}
 784
 785static inline void up_ios(struct hd_struct *hd)
 786{
 787        disk_round_stats(hd);
 788        ++hd->ios_in_flight;
 789}
 790
 791static void account_io_start(struct hd_struct *hd, struct request *req,
 792                             int merge, int sectors)
 793{
 794        switch (req->cmd) {
 795        case READ:
 796                if (merge)
 797                        hd->rd_merges++;
 798                hd->rd_sectors += sectors;
 799                break;
 800        case WRITE:
 801                if (merge)
 802                        hd->wr_merges++;
 803                hd->wr_sectors += sectors;
 804                break;
 805        }
 806        if (!merge)
 807                up_ios(hd);
 808}
 809
 810static void account_io_end(struct hd_struct *hd, struct request *req)
 811{
 812        unsigned long duration = jiffies - req->start_time;
 813        switch (req->cmd) {
 814        case READ:
 815                hd->rd_ticks += duration;
 816                hd->rd_ios++;
 817                break;
 818        case WRITE:
 819                hd->wr_ticks += duration;
 820                hd->wr_ios++;
 821                break;
 822        }
 823        down_ios(hd);
 824}
 825
 826void req_new_io(struct request *req, int merge, int sectors)
 827{
 828        struct hd_struct *hd1, *hd2;
 829
 830        locate_hd_struct(req, &hd1, &hd2);
 831        if (hd1)
 832                account_io_start(hd1, req, merge, sectors);
 833        if (hd2)
 834                account_io_start(hd2, req, merge, sectors);
 835}
 836
 837void req_merged_io(struct request *req)
 838{
 839        struct hd_struct *hd1, *hd2;
 840
 841        locate_hd_struct(req, &hd1, &hd2);
 842        if (hd1)
 843                down_ios(hd1);
 844        if (hd2)        
 845                down_ios(hd2);
 846}
 847
 848void req_finished_io(struct request *req)
 849{
 850        struct hd_struct *hd1, *hd2;
 851
 852        locate_hd_struct(req, &hd1, &hd2);
 853        if (hd1)
 854                account_io_end(hd1, req);
 855        if (hd2)        
 856                account_io_end(hd2, req);
 857}
 858EXPORT_SYMBOL(req_finished_io);
 859#endif /* CONFIG_BLK_STATS */
 860
 861/*
 862 * add-request adds a request to the linked list.
 863 * io_request_lock is held and interrupts disabled, as we muck with the
 864 * request queue list.
 865 *
 866 * By this point, req->cmd is always either READ/WRITE, never READA,
 867 * which is important for drive_stat_acct() above.
 868 */
 869static inline void add_request(request_queue_t * q, struct request * req,
 870                               struct list_head *insert_here)
 871{
 872        drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
 873
 874        if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
 875                spin_unlock_irq(&io_request_lock);
 876                BUG();
 877        }
 878
 879        /*
 880         * elevator indicated where it wants this request to be
 881         * inserted at elevator_merge time
 882         */
 883        list_add(&req->queue, insert_here);
 884}
 885
 886/*
 887 * Must be called with io_request_lock held and interrupts disabled
 888 */
 889void blkdev_release_request(struct request *req)
 890{
 891        request_queue_t *q = req->q;
 892
 893        req->rq_status = RQ_INACTIVE;
 894        req->q = NULL;
 895
 896        /*
 897         * Request may not have originated from ll_rw_blk. if not,
 898         * assume it has free buffers and check waiters
 899         */
 900        if (q) {
 901                struct request_list *rl = &q->rq;
 902                int oversized_batch = 0;
 903
 904                if (q->can_throttle)
 905                        oversized_batch = blk_oversized_queue_batch(q);
 906                rl->count++;
 907                /*
 908                 * paranoia check
 909                 */
 910                if (req->cmd == READ || req->cmd == WRITE)
 911                        rl->pending[req->cmd]--;
 912                if (rl->pending[READ] > q->nr_requests)
 913                        printk("blk: reads: %u\n", rl->pending[READ]);
 914                if (rl->pending[WRITE] > q->nr_requests)
 915                        printk("blk: writes: %u\n", rl->pending[WRITE]);
 916                if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
 917                        printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
 918                list_add(&req->queue, &rl->free);
 919                if (rl->count >= q->batch_requests && !oversized_batch) {
 920                        smp_mb();
 921                        if (waitqueue_active(&q->wait_for_requests))
 922                                wake_up(&q->wait_for_requests);
 923                }
 924        }
 925}
 926
 927/*
 928 * Has to be called with the request spinlock acquired
 929 */
 930static void attempt_merge(request_queue_t * q,
 931                          struct request *req,
 932                          int max_sectors,
 933                          int max_segments)
 934{
 935        struct request *next;
 936  
 937        next = blkdev_next_request(req);
 938        if (req->sector + req->nr_sectors != next->sector)
 939                return;
 940        if (req->cmd != next->cmd
 941            || req->rq_dev != next->rq_dev
 942            || req->nr_sectors + next->nr_sectors > max_sectors
 943            || next->waiting)
 944                return;
 945        /*
 946         * If we are not allowed to merge these requests, then
 947         * return.  If we are allowed to merge, then the count
 948         * will have been updated to the appropriate number,
 949         * and we shouldn't do it here too.
 950         */
 951        if (!q->merge_requests_fn(q, req, next, max_segments))
 952                return;
 953
 954        q->elevator.elevator_merge_req_fn(req, next);
 955        
 956        /* At this point we have either done a back merge
 957         * or front merge. We need the smaller start_time of
 958         * the merged requests to be the current request
 959         * for accounting purposes.
 960         */
 961        if (time_after(req->start_time, next->start_time))
 962                req->start_time = next->start_time;
 963                
 964        req->bhtail->b_reqnext = next->bh;
 965        req->bhtail = next->bhtail;
 966        req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
 967        list_del(&next->queue);
 968
 969        /* One last thing: we have removed a request, so we now have one
 970           less expected IO to complete for accounting purposes. */
 971        req_merged_io(req);
 972
 973        blkdev_release_request(next);
 974}
 975
 976static inline void attempt_back_merge(request_queue_t * q,
 977                                      struct request *req,
 978                                      int max_sectors,
 979                                      int max_segments)
 980{
 981        if (&req->queue == q->queue_head.prev)
 982                return;
 983        attempt_merge(q, req, max_sectors, max_segments);
 984}
 985
 986static inline void attempt_front_merge(request_queue_t * q,
 987                                       struct list_head * head,
 988                                       struct request *req,
 989                                       int max_sectors,
 990                                       int max_segments)
 991{
 992        struct list_head * prev;
 993
 994        prev = req->queue.prev;
 995        if (head == prev)
 996                return;
 997        attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
 998}
 999
1000static int __make_request(request_queue_t * q, int rw,
1001                                  struct buffer_head * bh)
1002{
1003        unsigned int sector, count, sync;
1004        int max_segments = MAX_SEGMENTS;
1005        struct request * req, *freereq = NULL;
1006        int rw_ahead, max_sectors, el_ret;
1007        struct list_head *head, *insert_here;
1008        int latency;
1009        elevator_t *elevator = &q->elevator;
1010        int should_wake = 0;
1011
1012        count = bh->b_size >> 9;
1013        sector = bh->b_rsector;
1014        sync = test_and_clear_bit(BH_Sync, &bh->b_state);
1015
1016        rw_ahead = 0;   /* normal case; gets changed below for READA */
1017        switch (rw) {
1018                case READA:
1019#if 0   /* bread() misinterprets failed READA attempts as IO errors on SMP */
1020                        rw_ahead = 1;
1021#endif
1022                        rw = READ;      /* drop into READ */
1023                case READ:
1024                case WRITE:
1025                        latency = elevator_request_latency(elevator, rw);
1026                        break;
1027                default:
1028                        BUG();
1029                        goto end_io;
1030        }
1031
1032        /* We'd better have a real physical mapping!
1033           Check this bit only if the buffer was dirty and just locked
1034           down by us so at this point flushpage will block and
1035           won't clear the mapped bit under us. */
1036        if (!buffer_mapped(bh))
1037                BUG();
1038
1039        /*
1040         * Temporary solution - in 2.5 this will be done by the lowlevel
1041         * driver. Create a bounce buffer if the buffer data points into
1042         * high memory - keep the original buffer otherwise.
1043         */
1044        bh = blk_queue_bounce(q, rw, bh);
1045
1046/* look for a free request. */
1047        /*
1048         * Try to coalesce the new request with old requests
1049         */
1050        max_sectors = get_max_sectors(bh->b_rdev);
1051
1052        req = NULL;
1053        head = &q->queue_head;
1054        /*
1055         * Now we acquire the request spinlock, we have to be mega careful
1056         * not to schedule or do something nonatomic
1057         */
1058        spin_lock_irq(&io_request_lock);
1059
1060again:
1061        insert_here = head->prev;
1062
1063        if (list_empty(head)) {
1064                q->plug_device_fn(q, bh->b_rdev); /* is atomic */
1065                goto get_rq;
1066        } else if (q->head_active && !q->plugged)
1067                head = head->next;
1068
1069        el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
1070        switch (el_ret) {
1071
1072                case ELEVATOR_BACK_MERGE:
1073                        if (!q->back_merge_fn(q, req, bh, max_segments)) {
1074                                insert_here = &req->queue;
1075                                break;
1076                        }
1077                        req->bhtail->b_reqnext = bh;
1078                        req->bhtail = bh;
1079                        req->nr_sectors = req->hard_nr_sectors += count;
1080                        blk_started_io(count);
1081                        blk_started_sectors(req, count);
1082                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1083                        req_new_io(req, 1, count);
1084                        attempt_back_merge(q, req, max_sectors, max_segments);
1085                        goto out;
1086
1087                case ELEVATOR_FRONT_MERGE:
1088                        if (!q->front_merge_fn(q, req, bh, max_segments)) {
1089                                insert_here = req->queue.prev;
1090                                break;
1091                        }
1092                        bh->b_reqnext = req->bh;
1093                        req->bh = bh;
1094                        /*
1095                         * may not be valid, but queues not having bounce
1096                         * enabled for highmem pages must not look at
1097                         * ->buffer anyway
1098                         */
1099                        req->buffer = bh->b_data;
1100                        req->current_nr_sectors = req->hard_cur_sectors = count;
1101                        req->sector = req->hard_sector = sector;
1102                        req->nr_sectors = req->hard_nr_sectors += count;
1103                        blk_started_io(count);
1104                        blk_started_sectors(req, count);
1105                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1106                        req_new_io(req, 1, count);
1107                        attempt_front_merge(q, head, req, max_sectors, max_segments);
1108                        goto out;
1109
1110                /*
1111                 * elevator says don't/can't merge. get new request
1112                 */
1113                case ELEVATOR_NO_MERGE:
1114                        /*
1115                         * use elevator hints as to where to insert the
1116                         * request. if no hints, just add it to the back
1117                         * of the queue
1118                         */
1119                        if (req)
1120                                insert_here = &req->queue;
1121                        break;
1122
1123                default:
1124                        printk("elevator returned crap (%d)\n", el_ret);
1125                        BUG();
1126        }
1127                
1128get_rq:
1129        if (freereq) {
1130                req = freereq;
1131                freereq = NULL;
1132        } else {
1133                /*
1134                 * See description above __get_request_wait()
1135                 */
1136                if (rw_ahead) {
1137                        if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
1138                                spin_unlock_irq(&io_request_lock);
1139                                goto end_io;
1140                        }
1141                        req = get_request(q, rw);
1142                        if (req == NULL)
1143                                BUG();
1144                } else {
1145                        req = get_request(q, rw);
1146                        if (req == NULL) {
1147                                spin_unlock_irq(&io_request_lock);
1148                                freereq = __get_request_wait(q, rw);
1149                                head = &q->queue_head;
1150                                spin_lock_irq(&io_request_lock);
1151                                should_wake = 1;
1152                                goto again;
1153                        }
1154                }
1155        }
1156
1157/* fill up the request-info, and add it to the queue */
1158        req->elevator_sequence = latency;
1159        req->cmd = rw;
1160        req->errors = 0;
1161        req->hard_sector = req->sector = sector;
1162        req->hard_nr_sectors = req->nr_sectors = count;
1163        req->current_nr_sectors = req->hard_cur_sectors = count;
1164        req->nr_segments = 1; /* Always 1 for a new request. */
1165        req->nr_hw_segments = 1; /* Always 1 for a new request. */
1166        req->buffer = bh->b_data;
1167        req->waiting = NULL;
1168        req->bh = bh;
1169        req->bhtail = bh;
1170        req->rq_dev = bh->b_rdev;
1171        req->start_time = jiffies;
1172        req_new_io(req, 0, count);
1173        blk_started_io(count);
1174        blk_started_sectors(req, count);
1175        add_request(q, req, insert_here);
1176out:
1177        if (freereq)
1178                blkdev_release_request(freereq);
1179        if (should_wake)
1180                get_request_wait_wakeup(q, rw);
1181        if (sync)
1182                __generic_unplug_device(q);
1183        spin_unlock_irq(&io_request_lock);
1184        return 0;
1185end_io:
1186        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1187        return 0;
1188}
1189
1190/**
1191 * generic_make_request: hand a buffer head to it's device driver for I/O
1192 * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
1193 * @bh:  The buffer head describing the location in memory and on the device.
1194 *
1195 * generic_make_request() is used to make I/O requests of block
1196 * devices. It is passed a &struct buffer_head and a &rw value.  The
1197 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
1198 * %READA value means that a read is required, but that the driver is
1199 * free to fail the request if, for example, it cannot get needed
1200 * resources immediately.
1201 *
1202 * generic_make_request() does not return any status.  The
1203 * success/failure status of the request, along with notification of
1204 * completion, is delivered asynchronously through the bh->b_end_io
1205 * function described (one day) else where.
1206 *
1207 * The caller of generic_make_request must make sure that b_page,
1208 * b_addr, b_size are set to describe the memory buffer, that b_rdev
1209 * and b_rsector are set to describe the device address, and the
1210 * b_end_io and optionally b_private are set to describe how
1211 * completion notification should be signaled.  BH_Mapped should also
1212 * be set (to confirm that b_dev and b_blocknr are valid).
1213 *
1214 * generic_make_request and the drivers it calls may use b_reqnext,
1215 * and may change b_rdev and b_rsector.  So the values of these fields
1216 * should NOT be depended on after the call to generic_make_request.
1217 * Because of this, the caller should record the device address
1218 * information in b_dev and b_blocknr.
1219 *
1220 * Apart from those fields mentioned above, no other fields, and in
1221 * particular, no other flags, are changed by generic_make_request or
1222 * any lower level drivers.
1223 * */
1224void generic_make_request (int rw, struct buffer_head * bh)
1225{
1226        int major = MAJOR(bh->b_rdev);
1227        int minorsize = 0;
1228        request_queue_t *q;
1229
1230        if (!bh->b_end_io)
1231                BUG();
1232
1233        /* Test device size, when known. */
1234        if (blk_size[major])
1235                minorsize = blk_size[major][MINOR(bh->b_rdev)];
1236        if (minorsize) {
1237                unsigned long maxsector = (minorsize << 1) + 1;
1238                unsigned long sector = bh->b_rsector;
1239                unsigned int count = bh->b_size >> 9;
1240
1241                if (maxsector < count || maxsector - count < sector) {
1242                        /* Yecch */
1243                        bh->b_state &= ~(1 << BH_Dirty);
1244
1245                        /* This may well happen - the kernel calls bread()
1246                           without checking the size of the device, e.g.,
1247                           when mounting a device. */
1248                        printk(KERN_INFO
1249                               "attempt to access beyond end of device\n");
1250                        printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
1251                               kdevname(bh->b_rdev), rw,
1252                               (sector + count)>>1, minorsize);
1253
1254                        bh->b_end_io(bh, 0);
1255                        return;
1256                }
1257        }
1258
1259        /*
1260         * Resolve the mapping until finished. (drivers are
1261         * still free to implement/resolve their own stacking
1262         * by explicitly returning 0)
1263         */
1264        /* NOTE: we don't repeat the blk_size check for each new device.
1265         * Stacking drivers are expected to know what they are doing.
1266         */
1267        do {
1268                q = __blk_get_queue(bh->b_rdev);
1269                if (!q) {
1270                        printk(KERN_ERR
1271                               "generic_make_request: Trying to access "
1272                               "nonexistent block-device %s (%ld)\n",
1273                               kdevname(bh->b_rdev), bh->b_rsector);
1274                        buffer_IO_error(bh);
1275                        break;
1276                }
1277        } while (q->make_request_fn(q, rw, bh));
1278}
1279
1280
1281/**
1282 * submit_bh: submit a buffer_head to the block device later for I/O
1283 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1284 * @bh: The &struct buffer_head which describes the I/O
1285 *
1286 * submit_bh() is very similar in purpose to generic_make_request(), and
1287 * uses that function to do most of the work.
1288 *
1289 * The extra functionality provided by submit_bh is to determine
1290 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
1291 * This is is appropriate for IO requests that come from the buffer
1292 * cache and page cache which (currently) always use aligned blocks.
1293 */
1294void submit_bh(int rw, struct buffer_head * bh)
1295{
1296        int count = bh->b_size >> 9;
1297
1298        if (!test_bit(BH_Lock, &bh->b_state))
1299                BUG();
1300
1301        set_bit(BH_Req, &bh->b_state);
1302        set_bit(BH_Launder, &bh->b_state);
1303
1304        /*
1305         * First step, 'identity mapping' - RAID or LVM might
1306         * further remap this.
1307         */
1308        bh->b_rdev = bh->b_dev;
1309        bh->b_rsector = bh->b_blocknr * count;
1310
1311        get_bh(bh);
1312        generic_make_request(rw, bh);
1313
1314        /* fix race condition with wait_on_buffer() */
1315        smp_mb(); /* spin_unlock may have inclusive semantics */
1316        if (waitqueue_active(&bh->b_wait))
1317                wake_up(&bh->b_wait);
1318
1319        if (block_dump)
1320                printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
1321
1322        put_bh(bh);
1323        switch (rw) {
1324                case WRITE:
1325                        kstat.pgpgout += count;
1326                        break;
1327                default:
1328                        kstat.pgpgin += count;
1329                        break;
1330        }
1331}
1332
1333/**
1334 * ll_rw_block: low-level access to block devices
1335 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
1336 * @nr: number of &struct buffer_heads in the array
1337 * @bhs: array of pointers to &struct buffer_head
1338 *
1339 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
1340 * and requests an I/O operation on them, either a %READ or a %WRITE.
1341 * The third %READA option is described in the documentation for
1342 * generic_make_request() which ll_rw_block() calls.
1343 *
1344 * This function provides extra functionality that is not in
1345 * generic_make_request() that is relevant to buffers in the buffer
1346 * cache or page cache.  In particular it drops any buffer that it
1347 * cannot get a lock on (with the BH_Lock state bit), any buffer that
1348 * appears to be clean when doing a write request, and any buffer that
1349 * appears to be up-to-date when doing read request.  Further it marks
1350 * as clean buffers that are processed for writing (the buffer cache
1351 * wont assume that they are actually clean until the buffer gets
1352 * unlocked).
1353 *
1354 * ll_rw_block sets b_end_io to simple completion handler that marks
1355 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
1356 * any waiters.  As client that needs a more interesting completion
1357 * routine should call submit_bh() (or generic_make_request())
1358 * directly.
1359 *
1360 * Caveat:
1361 *  All of the buffers must be for the same device, and must also be
1362 *  of the current approved size for the device.  */
1363
1364void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1365{
1366        unsigned int major;
1367        int correct_size;
1368        int i;
1369
1370        if (!nr)
1371                return;
1372
1373        major = MAJOR(bhs[0]->b_dev);
1374
1375        /* Determine correct block size for this device. */
1376        correct_size = get_hardsect_size(bhs[0]->b_dev);
1377
1378        /* Verify requested block sizes. */
1379        for (i = 0; i < nr; i++) {
1380                struct buffer_head *bh = bhs[i];
1381                if (bh->b_size % correct_size) {
1382                        printk(KERN_NOTICE "ll_rw_block: device %s: "
1383                               "only %d-char blocks implemented (%u)\n",
1384                               kdevname(bhs[0]->b_dev),
1385                               correct_size, bh->b_size);
1386                        goto sorry;
1387                }
1388        }
1389
1390        if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1391                printk(KERN_NOTICE "Can't write to read-only device %s\n",
1392                       kdevname(bhs[0]->b_dev));
1393                goto sorry;
1394        }
1395
1396        for (i = 0; i < nr; i++) {
1397                struct buffer_head *bh = bhs[i];
1398
1399                lock_buffer(bh);
1400
1401                /* We have the buffer lock */
1402                atomic_inc(&bh->b_count);
1403                bh->b_end_io = end_buffer_io_sync;
1404
1405                switch(rw) {
1406                case WRITE:
1407                        if (!atomic_set_buffer_clean(bh))
1408                                /* Hmmph! Nothing to write */
1409                                goto end_io;
1410                        __mark_buffer_clean(bh);
1411                        break;
1412
1413                case READA:
1414                case READ:
1415                        if (buffer_uptodate(bh))
1416                                /* Hmmph! Already have it */
1417                                goto end_io;
1418                        break;
1419                default:
1420                        BUG();
1421        end_io:
1422                        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1423                        continue;
1424                }
1425
1426                submit_bh(rw, bh);
1427        }
1428        return;
1429
1430sorry:
1431        /* Make sure we don't get infinite dirty retries.. */
1432        for (i = 0; i < nr; i++)
1433                mark_buffer_clean(bhs[i]);
1434}
1435
1436#ifdef CONFIG_STRAM_SWAP
1437extern int stram_device_init (void);
1438#endif
1439
/*
 * Handler installed on writeback_timer by blk_dev_init(): wakes bdflush
 * and then kupdate.  The timer argument is not used.
 */
static void blk_writeback_timer(unsigned long data)
{
        (void) data;

        wakeup_bdflush();
        wakeup_kupdate();
}
1445
1446/**
1447 * end_that_request_first - end I/O on one buffer.
1448 * @req:      the request being processed
1449 * @uptodate: 0 for I/O error
1450 * @name:     the name printed for an I/O error
1451 *
1452 * Description:
1453 *     Ends I/O on the first buffer attached to @req, and sets it up
1454 *     for the next buffer_head (if any) in the cluster.
1455 *     
1456 * Return:
1457 *     0 - we are done with this request, call end_that_request_last()
1458 *     1 - still buffers pending for this request
1459 *
1460 * Caveat: 
1461 *     Drivers implementing their own end_request handling must call
1462 *     blk_finished_io() appropriately.
1463 **/
1464
1465int end_that_request_first (struct request *req, int uptodate, char *name)
1466{
1467        struct buffer_head * bh;
1468        int nsect;
1469
1470        req->errors = 0;
1471        if (!uptodate)
1472                printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1473                        kdevname(req->rq_dev), name, req->sector);
1474
1475        if ((bh = req->bh) != NULL) {
1476                nsect = bh->b_size >> 9;
1477                blk_finished_io(nsect);
1478                blk_finished_sectors(req, nsect);
1479                req->bh = bh->b_reqnext;
1480                bh->b_reqnext = NULL;
1481                bh->b_end_io(bh, uptodate);
1482                if ((bh = req->bh) != NULL) {
1483                        req->hard_sector += nsect;
1484                        req->hard_nr_sectors -= nsect;
1485                        req->sector = req->hard_sector;
1486                        req->nr_sectors = req->hard_nr_sectors;
1487
1488                        req->current_nr_sectors = bh->b_size >> 9;
1489                        req->hard_cur_sectors = req->current_nr_sectors;
1490                        if (req->nr_sectors < req->current_nr_sectors) {
1491                                req->nr_sectors = req->current_nr_sectors;
1492                                printk("end_request: buffer-list destroyed\n");
1493                        }
1494                        req->buffer = bh->b_data;
1495                        return 1;
1496                }
1497        }
1498        return 0;
1499}
1500
1501extern int laptop_mode;
1502
1503void end_that_request_last(struct request *req)
1504{
1505        struct completion *waiting = req->waiting;
1506
1507        /*
1508         * schedule the writeout of pending dirty data when the disk is idle
1509         */
1510        if (laptop_mode && req->cmd == READ)
1511                mod_timer(&writeback_timer, jiffies + 5 * HZ);
1512
1513        req_finished_io(req);
1514        blkdev_release_request(req);
1515        if (waiting)
1516                complete(waiting);
1517}
1518
1519int __init blk_dev_init(void)
1520{
1521        struct blk_dev_struct *dev;
1522
1523        request_cachep = kmem_cache_create("blkdev_requests",
1524                                           sizeof(struct request),
1525                                           0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1526
1527        if (!request_cachep)
1528                panic("Can't create request pool slab cache\n");
1529
1530        for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1531                dev->queue = NULL;
1532
1533        memset(ro_bits,0,sizeof(ro_bits));
1534        memset(max_readahead, 0, sizeof(max_readahead));
1535        memset(max_sectors, 0, sizeof(max_sectors));
1536
1537        blk_max_low_pfn = max_low_pfn - 1;
1538        blk_max_pfn = max_pfn - 1;
1539
1540        init_timer(&writeback_timer);
1541        writeback_timer.function = blk_writeback_timer;
1542
1543#ifdef CONFIG_AMIGA_Z2RAM
1544        z2_init();
1545#endif
1546#ifdef CONFIG_STRAM_SWAP
1547        stram_device_init();
1548#endif
1549#ifdef CONFIG_ISP16_CDI
1550        isp16_init();
1551#endif
1552#ifdef CONFIG_BLK_DEV_PS2
1553        ps2esdi_init();
1554#endif
1555#ifdef CONFIG_BLK_DEV_XD
1556        xd_init();
1557#endif
1558#ifdef CONFIG_BLK_DEV_MFM
1559        mfm_init();
1560#endif
1561#ifdef CONFIG_PARIDE
1562        { extern void paride_init(void); paride_init(); };
1563#endif
1564#ifdef CONFIG_MAC_FLOPPY
1565        swim3_init();
1566#endif
1567#ifdef CONFIG_BLK_DEV_SWIM_IOP
1568        swimiop_init();
1569#endif
1570#ifdef CONFIG_AMIGA_FLOPPY
1571        amiga_floppy_init();
1572#endif
1573#ifdef CONFIG_ATARI_FLOPPY
1574        atari_floppy_init();
1575#endif
1576#ifdef CONFIG_BLK_DEV_FD
1577        floppy_init();
1578#else
1579#if defined(__i386__)   /* Do we even need this? */
1580        outb_p(0xc, 0x3f2);
1581#endif
1582#endif
1583#ifdef CONFIG_CDU31A
1584        cdu31a_init();
1585#endif
1586#ifdef CONFIG_ATARI_ACSI
1587        acsi_init();
1588#endif
1589#ifdef CONFIG_MCD
1590        mcd_init();
1591#endif
1592#ifdef CONFIG_MCDX
1593        mcdx_init();
1594#endif
1595#ifdef CONFIG_SBPCD
1596        sbpcd_init();
1597#endif
1598#ifdef CONFIG_AZTCD
1599        aztcd_init();
1600#endif
1601#ifdef CONFIG_CDU535
1602        sony535_init();
1603#endif
1604#ifdef CONFIG_GSCD
1605        gscd_init();
1606#endif
1607#ifdef CONFIG_CM206
1608        cm206_init();
1609#endif
1610#ifdef CONFIG_OPTCD
1611        optcd_init();
1612#endif
1613#ifdef CONFIG_SJCD
1614        sjcd_init();
1615#endif
1616#ifdef CONFIG_APBLOCK
1617        ap_init();
1618#endif
1619#ifdef CONFIG_DDV
1620        ddv_init();
1621#endif
1622#ifdef CONFIG_MDISK
1623        mdisk_init();
1624#endif
1625#ifdef CONFIG_DASD
1626        dasd_init();
1627#endif
1628#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
1629        tapeblock_init();
1630#endif
1631#ifdef CONFIG_BLK_DEV_XPRAM
1632        xpram_init();
1633#endif
1634
1635#ifdef CONFIG_SUN_JSFLASH
1636        jsfd_init();
1637#endif
1638        return 0;
1639};
1640
/*
 * Symbols exported by this file, in alphabetical order.
 */
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_grow_request_list);
EXPORT_SYMBOL(blk_init_queue);
EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);
EXPORT_SYMBOL(blk_nohighio);
EXPORT_SYMBOL(blk_queue_bounce_limit);
EXPORT_SYMBOL(blk_queue_headactive);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(blk_queue_throttle_sectors);
EXPORT_SYMBOL(blk_seg_merge_ok);
EXPORT_SYMBOL(blkdev_release_request);
EXPORT_SYMBOL(end_that_request_first);
EXPORT_SYMBOL(end_that_request_last);
EXPORT_SYMBOL(generic_make_request);
EXPORT_SYMBOL(generic_unplug_device);
EXPORT_SYMBOL(io_request_lock);
1659
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.