linux/block/blk-barrier.c
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/**
 * blk_queue_ordered - set up ordered write handling for a queue
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers that support this
 *   feature should call this function to indicate so.
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
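
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * driver "mydrv" for a disk with a volatile write cache could advertise
 * drain-based ordering with explicit cache flushes.  mydrv_prepare_flush()
 * and mydrv_init_queue() are names made up for this sketch.
 *
 *      static void mydrv_prepare_flush(struct request_queue *q,
 *                                      struct request *rq)
 *      {
 *              (fill in the device-specific "flush write cache" command)
 *      }
 *
 *      static int mydrv_init_queue(struct request_queue *q)
 *      {
 *              return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
 *                                       mydrv_prepare_flush);
 *      }
 *
 * A write-through device that needs no cache flushing would pass
 * QUEUE_ORDERED_DRAIN and a NULL prepare_flush_fn instead, since the
 * PREFLUSH/POSTFLUSH steps are not used in that mode.
 */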

/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering.  Always
         * put them at the front.  This fixes the following deadlock.
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return false;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;
        __blk_end_request_all(rq, q->orderr);
        return true;
}
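
/*
 * A worked example of the ordseq bookkeeping (a sketch; it assumes the
 * QUEUE_ORDSEQ_* constants are consecutive single bits in the order
 * STARTED, DRAIN, PREFLUSH, BAR, POSTFLUSH, DONE, which is also what the
 * sequence comparisons in blk_do_ordered() rely on).  If
 *
 *      q->ordseq == QUEUE_ORDSEQ_STARTED | QUEUE_ORDSEQ_DRAIN |
 *                   QUEUE_ORDSEQ_PREFLUSH
 *
 * then draining and the pre-flush have finished.  ffz() finds the first
 * zero bit, so blk_ordered_cur_seq() evaluates to QUEUE_ORDSEQ_BAR: the
 * barrier request itself is the phase currently in flight.  Each *_end_io
 * handler below feeds its phase bit to blk_ordered_complete_seq(), which
 * completes q->orig_bar_rq once the current phase reaches QUEUE_ORDSEQ_DONE.
 */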

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        unsigned skip = 0;

        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
        if (!blk_rq_sectors(rq)) {
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);
                /*
                 * An empty barrier on a write-through device with
                 * ordered tag has no command to issue, and without a
                 * command to issue, ordering by tag can't be used.
                 * Drain instead.
                 */
                if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
                    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
                        q->ordered &= ~QUEUE_ORDERED_BY_TAG;
                        q->ordered |= QUEUE_ORDERED_BY_DRAIN;
                }
        }

        /* stash away the original request */
        blk_dequeue_request(rq);
        q->orig_bar_rq = rq;
        rq = NULL;

        /*
         * Queue the ordered sequence.  As we stack the requests at the
         * head, we need to queue them in reverse order (see the worked
         * example after this function).  Note that we rely on the fact
         * that no fs request uses ELEVATOR_INSERT_FRONT, so no fs
         * request can get in between the ordered sequence.
         */
        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
                rq = &q->post_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
                rq = &q->bar_rq;

                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                        rq->cmd_flags |= REQ_RW;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
        } else
                skip |= QUEUE_ORDSEQ_BAR;

        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
                rq = NULL;
        else
                skip |= QUEUE_ORDSEQ_DRAIN;

        *rqp = rq;

        /*
         * Complete the skipped sequences.  If the whole sequence is
         * complete, return false to tell the elevator that this request
         * is gone.
         */
        return !blk_ordered_complete_seq(q, skip, 0);
}
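
/*
 * To illustrate the reverse queueing above (a sketch, assuming an ordered
 * mode in which DO_PREFLUSH, DO_BAR and DO_POSTFLUSH are all set, e.g.
 * QUEUE_ORDERED_DRAIN_FLUSH): start_ordered() front-inserts post_flush_rq,
 * then bar_rq, then pre_flush_rq, so the dispatch order seen by the driver
 * becomes
 *
 *      pre_flush_rq -> bar_rq -> post_flush_rq
 *
 * with *rqp left pointing at pre_flush_rq, or at NULL when ordering is by
 * drain and other requests are still in flight.
 */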

bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return true;

                if (q->next_ordered != QUEUE_ORDERED_NONE)
                        return start_ordered(q, rqp);
                else {
                        /*
                         * Queue ordering not supported.  Terminate
                         * with prejudice.
                         */
                        blk_dequeue_request(rq);
                        __blk_end_request_all(rq, -EOPNOTSUPP);
                        *rqp = NULL;
                        return false;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;

        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag.  Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining.  Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return true;
}
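
/*
 * Roughly how a dispatcher is expected to consume the three outcomes of
 * blk_do_ordered() (an illustrative sketch of the pattern used by
 * __elv_next_request(), not a verbatim copy of that code):
 *
 *      rq = list_entry_rq(q->queue_head.next);
 *      if (blk_do_ordered(q, &rq))
 *              return rq;
 *
 * A true return with rq still set means the request may be handed to the
 * driver; a true return with rq set to NULL means the caller must hold off
 * until the ordered sequence makes progress; a false return means rq was
 * consumed here (e.g. a barrier failed with -EOPNOTSUPP on an unordered
 * queue) and the caller should look at the queue head again.
 */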

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:       blockdev to issue flush for
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @error_sector:       if non-NULL, the sector at which a flush error
 *                      occurred is stored here
 * @flags:      BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a flush for the block device in question. The caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to. If the WAIT flag is not passed, the caller can only assume
 *    that the request has been queued internally for later handling.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
                sector_t *error_sector, unsigned long flags)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(gfp_mask, 0);
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_bdev = bdev;
        if (test_bit(BLKDEV_WAIT, &flags))
                bio->bi_private = &wait;

        bio_get(bio);
        submit_bio(WRITE_BARRIER, bio);
        if (test_bit(BLKDEV_WAIT, &flags)) {
                wait_for_completion(&wait);
                /*
                 * The driver must store the error location in ->bi_sector, if
                 * it supports it. For non-stacked drivers, this should be
                 * copied from blk_rq_pos(rq).
                 */
                if (error_sector)
                        *error_sector = bio->bi_sector;
        }

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
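
/*
 * Usage sketch (illustrative only): a filesystem that wants previously
 * completed writes to be durable before updating its journal could issue a
 * synchronous flush and treat -EOPNOTSUPP as "nothing to flush".  sb->s_bdev
 * stands in for whichever block_device the caller holds:
 *
 *      int err;
 *
 *      err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL,
 *                               BLKDEV_IFL_WAIT);
 *      if (err && err != -EOPNOTSUPP)
 *              return err;
 *
 * Passing NULL for @error_sector skips the error-offset report, and
 * BLKDEV_IFL_WAIT makes the call wait for the flush to complete.
 */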