linux/block/blk-barrier.c
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - set the ordered (barrier) write mode for a queue
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
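
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * driver for a device with a write-back cache and no tagged queuing might
 * register its ordered mode from its init path roughly like this.  The
 * example_* names and the flush opcode are invented for illustration only.
 */
static void example_prepare_flush(struct request_queue *q, struct request *rq)
{
        /* Turn the pre-allocated flush request into whatever command the
         * hardware uses to flush its write cache. */
        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->cmd[0] = 0x35;      /* hypothetical flush-cache opcode */
        rq->cmd_len = 1;
}

static void example_register_ordered(struct request_queue *q)
{
        /* Write-back cache, ordering by draining the queue: flush the
         * cache before and after the barrier request itself. */
        if (blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                              example_prepare_flush))
                printk(KERN_WARNING "example: failed to set ordered mode\n");
}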

/*
 * Cache flushing for ordered writes handling
 */

/*
 * Return the current stage of the ordered sequence: the lowest stage bit
 * not yet marked complete in q->ordseq, or 0 if no sequence is in progress.
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}

/* Map @rq to the stage of the ordered sequence it belongs to. */
unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering.  Always
         * put them at the front.  This fixes the following deadlock:
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

/*
 * Mark the stages in @seq complete, recording @error if it is the first
 * error seen.  Once the whole sequence has completed, finish the original
 * barrier request and return true.
 */
bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return false;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;

        if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
                BUG();

        return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

/*
 * Queue the pre- or post-flush request of the ordered sequence at the
 * front of the request queue.  @which selects which of the two flush
 * requests embedded in @q is used.
 */
static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        unsigned skip = 0;

        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
        if (!rq->hard_nr_sectors) {
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);
                /*
                 * An empty barrier on a write-through device with an
                 * ordered tag has no command to issue, and without any
                 * command to issue, ordering by tag can't be used.
                 * Drain instead.
                 */
                if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
                    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
                        q->ordered &= ~QUEUE_ORDERED_BY_TAG;
                        q->ordered |= QUEUE_ORDERED_BY_DRAIN;
                }
        }

        /* stash away the original request */
        elv_dequeue_request(q, rq);
        q->orig_bar_rq = rq;
        rq = NULL;

        /*
         * Queue the ordered sequence.  As the requests are stacked at the
         * head of the queue, they must be queued in reverse order.  Note
         * that we rely on the fact that no fs request uses
         * ELEVATOR_INSERT_FRONT, so no fs request can get in between the
         * members of the ordered sequence.
         */
        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
                rq = &q->post_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
                rq = &q->bar_rq;

                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                        rq->cmd_flags |= REQ_RW;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
        } else
                skip |= QUEUE_ORDSEQ_BAR;

        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
                rq = NULL;
        else
                skip |= QUEUE_ORDSEQ_DRAIN;

        *rqp = rq;

        /*
         * Complete the skipped stages.  If the whole sequence is complete,
         * return false to tell the elevator that this request is gone.
         */
        return !blk_ordered_complete_seq(q, skip, 0);
}

/*
 * Decide what to do with @*rqp, the request the dispatcher is about to
 * hand to the driver.  Returns false if the request was consumed here and
 * *rqp has been set to NULL; returns true otherwise, possibly with *rqp
 * replaced by a member of the barrier sequence or set to NULL to stall
 * dispatch until the ordered sequence makes progress.
 */
bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return true;

                if (q->next_ordered != QUEUE_ORDERED_NONE)
                        return start_ordered(q, rqp);
                else {
                        /*
                         * Queue ordering not supported.  Terminate
                         * with prejudice.
                         */
                        elv_dequeue_request(q, rq);
                        if (__blk_end_request(rq, -EOPNOTSUPP,
                                              blk_rq_bytes(rq)))
                                BUG();
                        *rqp = NULL;
                        return false;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;

        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag.  Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining.  Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return true;
}
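
/*
 * Illustrative sketch (not part of the original file): the dispatch path
 * is expected to run every request it is about to hand to the driver past
 * blk_do_ordered(), roughly along the lines below.  example_next_request()
 * is a hypothetical stand-in for the loop that lives in the elevator code.
 */
static struct request *example_next_request(struct request_queue *q)
{
        struct request *rq;

        while (!list_empty(&q->queue_head)) {
                rq = list_entry_rq(q->queue_head.next);
                /*
                 * false: the request was consumed here (e.g. a rejected or
                 * already-completed barrier), so look at the next one.
                 * true with rq == NULL: ordering requires dispatch to stall
                 * until the current sequence makes progress.
                 */
                if (blk_do_ordered(q, &rq))
                        return rq;
        }
        return NULL;
}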

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:       blockdev to issue flush for
 * @error_sector:       if non-NULL, where to store the sector at which a
 *                      failed flush was reported
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(GFP_KERNEL, 0);
        if (!bio)
                return -ENOMEM;

        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
        submit_bio(WRITE_BARRIER, bio);

        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it. For non-stacked drivers, this should be copied
         * from rq->sector.
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        ret = 0;
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
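
/*
 * Illustrative sketch (not part of the original file): a caller such as a
 * filesystem's fsync path might use the helper like this.  The
 * example_sync_device() name is invented, and the -EOPNOTSUPP handling is
 * only what a caller that can live without the flush guarantee might do.
 */
static int example_sync_device(struct block_device *bdev)
{
        int err = blkdev_issue_flush(bdev, NULL);

        /* The device cannot flush its cache via a barrier; a caller that
         * does not strictly require it may choose to ignore this. */
        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}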

static void blkdev_discard_end_io(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:       blockdev to issue discard for
 * @sector:     start sector
 * @nr_sects:   number of sectors to discard
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Issue a discard request for the sectors in question. Does not wait.
 */
int blkdev_issue_discard(struct block_device *bdev,
                         sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!q->prepare_discard_fn)
                return -EOPNOTSUPP;

        while (nr_sects && !ret) {
                bio = bio_alloc(gfp_mask, 0);
                if (!bio)
                        return -ENOMEM;

                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;

                bio->bi_sector = sector;

                /* Split the range into chunks the queue can accept. */
                if (nr_sects > q->max_hw_sectors) {
                        bio->bi_size = q->max_hw_sectors << 9;
                        nr_sects -= q->max_hw_sectors;
                        sector += q->max_hw_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                /* Hold an extra reference so the bio can still be
                 * inspected after the end_io handler drops its own. */
                bio_get(bio);
                submit_bio(DISCARD_BARRIER, bio);

                /* Check if it failed immediately */
                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
                        ret = -EIO;
                bio_put(bio);
        }
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
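
/*
 * Illustrative sketch (not part of the original file): discarding a range
 * that a hypothetical filesystem has just freed.  example_discard_extent()
 * and its arguments are invented for illustration.
 */
static int example_discard_extent(struct block_device *bdev,
                                  sector_t start, sector_t nr_sects)
{
        int err = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);

        /* -EOPNOTSUPP just means the device cannot discard; the data is
         * still intact, so most callers can carry on without it. */
        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}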