linux/block/blk-lib.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

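/*
 * Allocate a new bio with room for @nr_pages vectors; if a previous bio
 * is passed in, chain it to the new bio and submit it, so that a large
 * operation can be built up as a chain of smaller bios.
 */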
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}
EXPORT_SYMBOL_GPL(blk_next_bio);

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int op;
	sector_t bs_mask, part_offset = 0;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	/* In case the discard granularity isn't set by a buggy device driver */
	if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
		char dev_name[BDEVNAME_SIZE];

		bdevname(bdev, dev_name);
		pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name);
		return -EOPNOTSUPP;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!nr_sects)
		return -EINVAL;

	/* In case the discard request is in a partition */
	if (bdev_is_partition(bdev))
		part_offset = bdev->bd_start_sect;

	while (nr_sects) {
		sector_t granularity_aligned_lba, req_sects;
		sector_t sector_mapped = sector + part_offset;

		granularity_aligned_lba = round_up(sector_mapped,
				q->limits.discard_granularity >> SECTOR_SHIFT);

		/*
		 * Check whether the discard bio starts at a discard_granularity
		 * aligned LBA:
		 * - If not, set (granularity_aligned_lba - sector_mapped) as
		 *   bi_size of the first split bio, so that the second bio
		 *   starts at a discard_granularity aligned LBA on the device.
		 * - If yes, use bio_aligned_discard_max_sectors() as the max
		 *   possible bi_size of the first split bio.  Then when this bio
		 *   is split in the device driver, the resulting bios are very
		 *   likely to be aligned to the discard_granularity of the
		 *   device's queue.
		 */
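		/*
		 * Worked example (illustrative numbers, not from the original
		 * source): with a discard_granularity of 4096 bytes (8 sectors)
		 * and sector_mapped == 10, round_up() yields
		 * granularity_aligned_lba == 16, so the first bio covers
		 * 16 - 10 = 6 sectors and the next bio starts on an aligned LBA.
		 */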
		if (granularity_aligned_lba == sector_mapped)
			req_sects = min_t(sector_t, nr_sects,
					  bio_aligned_discard_max_sectors(q));
		else
			req_sects = min_t(sector_t, nr_sects,
					  granularity_aligned_lba - sector_mapped);

		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);

		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		sector += req_sects;
		nr_sects -= req_sects;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
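
/*
 * Illustrative sketch, not part of the original file: one way a caller
 * might discard a 1 MiB extent (2048 sectors of 512 bytes) starting at
 * sector 2048.  "bdev" is assumed to be a valid, opened block device.
 */
static int __maybe_unused example_discard_extent(struct block_device *bdev)
{
	return blkdev_issue_discard(bdev, 2048, 2048, GFP_KERNEL, 0);
}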

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_SAME) carrying the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = bio_allowed_max_sectors(q);

	while (nr_sects) {
		bio = blk_next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
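
/*
 * Illustrative sketch, not part of the original file: replicate the
 * contents of the shared zero page across 256 sectors starting at
 * sector 0.  Assumes the device advertises WRITE SAME support; on
 * other devices this returns -EOPNOTSUPP.
 */
static int __maybe_unused example_write_same_zero(struct block_device *bdev)
{
	return blkdev_issue_write_same(bdev, 0, 256, GFP_KERNEL, ZERO_PAGE(0));
}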
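
/*
 * Issue REQ_OP_WRITE_ZEROES bios for the given range, splitting it into
 * chunks no larger than the device's write-zeroes limit.  The last bio
 * is returned through @biop for the caller to chain onto or submit.
 */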
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_VECS);
}
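
/*
 * Worked example (illustrative numbers, assuming 4 KiB pages): 7 sectors
 * round up to 1 page, while 1048576 sectors (512 MiB) would need 131072
 * pages and are therefore clamped to BIO_MAX_VECS.
 */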
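
/*
 * Fallback zeroing path: issue regular REQ_OP_WRITE bios whose payload is
 * the shared zero page, for use when the device has no zeroing offload.
 * The last bio is returned through @biop for the caller to submit.
 */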
static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	while (nr_sects != 0) {
		bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
				   gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
						  gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
						gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
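
/*
 * Illustrative sketch, not part of the original file: zero a 64 KiB
 * range (128 sectors) starting at sector 0 while asking the device to
 * keep the blocks provisioned, falling back to writing zero pages if
 * no WRITE ZEROES offload is available.
 */
static int __maybe_unused example_zeroout_nounmap(struct block_device *bdev)
{
	return blkdev_issue_zeroout(bdev, 0, 128, GFP_KERNEL,
				    BLKDEV_ZERO_NOUNMAP);
}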