linux/drivers/md/linear.c
/*
   linear.c : Multiple Devices driver for Linux
              Copyright (C) 1994-96 Marc ZYNGIER
              <zyngier@ufr-info-p7.ibp.fr> or
              <maz@gloups.fdn.fr>

   Linear mode management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/raid/linear.h>

/*
 * find which device holds a particular offset
 */
static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
{
        dev_info_t *hash;
        linear_conf_t *conf = mddev_to_conf(mddev);
        sector_t idx = sector >> conf->sector_shift;

        /*
         * sector_div(a,b) returns the remainder and sets a to a/b;
         * divide a copy so the original sector survives for the scan below
         */
        (void)sector_div(idx, conf->spacing);
        hash = conf->hash_table[idx];

        /* the zone entry is only a starting point; step forward to the
         * device that actually contains the requested sector */
        while (sector >= hash->num_sectors + hash->start_sector)
                hash++;
        return hash;
}
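
/*
 * Illustrative example (editor's annotation, not in the original file):
 * assume two member disks of 600 and 3400 sectors and a spacing of 1000
 * sectors with sector_shift == 0, so disk0 covers sectors 0-599 and
 * disk1 covers sectors 600-3999.  A lookup of sector 700 computes
 * idx == 0 and starts at hash_table[0] == disk0; the while loop sees
 * 700 >= 0 + 600, steps forward once, and correctly returns disk1.
 */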

/**
 *      linear_mergeable_bvec -- tell bio layer if two requests can be merged
 *      @q: request queue
 *      @bvm: properties of the new bio
 *      @biovec: the bio_vec being considered for merging
 *
 *      Return the number of bytes we can accept at this offset
 */
static int linear_mergeable_bvec(struct request_queue *q,
                                 struct bvec_merge_data *bvm,
                                 struct bio_vec *biovec)
{
        mddev_t *mddev = q->queuedata;
        dev_info_t *dev0;
        unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);

        dev0 = which_dev(mddev, sector);
        maxsectors = dev0->num_sectors - (sector - dev0->start_sector);

        if (maxsectors < bio_sectors)
                maxsectors = 0;
        else
                maxsectors -= bio_sectors;

        if (maxsectors <= (PAGE_SIZE >> 9) && bio_sectors == 0)
                return biovec->bv_len;
        /* The bytes available at this offset could be really big,
         * so we cap at 2^31 to avoid overflow */
        if (maxsectors > (1 << (31-9)))
                return 1<<31;
        return maxsectors << 9;
}
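
/*
 * Worked example (editor's annotation, not in the original file): if the
 * member disk has 8 sectors left at this offset and the bio already
 * carries 6 sectors, at most 2 more sectors fit, so 1024 bytes is
 * returned.  For a still-empty bio the whole biovec is accepted even
 * when fewer sectors remain, so a one-page request can always make
 * progress; the device-boundary case is handled by splitting in
 * linear_make_request().
 */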

static void linear_unplug(struct request_queue *q)
{
        mddev_t *mddev = q->queuedata;
        linear_conf_t *conf = mddev_to_conf(mddev);
        int i;

        for (i = 0; i < mddev->raid_disks; i++) {
                struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
                blk_unplug(r_queue);
        }
}

static int linear_congested(void *data, int bits)
{
        mddev_t *mddev = data;
        linear_conf_t *conf = mddev_to_conf(mddev);
        int i, ret = 0;

        /* a linear array is congested if any member device is congested */
        for (i = 0; i < mddev->raid_disks && !ret; i++) {
                struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
                ret |= bdi_congested(&q->backing_dev_info, bits);
        }
        return ret;
}

static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
        linear_conf_t *conf;
        dev_info_t **table;
        mdk_rdev_t *rdev;
        int i, nb_zone, cnt;
        sector_t min_sectors;
        sector_t curr_sector;
        struct list_head *tmp;

        conf = kzalloc(sizeof(*conf) + raid_disks*sizeof(dev_info_t),
                       GFP_KERNEL);
        if (!conf)
                return NULL;

        cnt = 0;
        conf->array_sectors = 0;

        rdev_for_each(rdev, tmp, mddev) {
                int j = rdev->raid_disk;
                dev_info_t *disk = conf->disks + j;

                if (j < 0 || j >= raid_disks || disk->rdev) {
                        printk("linear: disk numbering problem. Aborting!\n");
                        goto out;
                }

                disk->rdev = rdev;

                blk_queue_stack_limits(mddev->queue,
                                       rdev->bdev->bd_disk->queue);
                /* as we don't honour merge_bvec_fn, we must never risk
                 * violating it, so limit ->max_sectors to one PAGE, as
                 * a one page request is never in violation.
                 */
                if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
                    mddev->queue->max_sectors > (PAGE_SIZE>>9))
                        blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

                disk->num_sectors = rdev->size * 2;
                conf->array_sectors += rdev->size * 2;

                cnt++;
        }
        if (cnt != raid_disks) {
                printk("linear: not enough drives present. Aborting!\n");
                goto out;
        }

        min_sectors = conf->array_sectors;
        sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
        if (min_sectors == 0)
                min_sectors = 1;

        /* min_sectors is the minimum spacing that will fit the hash
         * table in one PAGE.  This may be much smaller than needed.
         * We find the smallest non-terminal set of consecutive devices
         * that is larger than min_sectors and use the size of that as
         * the actual spacing.
         */
        conf->spacing = conf->array_sectors;
        for (i = 0; i < cnt-1; i++) {
                sector_t tmp = 0;
                int j;
                for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
                        tmp += conf->disks[j].num_sectors;
                if (tmp >= min_sectors && tmp < conf->spacing)
                        conf->spacing = tmp;
        }
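
        /* Worked example (editor's annotation, not in the original
         * file): with member disks of 500, 2000 and 1500 sectors
         * (array_sectors == 4000) and min_sectors == 1000, the
         * candidate non-terminal runs are 500+2000 == 2500 (starting
         * at disk 0) and 2000 (starting at disk 1), so spacing becomes
         * 2000: the smallest consecutive run that still reaches
         * min_sectors.
         */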

        /* spacing may be too large for sector_div to work with,
         * so we might need to pre-shift
         */
        conf->sector_shift = 0;
        if (sizeof(sector_t) > sizeof(u32)) {
                sector_t space = conf->spacing;
                while (space > (sector_t)(~(u32)0)) {
                        space >>= 1;
                        conf->sector_shift++;
                }
        }
        /*
         * This code was restructured to work around a gcc-2.95.3 internal
         * compiler error.  Alter it with care.
         */
        {
                sector_t sz;
                unsigned round;
                unsigned long base;

                sz = conf->array_sectors >> conf->sector_shift;
                sz += 1; /* force round-up */
                base = conf->spacing >> conf->sector_shift;
                round = sector_div(sz, base);
                nb_zone = sz + (round ? 1 : 0);
        }
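
        /* Worked example (editor's annotation, not in the original
         * file): with array_sectors == 4000, spacing == 1000 and
         * sector_shift == 0, sz becomes 4000 + 1 == 4001, base is 1000,
         * sector_div() leaves sz == 4 and returns round == 1, so
         * nb_zone == 5.  The forced round-up can over-allocate by one
         * slot, but guarantees any trailing partial zone gets an entry.
         */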
        BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));

        conf->hash_table = kmalloc(sizeof(struct dev_info *) * nb_zone,
                                   GFP_KERNEL);
        if (!conf->hash_table)
                goto out;

        /*
         * Here we generate the linear hash table.
         * First calculate the device offsets.
         */
        conf->disks[0].start_sector = 0;
        for (i = 1; i < raid_disks; i++)
                conf->disks[i].start_sector =
                        conf->disks[i-1].start_sector +
                        conf->disks[i-1].num_sectors;

        table = conf->hash_table;
        i = 0;
        for (curr_sector = 0;
             curr_sector < conf->array_sectors;
             curr_sector += conf->spacing) {

                while (i < raid_disks-1 &&
                       curr_sector >= conf->disks[i+1].start_sector)
                        i++;

                *table++ = conf->disks + i;
        }
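
        /* Editor's example (not in the original file): for the
         * 600 + 3400 sector layout from the which_dev() example, with
         * spacing 1000, the loop visits curr_sector = 0, 1000, 2000,
         * 3000 and stores { disk0, disk1, disk1, disk1 }: each zone
         * records the device holding its first sector, and which_dev()
         * walks forward from there.
         */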

        if (conf->sector_shift) {
                conf->spacing >>= conf->sector_shift;
                /* round spacing up so that when we divide by it,
                 * we err on the side of "too-low", which is safest.
                 */
                conf->spacing++;
        }

        BUG_ON(table - conf->hash_table > nb_zone);

        return conf;

out:
        kfree(conf);
        return NULL;
}

static int linear_run (mddev_t *mddev)
{
        linear_conf_t *conf;

        mddev->queue->queue_lock = &mddev->queue->__queue_lock;
        conf = linear_conf(mddev, mddev->raid_disks);

        if (!conf)
                return 1;
        mddev->private = conf;
        mddev->array_sectors = conf->array_sectors;

        blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
        mddev->queue->unplug_fn = linear_unplug;
        mddev->queue->backing_dev_info.congested_fn = linear_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
        return 0;
}

static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
{
        /* Adding a drive to a linear array allows the array to grow.
         * It is permitted if the new drive has a matching superblock
         * already on it, with raid_disk equal to raid_disks.
         * It is achieved by creating a new linear_private_data structure
         * and swapping it in place of the current one.
         * The current one is never freed until the array is stopped.
         * This avoids races.
         */
        linear_conf_t *newconf;

        if (rdev->saved_raid_disk != mddev->raid_disks)
                return -EINVAL;

        rdev->raid_disk = rdev->saved_raid_disk;

        newconf = linear_conf(mddev, mddev->raid_disks+1);

        if (!newconf)
                return -ENOMEM;

        newconf->prev = mddev_to_conf(mddev);
        mddev->private = newconf;
        mddev->raid_disks++;
        mddev->array_sectors = newconf->array_sectors;
        set_capacity(mddev->gendisk, mddev->array_sectors);
        return 0;
}

static int linear_stop (mddev_t *mddev)
{
        linear_conf_t *conf = mddev_to_conf(mddev);

        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
        do {
                linear_conf_t *t = conf->prev;
                kfree(conf->hash_table);
                kfree(conf);
                conf = t;
        } while (conf);

        return 0;
}

static int linear_make_request (struct request_queue *q, struct bio *bio)
{
        const int rw = bio_data_dir(bio);
        mddev_t *mddev = q->queuedata;
        dev_info_t *tmp_dev;
        int cpu;

        if (unlikely(bio_barrier(bio))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }

        cpu = part_stat_lock();
        part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
        part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
                      bio_sectors(bio));
        part_stat_unlock();

        tmp_dev = which_dev(mddev, bio->bi_sector);

        if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
                                        tmp_dev->start_sector)
                     || (bio->bi_sector <
                         tmp_dev->start_sector))) {
                char b[BDEVNAME_SIZE];

                printk("linear_make_request: Sector %llu out of bounds on "
                        "dev %s: %llu sectors, offset %llu\n",
                        (unsigned long long)bio->bi_sector,
                        bdevname(tmp_dev->rdev->bdev, b),
                        (unsigned long long)tmp_dev->num_sectors,
                        (unsigned long long)tmp_dev->start_sector);
                bio_io_error(bio);
                return 0;
        }
        if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
                     tmp_dev->start_sector + tmp_dev->num_sectors)) {
                /* This bio crosses a device boundary, so we have to
                 * split it.
                 */
                struct bio_pair *bp;

                bp = bio_split(bio,
                               tmp_dev->start_sector + tmp_dev->num_sectors
                               - bio->bi_sector);

                if (linear_make_request(q, &bp->bio1))
                        generic_make_request(&bp->bio1);
                if (linear_make_request(q, &bp->bio2))
                        generic_make_request(&bp->bio2);
                bio_pair_release(bp);
                return 0;
        }

        bio->bi_bdev = tmp_dev->rdev->bdev;
        bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
                + tmp_dev->rdev->data_offset;

        return 1;
}
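
/*
 * Editor's example (not in the original file): with the 600 + 3400
 * sector layout used above, a 16-sector write at sector 592 crosses the
 * disk0/disk1 boundary.  bio_split() cuts it after 600 - 592 == 8
 * sectors; each half is then resubmitted and remapped onto its own
 * member disk, with bi_sector rebased by start_sector and data_offset.
 */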

static void linear_status (struct seq_file *seq, mddev_t *mddev)
{
        seq_printf(seq, " %dk rounding", mddev->chunk_size/1024);
}


static struct mdk_personality linear_personality =
{
        .name           = "linear",
        .level          = LEVEL_LINEAR,
        .owner          = THIS_MODULE,
        .make_request   = linear_make_request,
        .run            = linear_run,
        .stop           = linear_stop,
        .status         = linear_status,
        .hot_add_disk   = linear_add,
};

static int __init linear_init (void)
{
        return register_md_personality (&linear_personality);
}

static void linear_exit (void)
{
        unregister_md_personality (&linear_personality);
}


module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated */
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");