linux-bk/drivers/md/multipath.c
<<
>>
Prefs
   1/*
   2 * multipath.c : Multiple Devices driver for Linux
   3 *
   4 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
   5 *
   6 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
   7 *
   8 * MULTIPATH management functions.
   9 *
  10 * derived from raid1.c.
  11 *
  12 * This program is free software; you can redistribute it and/or modify
  13 * it under the terms of the GNU General Public License as published by
  14 * the Free Software Foundation; either version 2, or (at your option)
  15 * any later version.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * (for example /usr/src/linux/COPYING); if not, write to the Free
  19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22#include <linux/module.h>
  23#include <linux/slab.h>
  24#include <linux/spinlock.h>
  25#include <linux/raid/multipath.h>
  26#include <linux/bio.h>
  27#include <linux/buffer_head.h>
  28#include <asm/atomic.h>
  29
  30#define MAJOR_NR MD_MAJOR
  31#define MD_DRIVER
  32#define MD_PERSONALITY
  33#define DEVICE_NR(device) (minor(device))
  34
  35#define MAX_WORK_PER_DISK 128
  36
  37#define NR_RESERVED_BUFS        32
  38
  39
  40/*
  41 * The following can be used to debug the driver
  42 */
  43#define MULTIPATH_DEBUG 0
  44
  45#if MULTIPATH_DEBUG
  46#define PRINTK(x...)   printk(x)
  47#define inline
  48#define __inline__
  49#else
  50#define PRINTK(x...)  do { } while (0)
  51#endif
  52
  53
  54static mdk_personality_t multipath_personality;
  55static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
  56struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
  57
  58
  59static void *mp_pool_alloc(int gfp_flags, void *data)
  60{
  61        struct multipath_bh *mpb;
  62        mpb = kmalloc(sizeof(*mpb), gfp_flags);
  63        if (mpb) 
  64                memset(mpb, 0, sizeof(*mpb));
  65        return mpb;
  66}
  67
  68static void mp_pool_free(void *mpb, void *data)
  69{
  70        kfree(mpb);
  71}
  72
  73static int multipath_map (mddev_t *mddev, struct block_device **bdev)
  74{
  75        multipath_conf_t *conf = mddev_to_conf(mddev);
  76        int i, disks = MD_SB_DISKS;
  77
  78        /*
  79         * Later we do read balancing on the read side 
  80         * now we use the first available disk.
  81         */
  82
  83        for (i = 0; i < disks; i++) {
  84                if (conf->multipaths[i].operational) {
  85                        *bdev = conf->multipaths[i].bdev;
  86                        return (0);
  87                }
  88        }
  89
  90        printk (KERN_ERR "multipath_map(): no more operational IO paths?\n");
  91        return (-1);
  92}
  93
  94static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  95{
  96        unsigned long flags;
  97        mddev_t *mddev = mp_bh->mddev;
  98        multipath_conf_t *conf = mddev_to_conf(mddev);
  99
 100        spin_lock_irqsave(&retry_list_lock, flags);
 101        if (multipath_retry_list == NULL)
 102                multipath_retry_tail = &multipath_retry_list;
 103        *multipath_retry_tail = mp_bh;
 104        multipath_retry_tail = &mp_bh->next_mp;
 105        mp_bh->next_mp = NULL;
 106        spin_unlock_irqrestore(&retry_list_lock, flags);
 107        md_wakeup_thread(conf->thread);
 108}
 109
 110
 111/*
 112 * multipath_end_bh_io() is called when we have finished servicing a multipathed
 113 * operation and are ready to return a success/failure code to the buffer
 114 * cache layer.
 115 */
 116static void multipath_end_bh_io (struct multipath_bh *mp_bh, int uptodate)
 117{
 118        struct bio *bio = mp_bh->master_bio;
 119        multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
 120
 121        bio_endio(bio, uptodate);
 122        mempool_free(mp_bh, conf->pool);
 123}
 124
 125void multipath_end_request(struct bio *bio)
 126{
 127        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 128        struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
 129        multipath_conf_t *conf;
 130        struct block_device *bdev;
 131        if (uptodate) {
 132                multipath_end_bh_io(mp_bh, uptodate);
 133                return;
 134        }
 135        /*
 136         * oops, IO error:
 137         */
 138        conf = mddev_to_conf(mp_bh->mddev);
 139        bdev = conf->multipaths[mp_bh->path].bdev;
 140        md_error (mp_bh->mddev, bdev);
 141        printk(KERN_ERR "multipath: %s: rescheduling sector %lu\n", 
 142                 bdev_partition_name(bdev), bio->bi_sector);
 143        multipath_reschedule_retry(mp_bh);
 144        return;
 145}
 146
 147/*
 148 * This routine returns the disk from which the requested read should
 149 * be done.
 150 */
 151
 152static int multipath_read_balance (multipath_conf_t *conf)
 153{
 154        int disk;
 155
 156        for (disk = 0; disk < MD_SB_DISKS; disk++)      
 157                if (conf->multipaths[disk].operational)
 158                        return disk;
 159        BUG();
 160        return 0;
 161}
 162
 163static int multipath_make_request (request_queue_t *q, struct bio * bio)
 164{
 165        mddev_t *mddev = q->queuedata;
 166        multipath_conf_t *conf = mddev_to_conf(mddev);
 167        struct multipath_bh * mp_bh;
 168        struct multipath_info *multipath;
 169
 170        mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
 171
 172        mp_bh->master_bio = bio;
 173        mp_bh->mddev = mddev;
 174
 175        /*
 176         * read balancing logic:
 177         */
 178        mp_bh->path = multipath_read_balance(conf);
 179        multipath = conf->multipaths + mp_bh->path;
 180
 181        mp_bh->bio = *bio;
 182        mp_bh->bio.bi_bdev = multipath->bdev;
 183        mp_bh->bio.bi_end_io = multipath_end_request;
 184        mp_bh->bio.bi_private = mp_bh;
 185        generic_make_request(&mp_bh->bio);
 186        return 0;
 187}
 188
 189static int multipath_status (char *page, mddev_t *mddev)
 190{
 191        multipath_conf_t *conf = mddev_to_conf(mddev);
 192        int sz = 0, i;
 193        
 194        sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
 195                                                 conf->working_disks);
 196        for (i = 0; i < conf->raid_disks; i++)
 197                sz += sprintf (page+sz, "%s",
 198                        conf->multipaths[i].operational ? "U" : "_");
 199        sz += sprintf (page+sz, "]");
 200        return sz;
 201}
 202
 203#define LAST_DISK KERN_ALERT \
 204"multipath: only one IO path left and IO error.\n"
 205
 206#define NO_SPARE_DISK KERN_ALERT \
 207"multipath: no spare IO path left!\n"
 208
 209#define DISK_FAILED KERN_ALERT \
 210"multipath: IO failure on %s, disabling IO path. \n" \
 211"       Operation continuing on %d IO paths.\n"
 212
 213static void mark_disk_bad (mddev_t *mddev, int failed)
 214{
 215        multipath_conf_t *conf = mddev_to_conf(mddev);
 216        struct multipath_info *multipath = conf->multipaths+failed;
 217
 218        multipath->operational = 0;
 219        mddev->sb_dirty = 1;
 220        conf->working_disks--;
 221        printk (DISK_FAILED, bdev_partition_name (multipath->bdev),
 222                                 conf->working_disks);
 223}
 224
 225/*
 226 * Careful, this can execute in IRQ contexts as well!
 227 */
 228static int multipath_error (mddev_t *mddev, struct block_device *bdev)
 229{
 230        multipath_conf_t *conf = mddev_to_conf(mddev);
 231        struct multipath_info * multipaths = conf->multipaths;
 232        int disks = MD_SB_DISKS;
 233        int i;
 234
 235
 236        if (conf->working_disks <= 1) {
 237                /*
 238                 * Uh oh, we can do nothing if this is our last path, but
 239                 * first check if this is a queued request for a device
 240                 * which has just failed.
 241                 */
 242                for (i = 0; i < disks; i++) {
 243                        if (multipaths[i].bdev == bdev && !multipaths[i].operational)
 244                                return 0;
 245                }
 246                printk (LAST_DISK);
 247                return 1; /* leave it active... it's all we have */
 248        } else {
 249                /*
 250                 * Mark disk as unusable
 251                 */
 252                for (i = 0; i < disks; i++) {
 253                        if (multipaths[i].bdev == bdev && multipaths[i].operational) {
 254                                mark_disk_bad(mddev, i);
 255                                break;
 256                        }
 257                }
 258        }
 259        return 0;
 260}
 261
 262#undef LAST_DISK
 263#undef NO_SPARE_DISK
 264#undef DISK_FAILED
 265
 266
 267static void print_multipath_conf (multipath_conf_t *conf)
 268{
 269        int i;
 270        struct multipath_info *tmp;
 271
 272        printk("MULTIPATH conf printout:\n");
 273        if (!conf) {
 274                printk("(conf==NULL)\n");
 275                return;
 276        }
 277        printk(" --- wd:%d rd:%d\n", conf->working_disks,
 278                         conf->raid_disks);
 279
 280        for (i = 0; i < MD_SB_DISKS; i++) {
 281                tmp = conf->multipaths + i;
 282                if (tmp->operational || tmp->used_slot)
 283                        printk(" disk%d, o:%d, us:%d dev:%s\n",
 284                                i,tmp->operational,
 285                                tmp->used_slot,
 286                                bdev_partition_name(tmp->bdev));
 287        }
 288}
 289
 290
 291static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 292{
 293        multipath_conf_t *conf = mddev->private;
 294        int err = 1;
 295        struct multipath_info *p = conf->multipaths + rdev->raid_disk;
 296
 297        print_multipath_conf(conf);
 298        spin_lock_irq(&conf->device_lock);
 299        if (!p->used_slot) {
 300                p->bdev = rdev->bdev;
 301                p->operational = 1;
 302                p->used_slot = 1;
 303                conf->working_disks++;
 304                err = 0;
 305        }
 306        if (err)
 307                MD_BUG();
 308        spin_unlock_irq(&conf->device_lock);
 309
 310        print_multipath_conf(conf);
 311        return err;
 312}
 313
 314static int multipath_remove_disk(mddev_t *mddev, int number)
 315{
 316        multipath_conf_t *conf = mddev->private;
 317        int err = 1;
 318        struct multipath_info *p = conf->multipaths + number;
 319
 320        print_multipath_conf(conf);
 321        spin_lock_irq(&conf->device_lock);
 322
 323        if (p->used_slot) {
 324                if (p->operational) {
 325                        printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
 326                        err = -EBUSY;
 327                        goto abort;
 328                }
 329                p->bdev = NULL;
 330                p->used_slot = 0;
 331                err = 0;
 332        }
 333        if (err)
 334                MD_BUG();
 335abort:
 336        spin_unlock_irq(&conf->device_lock);
 337
 338        print_multipath_conf(conf);
 339        return err;
 340}
 341
 342#define IO_ERROR KERN_ALERT \
 343"multipath: %s: unrecoverable IO read error for block %lu\n"
 344
 345#define REDIRECT_SECTOR KERN_ERR \
 346"multipath: %s: redirecting sector %lu to another IO path\n"
 347
 348/*
 349 * This is a kernel thread which:
 350 *
 351 *      1.      Retries failed read operations on working multipaths.
 352 *      2.      Updates the raid superblock when problems encounter.
 353 *      3.      Performs writes following reads for array syncronising.
 354 */
 355
 356static void multipathd (void *data)
 357{
 358        struct multipath_bh *mp_bh;
 359        struct bio *bio;
 360        unsigned long flags;
 361        mddev_t *mddev;
 362        struct block_device *bdev;
 363
 364        for (;;) {
 365                spin_lock_irqsave(&retry_list_lock, flags);
 366                mp_bh = multipath_retry_list;
 367                if (!mp_bh)
 368                        break;
 369                multipath_retry_list = mp_bh->next_mp;
 370                spin_unlock_irqrestore(&retry_list_lock, flags);
 371
 372                mddev = mp_bh->mddev;
 373                bio = &mp_bh->bio;
 374                bio->bi_sector = mp_bh->master_bio->bi_sector;
 375                bdev = bio->bi_bdev;
 376                
 377                multipath_map (mddev, &bio->bi_bdev);
 378                if (bio->bi_bdev == bdev) {
 379                        printk(IO_ERROR,
 380                                bdev_partition_name(bio->bi_bdev), bio->bi_sector);
 381                        multipath_end_bh_io(mp_bh, 0);
 382                } else {
 383                        printk(REDIRECT_SECTOR,
 384                                bdev_partition_name(bio->bi_bdev), bio->bi_sector);
 385                        generic_make_request(bio);
 386                }
 387        }
 388        spin_unlock_irqrestore(&retry_list_lock, flags);
 389}
 390#undef IO_ERROR
 391#undef REDIRECT_SECTOR
 392
 393#define INVALID_LEVEL KERN_WARNING \
 394"multipath: md%d: raid level not set to multipath IO (%d)\n"
 395
 396#define NO_SB KERN_ERR \
 397"multipath: disabled IO path %s (couldn't access raid superblock)\n"
 398
 399#define ERRORS KERN_ERR \
 400"multipath: disabled IO path %s (errors detected)\n"
 401
 402#define NOT_IN_SYNC KERN_ERR \
 403"multipath: making IO path %s a spare path (not in sync)\n"
 404
 405#define INCONSISTENT KERN_ERR \
 406"multipath: disabled IO path %s (inconsistent descriptor)\n"
 407
 408#define ALREADY_RUNNING KERN_ERR \
 409"multipath: disabled IO path %s (multipath %d already operational)\n"
 410
 411#define OPERATIONAL KERN_INFO \
 412"multipath: device %s operational as IO path %d\n"
 413
 414#define MEM_ERROR KERN_ERR \
 415"multipath: couldn't allocate memory for md%d\n"
 416
 417#define SPARE KERN_INFO \
 418"multipath: spare IO path %s\n"
 419
 420#define NONE_OPERATIONAL KERN_ERR \
 421"multipath: no operational IO paths for md%d\n"
 422
 423#define SB_DIFFERENCES KERN_ERR \
 424"multipath: detected IO path differences!\n"
 425
 426#define ARRAY_IS_ACTIVE KERN_INFO \
 427"multipath: array md%d active with %d out of %d IO paths\n"
 428
 429#define THREAD_ERROR KERN_ERR \
 430"multipath: couldn't allocate thread for md%d\n"
 431
 432static int multipath_run (mddev_t *mddev)
 433{
 434        multipath_conf_t *conf;
 435        int disk_idx;
 436        struct multipath_info *disk;
 437        mdk_rdev_t *rdev;
 438        struct list_head *tmp;
 439        int num_rdevs = 0;
 440
 441        MOD_INC_USE_COUNT;
 442
 443        if (mddev->level != LEVEL_MULTIPATH) {
 444                printk(INVALID_LEVEL, mdidx(mddev), mddev->level);
 445                goto out;
 446        }
 447        /*
 448         * copy the already verified devices into our private MULTIPATH
 449         * bookkeeping area. [whatever we allocate in multipath_run(),
 450         * should be freed in multipath_stop()]
 451         */
 452
 453        conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
 454        mddev->private = conf;
 455        if (!conf) {
 456                printk(MEM_ERROR, mdidx(mddev));
 457                goto out;
 458        }
 459        memset(conf, 0, sizeof(*conf));
 460
 461        ITERATE_RDEV(mddev,rdev,tmp) {
 462                if (rdev->faulty) {
 463                        /* this is a "should never happen" case and if it */
 464                        /* ever does happen, a continue; won't help */
 465                        printk(ERRORS, bdev_partition_name(rdev->bdev));
 466                        continue;
 467                } else {
 468                        /* this is a "should never happen" case and if it */
 469                        /* ever does happen, a continue; won't help */
 470                        if (!rdev->sb) {
 471                                MD_BUG();
 472                                continue;
 473                        }
 474                }
 475                if (rdev->desc_nr == -1) {
 476                        MD_BUG();
 477                        continue;
 478                }
 479
 480                disk_idx = rdev->raid_disk;
 481                disk = conf->multipaths + disk_idx;
 482
 483                /*
 484                 * Mark all disks as active to start with, there are no
 485                 * spares.  multipath_read_balance deals with choose
 486                 * the "best" operational device.
 487                 */
 488                disk->bdev = rdev->bdev;
 489                disk->operational = 1;
 490                disk->used_slot = 1;
 491                num_rdevs++;
 492        }
 493
 494        conf->raid_disks = mddev->raid_disks = num_rdevs;
 495        mddev->sb_dirty = 1;
 496        conf->mddev = mddev;
 497        conf->device_lock = SPIN_LOCK_UNLOCKED;
 498
 499        if (!conf->working_disks) {
 500                printk(NONE_OPERATIONAL, mdidx(mddev));
 501                goto out_free_conf;
 502        }
 503
 504        conf->pool = mempool_create(NR_RESERVED_BUFS,
 505                                    mp_pool_alloc, mp_pool_free,
 506                                    NULL);
 507        if (conf->pool == NULL) {
 508                printk(MEM_ERROR, mdidx(mddev));
 509                goto out_free_conf;
 510        }
 511
 512        {
 513                const char * name = "multipathd";
 514
 515                conf->thread = md_register_thread(multipathd, conf, name);
 516                if (!conf->thread) {
 517                        printk(THREAD_ERROR, mdidx(mddev));
 518                        goto out_free_conf;
 519                }
 520        }
 521
 522        printk(ARRAY_IS_ACTIVE, mdidx(mddev), conf->working_disks,
 523                        mddev->raid_disks);
 524        /*
 525         * Ok, everything is just fine now
 526         */
 527        return 0;
 528
 529out_free_conf:
 530        if (conf->pool)
 531                mempool_destroy(conf->pool);
 532        kfree(conf);
 533        mddev->private = NULL;
 534out:
 535        MOD_DEC_USE_COUNT;
 536        return -EIO;
 537}
 538
 539#undef INVALID_LEVEL
 540#undef NO_SB
 541#undef ERRORS
 542#undef NOT_IN_SYNC
 543#undef INCONSISTENT
 544#undef ALREADY_RUNNING
 545#undef OPERATIONAL
 546#undef SPARE
 547#undef NONE_OPERATIONAL
 548#undef SB_DIFFERENCES
 549#undef ARRAY_IS_ACTIVE
 550
 551static int multipath_stop (mddev_t *mddev)
 552{
 553        multipath_conf_t *conf = mddev_to_conf(mddev);
 554
 555        md_unregister_thread(conf->thread);
 556        mempool_destroy(conf->pool);
 557        kfree(conf);
 558        mddev->private = NULL;
 559        MOD_DEC_USE_COUNT;
 560        return 0;
 561}
 562
 563static mdk_personality_t multipath_personality=
 564{
 565        .name           = "multipath",
 566        .make_request   = multipath_make_request,
 567        .run            = multipath_run,
 568        .stop           = multipath_stop,
 569        .status         = multipath_status,
 570        .error_handler  = multipath_error,
 571        .hot_add_disk   = multipath_add_disk,
 572        .hot_remove_disk= multipath_remove_disk,
 573};
 574
 575static int __init multipath_init (void)
 576{
 577        return register_md_personality (MULTIPATH, &multipath_personality);
 578}
 579
 580static void __exit multipath_exit (void)
 581{
 582        unregister_md_personality (MULTIPATH);
 583}
 584
 585module_init(multipath_init);
 586module_exit(multipath_exit);
 587MODULE_LICENSE("GPL");
 588
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.