1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/module.h>
23#include <linux/slab.h>
24#include <linux/spinlock.h>
25#include <linux/raid/multipath.h>
26#include <linux/bio.h>
27#include <linux/buffer_head.h>
28#include <asm/atomic.h>
29
/*
 * Driver identification macros. They are not referenced again in the
 * visible part of this file; presumably the included md/block headers
 * consume them -- TODO confirm.
 */
#define MAJOR_NR		MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY
#define DEVICE_NR(device)	(minor(device))

/* Not referenced in the visible part of this file. */
#define MAX_WORK_PER_DISK	128

/* Minimum number of multipath_bh elements kept in each array's mempool. */
#define NR_RESERVED_BUFS	32

/* Set to 1 to turn PRINTK() into printk() and to disable inlining. */
#define MULTIPATH_DEBUG		0

#if !MULTIPATH_DEBUG
#define PRINTK(x...)		do { } while (0)
#else
#define PRINTK(x...)		printk(x)
#define inline
#define __inline__
#endif
52
53
54static mdk_personality_t multipath_personality;
55static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
56struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
57
58
59static void *mp_pool_alloc(int gfp_flags, void *data)
60{
61 struct multipath_bh *mpb;
62 mpb = kmalloc(sizeof(*mpb), gfp_flags);
63 if (mpb)
64 memset(mpb, 0, sizeof(*mpb));
65 return mpb;
66}
67
/*
 * mempool element destructor: release an element obtained from
 * mp_pool_alloc().
 *
 * @mpb:  element to free.
 * @data: pool private data (unused).
 */
static void mp_pool_free(void *mpb, void *data)
{
	struct multipath_bh *element = mpb;

	kfree(element);
}
72
73static int multipath_map (mddev_t *mddev, struct block_device **bdev)
74{
75 multipath_conf_t *conf = mddev_to_conf(mddev);
76 int i, disks = MD_SB_DISKS;
77
78
79
80
81
82
83 for (i = 0; i < disks; i++) {
84 if (conf->multipaths[i].operational) {
85 *bdev = conf->multipaths[i].bdev;
86 return (0);
87 }
88 }
89
90 printk (KERN_ERR "multipath_map(): no more operational IO paths?\n");
91 return (-1);
92}
93
94static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
95{
96 unsigned long flags;
97 mddev_t *mddev = mp_bh->mddev;
98 multipath_conf_t *conf = mddev_to_conf(mddev);
99
100 spin_lock_irqsave(&retry_list_lock, flags);
101 if (multipath_retry_list == NULL)
102 multipath_retry_tail = &multipath_retry_list;
103 *multipath_retry_tail = mp_bh;
104 multipath_retry_tail = &mp_bh->next_mp;
105 mp_bh->next_mp = NULL;
106 spin_unlock_irqrestore(&retry_list_lock, flags);
107 md_wakeup_thread(conf->thread);
108}
109
110
111
112
113
114
115
116static void multipath_end_bh_io (struct multipath_bh *mp_bh, int uptodate)
117{
118 struct bio *bio = mp_bh->master_bio;
119 multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
120
121 bio_endio(bio, uptodate);
122 mempool_free(mp_bh, conf->pool);
123}
124
125void multipath_end_request(struct bio *bio)
126{
127 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
128 struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
129 multipath_conf_t *conf;
130 struct block_device *bdev;
131 if (uptodate) {
132 multipath_end_bh_io(mp_bh, uptodate);
133 return;
134 }
135
136
137
138 conf = mddev_to_conf(mp_bh->mddev);
139 bdev = conf->multipaths[mp_bh->path].bdev;
140 md_error (mp_bh->mddev, bdev);
141 printk(KERN_ERR "multipath: %s: rescheduling sector %lu\n",
142 bdev_partition_name(bdev), bio->bi_sector);
143 multipath_reschedule_retry(mp_bh);
144 return;
145}
146
147
148
149
150
151
152static int multipath_read_balance (multipath_conf_t *conf)
153{
154 int disk;
155
156 for (disk = 0; disk < MD_SB_DISKS; disk++)
157 if (conf->multipaths[disk].operational)
158 return disk;
159 BUG();
160 return 0;
161}
162
163static int multipath_make_request (request_queue_t *q, struct bio * bio)
164{
165 mddev_t *mddev = q->queuedata;
166 multipath_conf_t *conf = mddev_to_conf(mddev);
167 struct multipath_bh * mp_bh;
168 struct multipath_info *multipath;
169
170 mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
171
172 mp_bh->master_bio = bio;
173 mp_bh->mddev = mddev;
174
175
176
177
178 mp_bh->path = multipath_read_balance(conf);
179 multipath = conf->multipaths + mp_bh->path;
180
181 mp_bh->bio = *bio;
182 mp_bh->bio.bi_bdev = multipath->bdev;
183 mp_bh->bio.bi_end_io = multipath_end_request;
184 mp_bh->bio.bi_private = mp_bh;
185 generic_make_request(&mp_bh->bio);
186 return 0;
187}
188
189static int multipath_status (char *page, mddev_t *mddev)
190{
191 multipath_conf_t *conf = mddev_to_conf(mddev);
192 int sz = 0, i;
193
194 sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
195 conf->working_disks);
196 for (i = 0; i < conf->raid_disks; i++)
197 sz += sprintf (page+sz, "%s",
198 conf->multipaths[i].operational ? "U" : "_");
199 sz += sprintf (page+sz, "]");
200 return sz;
201}
202
/* Log messages for the path-failure handling code that follows. */

/* Printed when an error is reported while at most one path is working. */
#define LAST_DISK \
	KERN_ALERT "multipath: only one IO path left and IO error.\n"

/* Not referenced in the visible part of this file. */
#define NO_SPARE_DISK \
	KERN_ALERT "multipath: no spare IO path left!\n"

/* Arguments: name of the failed device, number of paths still working. */
#define DISK_FAILED \
	KERN_ALERT "multipath: IO failure on %s, disabling IO path. \n" \
	" Operation continuing on %d IO paths.\n"
212
213static void mark_disk_bad (mddev_t *mddev, int failed)
214{
215 multipath_conf_t *conf = mddev_to_conf(mddev);
216 struct multipath_info *multipath = conf->multipaths+failed;
217
218 multipath->operational = 0;
219 mddev->sb_dirty = 1;
220 conf->working_disks--;
221 printk (DISK_FAILED, bdev_partition_name (multipath->bdev),
222 conf->working_disks);
223}
224
225
226
227
228static int multipath_error (mddev_t *mddev, struct block_device *bdev)
229{
230 multipath_conf_t *conf = mddev_to_conf(mddev);
231 struct multipath_info * multipaths = conf->multipaths;
232 int disks = MD_SB_DISKS;
233 int i;
234
235
236 if (conf->working_disks <= 1) {
237
238
239
240
241
242 for (i = 0; i < disks; i++) {
243 if (multipaths[i].bdev == bdev && !multipaths[i].operational)
244 return 0;
245 }
246 printk (LAST_DISK);
247 return 1;
248 } else {
249
250
251
252 for (i = 0; i < disks; i++) {
253 if (multipaths[i].bdev == bdev && multipaths[i].operational) {
254 mark_disk_bad(mddev, i);
255 break;
256 }
257 }
258 }
259 return 0;
260}
261
262#undef LAST_DISK
263#undef NO_SPARE_DISK
264#undef DISK_FAILED
265
266
267static void print_multipath_conf (multipath_conf_t *conf)
268{
269 int i;
270 struct multipath_info *tmp;
271
272 printk("MULTIPATH conf printout:\n");
273 if (!conf) {
274 printk("(conf==NULL)\n");
275 return;
276 }
277 printk(" --- wd:%d rd:%d\n", conf->working_disks,
278 conf->raid_disks);
279
280 for (i = 0; i < MD_SB_DISKS; i++) {
281 tmp = conf->multipaths + i;
282 if (tmp->operational || tmp->used_slot)
283 printk(" disk%d, o:%d, us:%d dev:%s\n",
284 i,tmp->operational,
285 tmp->used_slot,
286 bdev_partition_name(tmp->bdev));
287 }
288}
289
290
291static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
292{
293 multipath_conf_t *conf = mddev->private;
294 int err = 1;
295 struct multipath_info *p = conf->multipaths + rdev->raid_disk;
296
297 print_multipath_conf(conf);
298 spin_lock_irq(&conf->device_lock);
299 if (!p->used_slot) {
300 p->bdev = rdev->bdev;
301 p->operational = 1;
302 p->used_slot = 1;
303 conf->working_disks++;
304 err = 0;
305 }
306 if (err)
307 MD_BUG();
308 spin_unlock_irq(&conf->device_lock);
309
310 print_multipath_conf(conf);
311 return err;
312}
313
314static int multipath_remove_disk(mddev_t *mddev, int number)
315{
316 multipath_conf_t *conf = mddev->private;
317 int err = 1;
318 struct multipath_info *p = conf->multipaths + number;
319
320 print_multipath_conf(conf);
321 spin_lock_irq(&conf->device_lock);
322
323 if (p->used_slot) {
324 if (p->operational) {
325 printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
326 err = -EBUSY;
327 goto abort;
328 }
329 p->bdev = NULL;
330 p->used_slot = 0;
331 err = 0;
332 }
333 if (err)
334 MD_BUG();
335abort:
336 spin_unlock_irq(&conf->device_lock);
337
338 print_multipath_conf(conf);
339 return err;
340}
341
/* Log messages for the retry thread. Arguments: device name, sector. */
#define IO_ERROR \
	KERN_ALERT "multipath: %s: unrecoverable IO read error for block %lu\n"

#define REDIRECT_SECTOR \
	KERN_ERR "multipath: %s: redirecting sector %lu to another IO path\n"
347
348
349
350
351
352
353
354
355
356static void multipathd (void *data)
357{
358 struct multipath_bh *mp_bh;
359 struct bio *bio;
360 unsigned long flags;
361 mddev_t *mddev;
362 struct block_device *bdev;
363
364 for (;;) {
365 spin_lock_irqsave(&retry_list_lock, flags);
366 mp_bh = multipath_retry_list;
367 if (!mp_bh)
368 break;
369 multipath_retry_list = mp_bh->next_mp;
370 spin_unlock_irqrestore(&retry_list_lock, flags);
371
372 mddev = mp_bh->mddev;
373 bio = &mp_bh->bio;
374 bio->bi_sector = mp_bh->master_bio->bi_sector;
375 bdev = bio->bi_bdev;
376
377 multipath_map (mddev, &bio->bi_bdev);
378 if (bio->bi_bdev == bdev) {
379 printk(IO_ERROR,
380 bdev_partition_name(bio->bi_bdev), bio->bi_sector);
381 multipath_end_bh_io(mp_bh, 0);
382 } else {
383 printk(REDIRECT_SECTOR,
384 bdev_partition_name(bio->bi_bdev), bio->bi_sector);
385 generic_make_request(bio);
386 }
387 }
388 spin_unlock_irqrestore(&retry_list_lock, flags);
389}
390#undef IO_ERROR
391#undef REDIRECT_SECTOR
392
/*
 * Log messages for array start-up. Several of them (NO_SB, NOT_IN_SYNC,
 * INCONSISTENT, ALREADY_RUNNING, OPERATIONAL, SPARE, SB_DIFFERENCES)
 * are not referenced in the visible part of this file.
 */
#define INVALID_LEVEL \
	KERN_WARNING "multipath: md%d: raid level not set to multipath IO (%d)\n"

#define NO_SB \
	KERN_ERR "multipath: disabled IO path %s (couldn't access raid superblock)\n"

#define ERRORS \
	KERN_ERR "multipath: disabled IO path %s (errors detected)\n"

#define NOT_IN_SYNC \
	KERN_ERR "multipath: making IO path %s a spare path (not in sync)\n"

#define INCONSISTENT \
	KERN_ERR "multipath: disabled IO path %s (inconsistent descriptor)\n"

#define ALREADY_RUNNING \
	KERN_ERR "multipath: disabled IO path %s (multipath %d already operational)\n"

#define OPERATIONAL \
	KERN_INFO "multipath: device %s operational as IO path %d\n"

#define MEM_ERROR \
	KERN_ERR "multipath: couldn't allocate memory for md%d\n"

#define SPARE \
	KERN_INFO "multipath: spare IO path %s\n"

#define NONE_OPERATIONAL \
	KERN_ERR "multipath: no operational IO paths for md%d\n"

#define SB_DIFFERENCES \
	KERN_ERR "multipath: detected IO path differences!\n"

#define ARRAY_IS_ACTIVE \
	KERN_INFO "multipath: array md%d active with %d out of %d IO paths\n"

#define THREAD_ERROR \
	KERN_ERR "multipath: couldn't allocate thread for md%d\n"
431
432static int multipath_run (mddev_t *mddev)
433{
434 multipath_conf_t *conf;
435 int disk_idx;
436 struct multipath_info *disk;
437 mdk_rdev_t *rdev;
438 struct list_head *tmp;
439 int num_rdevs = 0;
440
441 MOD_INC_USE_COUNT;
442
443 if (mddev->level != LEVEL_MULTIPATH) {
444 printk(INVALID_LEVEL, mdidx(mddev), mddev->level);
445 goto out;
446 }
447
448
449
450
451
452
453 conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
454 mddev->private = conf;
455 if (!conf) {
456 printk(MEM_ERROR, mdidx(mddev));
457 goto out;
458 }
459 memset(conf, 0, sizeof(*conf));
460
461 ITERATE_RDEV(mddev,rdev,tmp) {
462 if (rdev->faulty) {
463
464
465 printk(ERRORS, bdev_partition_name(rdev->bdev));
466 continue;
467 } else {
468
469
470 if (!rdev->sb) {
471 MD_BUG();
472 continue;
473 }
474 }
475 if (rdev->desc_nr == -1) {
476 MD_BUG();
477 continue;
478 }
479
480 disk_idx = rdev->raid_disk;
481 disk = conf->multipaths + disk_idx;
482
483
484
485
486
487
488 disk->bdev = rdev->bdev;
489 disk->operational = 1;
490 disk->used_slot = 1;
491 num_rdevs++;
492 }
493
494 conf->raid_disks = mddev->raid_disks = num_rdevs;
495 mddev->sb_dirty = 1;
496 conf->mddev = mddev;
497 conf->device_lock = SPIN_LOCK_UNLOCKED;
498
499 if (!conf->working_disks) {
500 printk(NONE_OPERATIONAL, mdidx(mddev));
501 goto out_free_conf;
502 }
503
504 conf->pool = mempool_create(NR_RESERVED_BUFS,
505 mp_pool_alloc, mp_pool_free,
506 NULL);
507 if (conf->pool == NULL) {
508 printk(MEM_ERROR, mdidx(mddev));
509 goto out_free_conf;
510 }
511
512 {
513 const char * name = "multipathd";
514
515 conf->thread = md_register_thread(multipathd, conf, name);
516 if (!conf->thread) {
517 printk(THREAD_ERROR, mdidx(mddev));
518 goto out_free_conf;
519 }
520 }
521
522 printk(ARRAY_IS_ACTIVE, mdidx(mddev), conf->working_disks,
523 mddev->raid_disks);
524
525
526
527 return 0;
528
529out_free_conf:
530 if (conf->pool)
531 mempool_destroy(conf->pool);
532 kfree(conf);
533 mddev->private = NULL;
534out:
535 MOD_DEC_USE_COUNT;
536 return -EIO;
537}
538
539#undef INVALID_LEVEL
540#undef NO_SB
541#undef ERRORS
542#undef NOT_IN_SYNC
543#undef INCONSISTENT
544#undef ALREADY_RUNNING
545#undef OPERATIONAL
546#undef SPARE
547#undef NONE_OPERATIONAL
548#undef SB_DIFFERENCES
549#undef ARRAY_IS_ACTIVE
550
551static int multipath_stop (mddev_t *mddev)
552{
553 multipath_conf_t *conf = mddev_to_conf(mddev);
554
555 md_unregister_thread(conf->thread);
556 mempool_destroy(conf->pool);
557 kfree(conf);
558 mddev->private = NULL;
559 MOD_DEC_USE_COUNT;
560 return 0;
561}
562
563static mdk_personality_t multipath_personality=
564{
565 .name = "multipath",
566 .make_request = multipath_make_request,
567 .run = multipath_run,
568 .stop = multipath_stop,
569 .status = multipath_status,
570 .error_handler = multipath_error,
571 .hot_add_disk = multipath_add_disk,
572 .hot_remove_disk= multipath_remove_disk,
573};
574
575static int __init multipath_init (void)
576{
577 return register_md_personality (MULTIPATH, &multipath_personality);
578}
579
580static void __exit multipath_exit (void)
581{
582 unregister_md_personality (MULTIPATH);
583}
584
585module_init(multipath_init);
586module_exit(multipath_exit);
587MODULE_LICENSE("GPL");
588