1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/kthread.h>
38#include <linux/linkage.h>
39#include <linux/raid/md.h>
40#include <linux/raid/bitmap.h>
41#include <linux/sysctl.h>
42#include <linux/buffer_head.h>
43#include <linux/poll.h>
44#include <linux/mutex.h>
45#include <linux/ctype.h>
46#include <linux/freezer.h>
47
48#include <linux/init.h>
49
50#include <linux/file.h>
51
52#ifdef CONFIG_KMOD
53#include <linux/kmod.h>
54#endif
55
56#include <asm/unaligned.h>
57
/* Major number used by this driver. */
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER

/*
 * Right-shift applied to the minor number of a unit whose major is not
 * MD_MAJOR in order to obtain its md_minor (see mddev_find()).
 */
#define MdpMinorShift 6

/* Set DEBUG to a non-zero value to make dprintk() actually print. */
#define DEBUG 0
/* printk() wrapper whose call is short-circuited away while DEBUG is 0. */
#define dprintk(x...) ((void)(DEBUG && printk(x)))
66
67
68#ifndef MODULE
69static void autostart_arrays (int part);
70#endif
71
/* List of struct mdk_personality entries; searched by find_pers(). */
static LIST_HEAD(pers_list);
/* NOTE(review): presumably guards pers_list -- no user is visible in this part of the file, confirm. */
static DEFINE_SPINLOCK(pers_lock);

/* Forward declaration: invoked by the MD_BUG() macro. */
static void md_print_devices(void);

/* NOTE(review): wait queue with no user in this part of the file; presumably resync-related -- confirm. */
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
78
/*
 * Report an internal inconsistency: log the file and line of the call
 * site, then call md_print_devices().  Any arguments are ignored.
 *
 * The body is wrapped in do { } while (0) so that "MD_BUG();" expands to
 * exactly one statement and remains safe as the unbraced body of an
 * if/else.
 */
#define MD_BUG(x...) do { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } while (0)
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94static int sysctl_speed_limit_min = 1000;
95static int sysctl_speed_limit_max = 200000;
96static inline int speed_min(mddev_t *mddev)
97{
98 return mddev->sync_speed_min ?
99 mddev->sync_speed_min : sysctl_speed_limit_min;
100}
101
102static inline int speed_max(mddev_t *mddev)
103{
104 return mddev->sync_speed_max ?
105 mddev->sync_speed_max : sysctl_speed_limit_max;
106}
107
/* NOTE(review): presumably holds the handle of the registered sysctl tree; registration is not in this part of the file. */
static struct ctl_table_header *raid_table_header;

/*
 * Leaf sysctl entries: two integer knobs backed by
 * sysctl_speed_limit_min and sysctl_speed_limit_max.  Both are
 * world-readable, owner-writable and handled by proc_dointvec.
 */
static ctl_table raid_table[] = {
	{
		.ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
		.procname = "speed_limit_min",
		.data = &sysctl_speed_limit_min,
		.maxlen = sizeof(int),
		.mode = S_IRUGO|S_IWUSR,
		.proc_handler = &proc_dointvec,
	},
	{
		.ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
		.procname = "speed_limit_max",
		.data = &sysctl_speed_limit_max,
		.maxlen = sizeof(int),
		.mode = S_IRUGO|S_IWUSR,
		.proc_handler = &proc_dointvec,
	},
	/* terminating entry */
	{ .ctl_name = 0 }
};
129
/* The "raid" directory: a read/search-only node whose children are raid_table. */
static ctl_table raid_dir_table[] = {
	{
		.ctl_name = DEV_RAID,
		.procname = "raid",
		.maxlen = 0,
		.mode = S_IRUGO|S_IXUGO,
		.child = raid_table,
	},
	/* terminating entry */
	{ .ctl_name = 0 }
};
140
/* Top of the tree: the "dev" directory (mode 0555) containing raid_dir_table. */
static ctl_table raid_root_table[] = {
	{
		.ctl_name = CTL_DEV,
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = raid_dir_table,
	},
	/* terminating entry */
	{ .ctl_name = 0 }
};
151
/* Forward declaration; the definition is not in this part of the file. */
static struct block_device_operations md_fops;

/* NOTE(review): the name suggests arrays are started read-only when this is non-zero -- no user is visible here, confirm. */
static int start_readonly;
155
156
157
158
159
160
161
162
163
164
165
166static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
167static atomic_t md_event_count;
168void md_new_event(mddev_t *mddev)
169{
170 atomic_inc(&md_event_count);
171 wake_up(&md_event_waiters);
172 sysfs_notify(&mddev->kobj, NULL, "sync_action");
173}
174EXPORT_SYMBOL_GPL(md_new_event);
175
176
177
178
179static void md_new_event_inintr(mddev_t *mddev)
180{
181 atomic_inc(&md_event_count);
182 wake_up(&md_event_waiters);
183}
184
185
186
187
188
/*
 * Every mddev_t created by mddev_find() is linked on all_mddevs.  The
 * list is modified and walked with all_mddevs_lock held (see
 * mddev_find() and mddev_put()).
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * for_each_mddev(mddev, tmp) - iterate over every entry on all_mddevs.
 * @mddev: mddev_t * loop cursor
 * @tmp:   struct list_head * scratch cursor
 *
 * The init and increment steps take all_mddevs_lock; the condition step
 * takes a reference on the entry about to be visited (mddev_get()),
 * drops the lock, and releases the reference on the previously visited
 * entry (mddev_put()).  The loop body therefore runs without the lock
 * held but with a reference on @mddev.
 *
 * Because the previous entry's reference is only dropped in the
 * condition step, a caller that leaves the loop early still holds the
 * reference on the current @mddev.
 */
#define for_each_mddev(mddev,tmp) \
 \
	for (({ spin_lock(&all_mddevs_lock); \
		tmp = all_mddevs.next; \
		mddev = NULL;}); \
	     ({ if (tmp != &all_mddevs) \
			mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
		spin_unlock(&all_mddevs_lock); \
		if (mddev) mddev_put(mddev); \
		mddev = list_entry(tmp, mddev_t, all_mddevs); \
		tmp != &all_mddevs;}); \
	     ({ spin_lock(&all_mddevs_lock); \
		tmp = tmp->next;}) \
		)
214
215
/*
 * make_request function installed by mddev_find() on a freshly
 * allocated queue: every bio is completed with an I/O error.
 * @q is unused.  Always returns 0.
 */
static int md_fail_request (struct request_queue *q, struct bio *bio)
{
	bio_io_error(bio);

	return 0;
}
221
222static inline mddev_t *mddev_get(mddev_t *mddev)
223{
224 atomic_inc(&mddev->active);
225 return mddev;
226}
227
228static void mddev_put(mddev_t *mddev)
229{
230 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
231 return;
232 if (!mddev->raid_disks && list_empty(&mddev->disks)) {
233 list_del(&mddev->all_mddevs);
234 spin_unlock(&all_mddevs_lock);
235 blk_cleanup_queue(mddev->queue);
236 kobject_put(&mddev->kobj);
237 } else
238 spin_unlock(&all_mddevs_lock);
239}
240
241static mddev_t * mddev_find(dev_t unit)
242{
243 mddev_t *mddev, *new = NULL;
244
245 retry:
246 spin_lock(&all_mddevs_lock);
247 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
248 if (mddev->unit == unit) {
249 mddev_get(mddev);
250 spin_unlock(&all_mddevs_lock);
251 kfree(new);
252 return mddev;
253 }
254
255 if (new) {
256 list_add(&new->all_mddevs, &all_mddevs);
257 spin_unlock(&all_mddevs_lock);
258 return new;
259 }
260 spin_unlock(&all_mddevs_lock);
261
262 new = kzalloc(sizeof(*new), GFP_KERNEL);
263 if (!new)
264 return NULL;
265
266 new->unit = unit;
267 if (MAJOR(unit) == MD_MAJOR)
268 new->md_minor = MINOR(unit);
269 else
270 new->md_minor = MINOR(unit) >> MdpMinorShift;
271
272 mutex_init(&new->reconfig_mutex);
273 INIT_LIST_HEAD(&new->disks);
274 INIT_LIST_HEAD(&new->all_mddevs);
275 init_timer(&new->safemode_timer);
276 atomic_set(&new->active, 1);
277 spin_lock_init(&new->write_lock);
278 init_waitqueue_head(&new->sb_wait);
279 init_waitqueue_head(&new->recovery_wait);
280 new->reshape_position = MaxSector;
281 new->resync_max = MaxSector;
282 new->level = LEVEL_NONE;
283
284 new->queue = blk_alloc_queue(GFP_KERNEL);
285 if (!new->queue) {
286 kfree(new);
287 return NULL;
288 }
289
290 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
291
292 blk_queue_make_request(new->queue, md_fail_request);
293
294 goto retry;
295}
296
297static inline int mddev_lock(mddev_t * mddev)
298{
299 return mutex_lock_interruptible(&mddev->reconfig_mutex);
300}
301
302static inline int mddev_trylock(mddev_t * mddev)
303{
304 return mutex_trylock(&mddev->reconfig_mutex);
305}
306
307static inline void mddev_unlock(mddev_t * mddev)
308{
309 mutex_unlock(&mddev->reconfig_mutex);
310
311 md_wakeup_thread(mddev->thread);
312}
313
314static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
315{
316 mdk_rdev_t * rdev;
317 struct list_head *tmp;
318
319 rdev_for_each(rdev, tmp, mddev) {
320 if (rdev->desc_nr == nr)
321 return rdev;
322 }
323 return NULL;
324}
325
326static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
327{
328 struct list_head *tmp;
329 mdk_rdev_t *rdev;
330
331 rdev_for_each(rdev, tmp, mddev) {
332 if (rdev->bdev->bd_dev == dev)
333 return rdev;
334 }
335 return NULL;
336}
337
338static struct mdk_personality *find_pers(int level, char *clevel)
339{
340 struct mdk_personality *pers;
341 list_for_each_entry(pers, &pers_list, list) {
342 if (level != LEVEL_NONE && pers->level == level)
343 return pers;
344 if (strcmp(pers->name, clevel)==0)
345 return pers;
346 }
347 return NULL;
348}
349
350static inline sector_t calc_dev_sboffset(struct block_device *bdev)
351{
352 sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
353 return MD_NEW_SIZE_BLOCKS(size);
354}
355
356static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size)
357{
358 sector_t size;
359
360 size = rdev->sb_offset;
361
362 if (chunk_size)
363 size &= ~((sector_t)chunk_size/1024 - 1);
364 return size;
365}
366
367static int alloc_disk_sb(mdk_rdev_t * rdev)
368{
369 if (rdev->sb_page)
370 MD_BUG();
371
372 rdev->sb_page = alloc_page(GFP_KERNEL);
373 if (!rdev->sb_page) {
374 printk(KERN_ALERT "md: out of memory.\n");
375 return -EINVAL;
376 }
377
378 return 0;
379}
380
381static void free_disk_sb(mdk_rdev_t * rdev)
382{
383 if (rdev->sb_page) {
384 put_page(rdev->sb_page);
385 rdev->sb_loaded = 0;
386 rdev->sb_page = NULL;
387 rdev->sb_offset = 0;
388 rdev->size = 0;
389 }
390}
391
392
393static void super_written(struct bio *bio, int error)
394{
395 mdk_rdev_t *rdev = bio->bi_private;
396 mddev_t *mddev = rdev->mddev;
397
398 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
399 printk("md: super_written gets error=%d, uptodate=%d\n",
400 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
401 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
402 md_error(mddev, rdev);
403 }
404
405 if (atomic_dec_and_test(&mddev->pending_writes))
406 wake_up(&mddev->sb_wait);
407 bio_put(bio);
408}
409
410static void super_written_barrier(struct bio *bio, int error)
411{
412 struct bio *bio2 = bio->bi_private;
413 mdk_rdev_t *rdev = bio2->bi_private;
414 mddev_t *mddev = rdev->mddev;
415
416 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
417 error == -EOPNOTSUPP) {
418 unsigned long flags;
419
420 set_bit(BarriersNotsupp, &rdev->flags);
421 mddev->barriers_work = 0;
422 spin_lock_irqsave(&mddev->write_lock, flags);
423 bio2->bi_next = mddev->biolist;
424 mddev->biolist = bio2;
425 spin_unlock_irqrestore(&mddev->write_lock, flags);
426 wake_up(&mddev->sb_wait);
427 bio_put(bio);
428 } else {
429 bio_put(bio2);
430 bio->bi_private = rdev;
431 super_written(bio, error);
432 }
433}
434
435void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
436 sector_t sector, int size, struct page *page)
437{
438
439
440
441
442
443
444
445
446
447 struct bio *bio = bio_alloc(GFP_NOIO, 1);
448 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
449
450 bio->bi_bdev = rdev->bdev;
451 bio->bi_sector = sector;
452 bio_add_page(bio, page, size, 0);
453 bio->bi_private = rdev;
454 bio->bi_end_io = super_written;
455 bio->bi_rw = rw;
456
457 atomic_inc(&mddev->pending_writes);
458 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
459 struct bio *rbio;
460 rw |= (1<<BIO_RW_BARRIER);
461 rbio = bio_clone(bio, GFP_NOIO);
462 rbio->bi_private = bio;
463 rbio->bi_end_io = super_written_barrier;
464 submit_bio(rw, rbio);
465 } else
466 submit_bio(rw, bio);
467}
468
469void md_super_wait(mddev_t *mddev)
470{
471
472
473
474 DEFINE_WAIT(wq);
475 for(;;) {
476 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
477 if (atomic_read(&mddev->pending_writes)==0)
478 break;
479 while (mddev->biolist) {
480 struct bio *bio;
481 spin_lock_irq(&mddev->write_lock);
482 bio = mddev->biolist;
483 mddev->biolist = bio->bi_next ;
484 bio->bi_next = NULL;
485 spin_unlock_irq(&mddev->write_lock);
486 submit_bio(bio->bi_rw, bio);
487 }
488 schedule();
489 }
490 finish_wait(&mddev->sb_wait, &wq);
491}
492
493static void bi_complete(struct bio *bio, int error)
494{
495 complete((struct completion*)bio->bi_private);
496}
497
498int sync_page_io(struct block_device *bdev, sector_t sector, int size,
499 struct page *page, int rw)
500{
501 struct bio *bio = bio_alloc(GFP_NOIO, 1);
502 struct completion event;
503 int ret;
504
505 rw |= (1 << BIO_RW_SYNC);
506
507 bio->bi_bdev = bdev;
508 bio->bi_sector = sector;
509 bio_add_page(bio, page, size, 0);
510 init_completion(&event);
511 bio->bi_private = &event;
512 bio->bi_end_io = bi_complete;
513 submit_bio(rw, bio);
514 wait_for_completion(&event);
515
516 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
517 bio_put(bio);
518 return ret;
519}
520EXPORT_SYMBOL_GPL(sync_page_io);
521
522static int read_disk_sb(mdk_rdev_t * rdev, int size)
523{
524 char b[BDEVNAME_SIZE];
525 if (!rdev->sb_page) {
526 MD_BUG();
527 return -EINVAL;
528 }
529 if (rdev->sb_loaded)
530 return 0;
531
532
533 if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ))
534 goto fail;
535 rdev->sb_loaded = 1;
536 return 0;
537
538fail:
539 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
540 bdevname(rdev->bdev,b));
541 return -EINVAL;
542}
543
544static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
545{
546 if ( (sb1->set_uuid0 == sb2->set_uuid0) &&
547 (sb1->set_uuid1 == sb2->set_uuid1) &&
548 (sb1->set_uuid2 == sb2->set_uuid2) &&
549 (sb1->set_uuid3 == sb2->set_uuid3))
550
551 return 1;
552
553 return 0;
554}
555
556
557static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
558{
559 int ret;
560 mdp_super_t *tmp1, *tmp2;
561
562 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
563 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
564
565 if (!tmp1 || !tmp2) {
566 ret = 0;
567 printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
568 goto abort;
569 }
570
571 *tmp1 = *sb1;
572 *tmp2 = *sb2;
573
574
575
576
577 tmp1->nr_disks = 0;
578 tmp2->nr_disks = 0;
579
580 if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
581 ret = 0;
582 else
583 ret = 1;
584
585abort:
586 kfree(tmp1);
587 kfree(tmp2);
588 return ret;
589}
590
591
592static u32 md_csum_fold(u32 csum)
593{
594 csum = (csum & 0xffff) + (csum >> 16);
595 return (csum & 0xffff) + (csum >> 16);
596}
597
598static unsigned int calc_sb_csum(mdp_super_t * sb)
599{
600 u64 newcsum = 0;
601 u32 *sb32 = (u32*)sb;
602 int i;
603 unsigned int disk_csum, csum;
604
605 disk_csum = sb->sb_csum;
606 sb->sb_csum = 0;
607
608 for (i = 0; i < MD_SB_BYTES/4 ; i++)
609 newcsum += sb32[i];
610 csum = (newcsum & 0xffffffff) + (newcsum>>32);
611
612
613#ifdef CONFIG_ALPHA
614
615
616
617
618
619
620
621
622 sb->sb_csum = md_csum_fold(disk_csum);
623#else
624 sb->sb_csum = disk_csum;
625#endif
626 return csum;
627}
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
/*
 * Operations for one on-disk superblock format.  One instance exists
 * per supported format in the super_types[] table.
 */
struct super_type {
	/* format name */
	char *name;
	struct module *owner;
	/*
	 * Read and check the superblock of rdev, comparing it with refdev
	 * when refdev is not NULL.  As implemented by the loaders in this
	 * file: returns a negative errno on failure, 1 if there is no
	 * refdev or rdev's event count is higher than refdev's, else 0.
	 */
	int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version);
	/*
	 * Apply rdev's loaded superblock to mddev and set rdev's role and
	 * flags from it.  Returns 0 or a negative errno.
	 */
	int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
	/* Rebuild rdev's in-memory superblock from the state of mddev. */
	void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
};
667
668
669
670
/*
 * load_super for the 0.90 superblock format.
 *
 * Locates and reads the superblock of @rdev, checks magic, version,
 * raid_disks and checksum, and fills in rdev->sb_offset,
 * preferred_minor, data_offset, sb_size, desc_nr and size.  If @refdev
 * is given, the superblock must have the same UUID and the same generic
 * constant part as @refdev's.
 *
 * @minor_version is not used by this format.
 *
 * Returns 1 if there is no @refdev or @rdev has a higher event count
 * than @refdev, 0 if it does not, and a negative errno on any failure.
 */
static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;
	sector_t sb_offset;

	/* The superblock position is derived from the device size. */
	sb_offset = calc_dev_sboffset(rdev->bdev);
	rdev->sb_offset = sb_offset;

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret) return ret;

	/* Every "goto abort" below reports -EINVAL unless ret is changed. */
	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = (mdp_super_t*)page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
		       b);
		goto abort;
	}

	/* Only versions 0.90 and 0.91 are accepted. */
	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
		       sb->major_version, sb->minor_version,
		       b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	/* Both sides are folded before comparing. */
	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
		       b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;

	/* A bitmap is only accepted for levels 1, 4, 5, 6 and 10. */
	if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
		if (sb->level != 1 && sb->level != 4
		    && sb->level != 5 && sb->level != 6
		    && sb->level != 10) {

			printk(KERN_WARNING
			       "md: bitmaps not supported for this level.\n");
			goto abort;
		}
	}

	/* Multipath members get no descriptor number here. */
	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
		if (!uuid_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has different UUID to %s\n",
			       b, bdevname(refdev->bdev,b2));
			goto abort;
		}
		if (!sb_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has same UUID"
			       " but different superblock to %s\n",
			       b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		/* Report whether this device is newer than the reference. */
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->size = calc_dev_size(rdev, sb->chunk_size);

	/* For levels above 1 the device must be at least sb->size. */
	if (rdev->size < sb->size && sb->level > 1)

		ret = -EINVAL;

 abort:
	return ret;
}
771
772
773
774
/*
 * validate_super for the 0.90 superblock format.
 *
 * Resets @rdev's role (raid_disk = -1; Faulty, In_sync, WriteMostly and
 * BarriersNotsupp cleared) and then:
 *  - if @mddev has no raid_disks yet, initialises @mddev from this
 *    superblock;
 *  - else if the array has no personality, rejects the device with
 *    -EINVAL when its event count plus one is below mddev->events;
 *  - else (array has a personality) returns 0 early, leaving the role
 *    reset, when the device's event count is below the bitmap's
 *    events_cleared (if a bitmap exists) or below mddev->events
 *    (otherwise).
 * Finally the role and flags of @rdev are set from its disk descriptor.
 *
 * Returns 0 or -EINVAL.
 */
static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);
	clear_bit(BarriersNotsupp, &rdev->flags);

	if (mddev->raid_disks == 0) {
		/* First device seen: take the array parameters from it. */
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_size = sb->chunk_size;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->size = sb->size;
		mddev->events = ev1;
		mddev->bitmap_offset = 0;
		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;

		/* Reshape fields are only taken from a 0.91 superblock. */
		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk = sb->new_chunk;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk = mddev->chunk_size;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			/*
			 * The stored checkpoint is only used when it was
			 * recorded at the current event count.
			 */
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		/* Use the default bitmap offset unless a bitmap file is set. */
		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_file == NULL)
			mddev->bitmap_offset = mddev->default_bitmap_offset;

	} else if (mddev->pers == NULL) {
		/* Array not running: tolerate being one event behind. */
		++ev1;
		if (ev1 < mddev->events)
			return -EINVAL;
	} else if (mddev->bitmap) {
		/*
		 * Array has a personality and a bitmap: a device older than
		 * events_cleared keeps the reset role.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
	} else {
		if (ev1 < mddev->events)
			/* older than the array: keep the reset role */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		/* NOTE(review): desc_nr is not range-checked against the disks[] array here -- confirm callers guarantee it. */
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)
) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
	} else
		/* multipath members are always marked In_sync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}
872
873
874
875
/*
 * sync_super for the 0.90 superblock format.
 *
 * Rebuilds the superblock in @rdev's sb_page from scratch using the
 * current state of @mddev, including one disk descriptor per member
 * device, and stores the new checksum.
 *
 * Side effects beyond the superblock page: rdev->sb_size is set,
 * mddev->minor_version is updated to 90 or 91, and the desc_nr of every
 * member device of @mddev is (re)assigned.
 */
static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
	mdp_super_t *sb;
	struct list_head *tmp;
	mdk_rdev_t *rdev2;
	/* Descriptor slots for non-active devices start after the raid disks. */
	int next_spare = mddev->raid_disks;

	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = (mdp_super_t*)page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0;
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = mddev->ctime;
	sb->level = mddev->level;
	sb->size = mddev->size;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = mddev->utime;
	sb->state = 0;
	/* The 64-bit event count is stored as two 32-bit halves. */
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	/* Minor version 91 is written only while a reshape position is set. */
	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		/* Record the checkpoint together with the event count it belongs to. */
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_size;

	if (mddev->bitmap && mddev->bitmap_file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	/*
	 * Pre-mark slot 0 as removed so that the fill-in loop below (which
	 * tests state == 0 && number == 0) does not treat it as untouched.
	 */
	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, tmp, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		/*
		 * Active, in-sync, non-faulty devices use their raid slot as
		 * descriptor number; all others get the next spare slot.
		 */
		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
		    && !test_bit(Faulty, &rdev2->flags))
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
		    && !test_bit(Faulty, &rdev2->flags))
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags)) {
			d->state = (1<<MD_DISK_ACTIVE);
			d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
	}

	/* Raid slots that no device filled are recorded as removed and faulty. */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	/* this_disk is a copy of this device's own descriptor. */
	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}
1005
1006
1007
1008
1009
1010static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1011{
1012 __le32 disk_csum;
1013 u32 csum;
1014 unsigned long long newcsum;
1015 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1016 __le32 *isuper = (__le32*)sb;
1017 int i;
1018
1019 disk_csum = sb->sb_csum;
1020 sb->sb_csum = 0;
1021 newcsum = 0;
1022 for (i=0; size>=4; size -= 4 )
1023 newcsum += le32_to_cpu(*isuper++);
1024
1025 if (size == 2)
1026 newcsum += le16_to_cpu(*(__le16*) isuper);
1027
1028 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1029 sb->sb_csum = disk_csum;
1030 return cpu_to_le32(csum);
1031}
1032
/*
 * load_super for the version-1 superblock format.
 *
 * @minor_version selects where the superblock lives (see the switch
 * below).  The superblock is read and checked (magic, major version,
 * max_dev, super_offset, feature bits, checksum, data_size), and
 * rdev->sb_offset, preferred_minor, data_offset, corrected_errors,
 * sb_size, desc_nr and size are filled in.  If @refdev is given, UUID,
 * level, layout and chunksize must match its superblock.
 *
 * Returns 1 if there is no @refdev or @rdev has a higher event count
 * than @refdev, 0 if it does not, and a negative errno on any failure.
 */
static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_offset;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * sb_offset is kept in units of two sectors: read_disk_sb() reads
	 * from sector (sb_offset << 1), and super_offset is compared
	 * against the same value below.
	 */
	switch(minor_version) {
	case 0:
		/* near the end: 16 sectors back, rounded down to 8 sectors */
		sb_offset = rdev->bdev->bd_inode->i_size >> 9;
		sb_offset -= 8*2;
		sb_offset &= ~(sector_t)(4*2-1);

		/* convert from sectors to two-sector units */
		sb_offset /= 2;
		break;
	case 1:
		/* at the very start of the device */
		sb_offset = 0;
		break;
	case 2:
		/* 8 sectors from the start of the device */
		sb_offset = 4;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_offset = sb_offset;

	/* Read a full 4096 bytes; the real size is only known afterwards. */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;


	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		printk("md: invalid superblock checksum on %s\n",
		       bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		printk("md: data_size too small on %s\n",
		       bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	/* A bitmap is only accepted for levels 1, 4, 5, 6 and 10. */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
		if (sb->level != cpu_to_le32(1) &&
		    sb->level != cpu_to_le32(4) &&
		    sb->level != cpu_to_le32(5) &&
		    sb->level != cpu_to_le32(6) &&
		    sb->level != cpu_to_le32(10)) {
			printk(KERN_WARNING
			       "md: bitmaps not supported for this level.\n");
			return -EINVAL;
		}
	}

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	/* Superblock size, rounded up to the device's hardware sector size. */
	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	/*
	 * For minor versions 1 and 2 the data must not start before
	 * sb_offset + sb_size/512.
	 */
	if (minor_version
	    && rdev->data_offset < sb_offset + (rdev->sb_size/512))
		return -EINVAL;

	/* Multipath members get no descriptor number here. */
	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb =
			(struct mdp_superblock_1*)page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			printk(KERN_WARNING "md: %s has strangely different"
			       " superblock to %s\n",
			       bdevname(rdev->bdev,b),
			       bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		/* Report whether this device is newer than the reference. */
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	/* Space available on the device, halved from sectors. */
	if (minor_version)
		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
	else
		rdev->size = rdev->sb_offset;
	/* The recorded data_size must fit in the available space. */
	if (rdev->size < le64_to_cpu(sb->data_size)/2)
		return -EINVAL;
	rdev->size = le64_to_cpu(sb->data_size)/2;
	/* Round down to a chunk multiple when a chunk size is set. */
	if (le32_to_cpu(sb->chunksize))
		rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);

	/* The per-device size used by the array must fit on this device. */
	if (le64_to_cpu(sb->size) > rdev->size*2)
		return -EINVAL;
	return ret;
}
1163
1164static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1165{
1166 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1167 __u64 ev1 = le64_to_cpu(sb->events);
1168
1169 rdev->raid_disk = -1;
1170 clear_bit(Faulty, &rdev->flags);
1171 clear_bit(In_sync, &rdev->flags);
1172 clear_bit(WriteMostly, &rdev->flags);
1173 clear_bit(BarriersNotsupp, &rdev->flags);
1174
1175 if (mddev->raid_disks == 0) {
1176 mddev->major_version = 1;
1177 mddev->patch_version = 0;
1178 mddev->external = 0;
1179 mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
1180 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1181 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1182 mddev->level = le32_to_cpu(sb->level);
1183 mddev->clevel[0] = 0;
1184 mddev->layout = le32_to_cpu(sb->layout);
1185 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1186 mddev->size = le64_to_cpu(sb->size)/2;
1187 mddev->events = ev1;
1188 mddev->bitmap_offset = 0;
1189 mddev->default_bitmap_offset = 1024 >> 9;
1190
1191 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1192 memcpy(mddev->uuid, sb->set_uuid, 16);
1193
1194 mddev->max_disks = (4096-256)/2;
1195
1196 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1197 mddev->bitmap_file == NULL )
1198 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
1199
1200 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1201 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1202 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1203 mddev->new_level = le32_to_cpu(sb->new_level);
1204 mddev->new_layout = le32_to_cpu(sb->new_layout);
1205 mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
1206 } else {
1207 mddev->reshape_position = MaxSector;
1208 mddev->delta_disks = 0;
1209 mddev->new_level = mddev->level;
1210 mddev->new_layout = mddev->layout;
1211 mddev->new_chunk = mddev->chunk_size;
1212 }
1213
1214 } else if (mddev->pers == NULL) {
1215
1216 ++ev1;
1217 if (ev1 < mddev->events)
1218 return -EINVAL;
1219 } else if (mddev->bitmap) {
1220
1221
1222
1223 if (ev1 < mddev->bitmap->events_cleared)
1224 return 0;
1225 } else {
1226 if (ev1 < mddev->events)
1227
1228 return 0;
1229 }
1230 if (mddev->level != LEVEL_MULTIPATH) {
1231 int role;
1232 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1233 switch(role) {
1234 case 0xffff:
1235 break;
1236 case 0xfffe:
1237 set_bit(Faulty, &rdev->flags);
1238 break;
1239 default:
1240 if ((le32_to_cpu(sb->feature_map) &
1241 MD_FEATURE_RECOVERY_OFFSET))
1242 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1243 else
1244 set_bit(In_sync, &rdev->flags);
1245 rdev->raid_disk = role;
1246 break;
1247 }
1248 if (sb->devflags & WriteMostly1)
1249 set_bit(WriteMostly, &rdev->flags);
1250 } else
1251 set_bit(In_sync, &rdev->flags);
1252
1253 return 0;
1254}
1255
/*
 * sync_super for the version-1 superblock format.
 *
 * Updates the superblock already present in @rdev's sb_page in place:
 * the feature map, padding and recovery offset are reset, the variable
 * fields are rewritten from @mddev and @rdev, the role table is rebuilt
 * from the array's member devices, and the checksum is recomputed.
 * Fields not assigned here keep their previous contents.
 */
static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
	struct mdp_superblock_1 *sb;
	struct list_head *tmp;
	mdk_rdev_t *rdev2;
	int max_dev, i;


	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad1, 0, sizeof(sb->pad1));
	memset(sb->pad2, 0, sizeof(sb->pad2));
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	/* The resync checkpoint is only recorded while the array is in_sync. */
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	/* mddev->size is doubled for the on-disk field. */
	sb->size = cpu_to_le64(mddev->size<<1);

	if (mddev->bitmap && mddev->bitmap_file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	/* A device that is partly recovered records how far it got. */
	if (rdev->raid_disk >= 0 &&
	    !test_bit(In_sync, &rdev->flags) &&
	    rdev->recovery_offset > 0) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
	}

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
	}

	/* The role table must cover the highest descriptor number in use. */
	max_dev = 0;
	rdev_for_each(rdev2, tmp, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	/* The on-disk max_dev is only ever increased here, never reduced. */
	if (max_dev > le32_to_cpu(sb->max_dev))
		sb->max_dev = cpu_to_le32(max_dev);
	/* Default every slot to 0xfffe (faulty) before filling in real roles. */
	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(0xfffe);

	rdev_for_each(rdev2, tmp, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(0xfffe);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
			/* partly recovered devices keep their raid slot */
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			/* 0xffff: no role */
			sb->dev_roles[i] = cpu_to_le16(0xffff);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}
1330
1331
/*
 * Table of supported superblock formats: entry 0 handles the 0.90
 * format and entry 1 the version-1 format.
 * NOTE(review): presumably indexed by the array's major_version -- the
 * lookups are not in this part of the file, confirm.
 */
static struct super_type super_types[] = {
	[0] = {
		.name = "0.90.0",
		.owner = THIS_MODULE,
		.load_super = super_90_load,
		.validate_super = super_90_validate,
		.sync_super = super_90_sync,
	},
	[1] = {
		.name = "md-1",
		.owner = THIS_MODULE,
		.load_super = super_1_load,
		.validate_super = super_1_validate,
		.sync_super = super_1_sync,
	},
};
1348
1349static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1350{
1351 struct list_head *tmp, *tmp2;
1352 mdk_rdev_t *rdev, *rdev2;
1353
1354 rdev_for_each(rdev, tmp, mddev1)
1355 rdev_for_each(rdev2, tmp2, mddev2)
1356 if (rdev->bdev->bd_contains ==
1357 rdev2->bdev->bd_contains)
1358 return 1;
1359
1360 return 0;
1361}
1362
/*
 * File-scope list head.  NOTE(review): presumably collects rdevs that are
 * waiting to be assembled into an array -- confirm against its users.
 */
static LIST_HEAD(pending_raid_disks);
1364
1365static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1366{
1367 char b[BDEVNAME_SIZE];
1368 struct kobject *ko;
1369 char *s;
1370 int err;
1371
1372 if (rdev->mddev) {
1373 MD_BUG();
1374 return -EINVAL;
1375 }
1376
1377
1378 if (find_rdev(mddev, rdev->bdev->bd_dev))
1379 return -EEXIST;
1380
1381
1382 if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
1383 if (mddev->pers) {
1384
1385
1386
1387
1388 if (mddev->level > 0)
1389 return -ENOSPC;
1390 } else
1391 mddev->size = rdev->size;
1392 }
1393
1394
1395
1396
1397
1398 if (rdev->desc_nr < 0) {
1399 int choice = 0;
1400 if (mddev->pers) choice = mddev->raid_disks;
1401 while (find_rdev_nr(mddev, choice))
1402 choice++;
1403 rdev->desc_nr = choice;
1404 } else {
1405 if (find_rdev_nr(mddev, rdev->desc_nr))
1406 return -EBUSY;
1407 }
1408 bdevname(rdev->bdev,b);
1409 while ( (s=strchr(b, '/')) != NULL)
1410 *s = '!';
1411
1412 rdev->mddev = mddev;
1413 printk(KERN_INFO "md: bind<%s>\n", b);
1414
1415 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
1416 goto fail;
1417
1418 if (rdev->bdev->bd_part)
1419 ko = &rdev->bdev->bd_part->dev.kobj;
1420 else
1421 ko = &rdev->bdev->bd_disk->dev.kobj;
1422 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
1423 kobject_del(&rdev->kobj);
1424 goto fail;
1425 }
1426 list_add(&rdev->same_set, &mddev->disks);
1427 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1428 return 0;
1429
1430 fail:
1431 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
1432 b, mdname(mddev));
1433 return err;
1434}
1435
1436static void md_delayed_delete(struct work_struct *ws)
1437{
1438 mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
1439 kobject_del(&rdev->kobj);
1440 kobject_put(&rdev->kobj);
1441}
1442
1443static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1444{
1445 char b[BDEVNAME_SIZE];
1446 if (!rdev->mddev) {
1447 MD_BUG();
1448 return;
1449 }
1450 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1451 list_del_init(&rdev->same_set);
1452 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1453 rdev->mddev = NULL;
1454 sysfs_remove_link(&rdev->kobj, "block");
1455
1456
1457
1458
1459 INIT_WORK(&rdev->del_work, md_delayed_delete);
1460 kobject_get(&rdev->kobj);
1461 schedule_work(&rdev->del_work);
1462}
1463
1464
1465
1466
1467
1468
1469static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1470{
1471 int err = 0;
1472 struct block_device *bdev;
1473 char b[BDEVNAME_SIZE];
1474
1475 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1476 if (IS_ERR(bdev)) {
1477 printk(KERN_ERR "md: could not open %s.\n",
1478 __bdevname(dev, b));
1479 return PTR_ERR(bdev);
1480 }
1481 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1482 if (err) {
1483 printk(KERN_ERR "md: could not bd_claim %s.\n",
1484 bdevname(bdev, b));
1485 blkdev_put(bdev);
1486 return err;
1487 }
1488 if (!shared)
1489 set_bit(AllReserved, &rdev->flags);
1490 rdev->bdev = bdev;
1491 return err;
1492}
1493
1494static void unlock_rdev(mdk_rdev_t *rdev)
1495{
1496 struct block_device *bdev = rdev->bdev;
1497 rdev->bdev = NULL;
1498 if (!bdev)
1499 MD_BUG();
1500 bd_release(bdev);
1501 blkdev_put(bdev);
1502}
1503
1504void md_autodetect_dev(dev_t dev);
1505
1506static void export_rdev(mdk_rdev_t * rdev)
1507{
1508 char b[BDEVNAME_SIZE];
1509 printk(KERN_INFO "md: export_rdev(%s)\n",
1510 bdevname(rdev->bdev,b));
1511 if (rdev->mddev)
1512 MD_BUG();
1513 free_disk_sb(rdev);
1514 list_del_init(&rdev->same_set);
1515#ifndef MODULE
1516 if (test_bit(AutoDetected, &rdev->flags))
1517 md_autodetect_dev(rdev->bdev->bd_dev);
1518#endif
1519 unlock_rdev(rdev);
1520 kobject_put(&rdev->kobj);
1521}
1522
/* Remove @rdev from its array and release it: unbind, then export. */
static void kick_rdev_from_array(mdk_rdev_t * rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
1528
1529static void export_array(mddev_t *mddev)
1530{
1531 struct list_head *tmp;
1532 mdk_rdev_t *rdev;
1533
1534 rdev_for_each(rdev, tmp, mddev) {
1535 if (!rdev->mddev) {
1536 MD_BUG();
1537 continue;
1538 }
1539 kick_rdev_from_array(rdev);
1540 }
1541 if (!list_empty(&mddev->disks))
1542 MD_BUG();
1543 mddev->raid_disks = 0;
1544 mddev->major_version = 0;
1545}
1546
/*
 * Print one disk descriptor: number, (major,minor), raid_disk and state.
 * No log level is given, so the text continues the caller's current line.
 */
static void print_desc(mdp_disk_t *desc)
{
	printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
		desc->major,desc->minor,desc->raid_disk,desc->state);
}
1552
1553static void print_sb(mdp_super_t *sb)
1554{
1555 int i;
1556
1557 printk(KERN_INFO
1558 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
1559 sb->major_version, sb->minor_version, sb->patch_version,
1560 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
1561 sb->ctime);
1562 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
1563 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
1564 sb->md_minor, sb->layout, sb->chunk_size);
1565 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
1566 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
1567 sb->utime, sb->state, sb->active_disks, sb->working_disks,
1568 sb->failed_disks, sb->spare_disks,
1569 sb->sb_csum, (unsigned long)sb->events_lo);
1570
1571 printk(KERN_INFO);
1572 for (i = 0; i < MD_SB_DISKS; i++) {
1573 mdp_disk_t *desc;
1574
1575 desc = sb->disks + i;
1576 if (desc->number || desc->major || desc->minor ||
1577 desc->raid_disk || (desc->state && (desc->state != 4))) {
1578 printk(" D %2d: ", i);
1579 print_desc(desc);
1580 }
1581 }
1582 printk(KERN_INFO "md: THIS: ");
1583 print_desc(&sb->this_disk);
1584
1585}
1586
1587static void print_rdev(mdk_rdev_t *rdev)
1588{
1589 char b[BDEVNAME_SIZE];
1590 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
1591 bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
1592 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1593 rdev->desc_nr);
1594 if (rdev->sb_loaded) {
1595 printk(KERN_INFO "md: rdev superblock:\n");
1596 print_sb((mdp_super_t*)page_address(rdev->sb_page));
1597 } else
1598 printk(KERN_INFO "md: no rdev superblock!\n");
1599}
1600
1601static void md_print_devices(void)
1602{
1603 struct list_head *tmp, *tmp2;
1604 mdk_rdev_t *rdev;
1605 mddev_t *mddev;
1606 char b[BDEVNAME_SIZE];
1607
1608 printk("\n");
1609 printk("md: **********************************\n");
1610 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
1611 printk("md: **********************************\n");
1612 for_each_mddev(mddev, tmp) {
1613
1614 if (mddev->bitmap)
1615 bitmap_print_sb(mddev->bitmap);
1616 else
1617 printk("%s: ", mdname(mddev));
1618 rdev_for_each(rdev, tmp2, mddev)
1619 printk("<%s>", bdevname(rdev->bdev,b));
1620 printk("\n");
1621
1622 rdev_for_each(rdev, tmp2, mddev)
1623 print_rdev(rdev);
1624 }
1625 printk("md: **********************************\n");
1626 printk("\n");
1627}
1628
1629
1630static void sync_sbs(mddev_t * mddev, int nospares)
1631{
1632
1633
1634
1635
1636
1637
1638 mdk_rdev_t *rdev;
1639 struct list_head *tmp;
1640
1641 rdev_for_each(rdev, tmp, mddev) {
1642 if (rdev->sb_events == mddev->events ||
1643 (nospares &&
1644 rdev->raid_disk < 0 &&
1645 (rdev->sb_events&1)==0 &&
1646 rdev->sb_events+1 == mddev->events)) {
1647
1648 rdev->sb_loaded = 2;
1649 } else {
1650 super_types[mddev->major_version].
1651 sync_super(mddev, rdev);
1652 rdev->sb_loaded = 1;
1653 }
1654 }
1655}
1656
/*
 * Advance the array's event counter and write out the superblocks that
 * sync_sbs() marks as needing an update.
 *
 * @force_change: non-zero forces every member (including spares) to be
 *                considered; it is also forced when MD_CHANGE_DEVS was set.
 *
 * Does nothing for arrays with external metadata.  For non-persistent
 * arrays only the event counter and flags are updated.  The whole
 * procedure is repeated if in_sync changed or MD_CHANGE_DEVS was set
 * again while the writes were in flight.  Waiters on mddev->sb_wait are
 * woken once MD_CHANGE_PENDING has been cleared.
 */
static void md_update_sb(mddev_t * mddev, int force_change)
{
	struct list_head *tmp;
	mdk_rdev_t *rdev;
	int sync_req;
	int nospares = 0;

	if (mddev->external)
		return;
repeat:
	spin_lock_irq(&mddev->write_lock);

	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only a clean/dirty change was flagged: let sync_sbs()
		 * leave idle spares alone (unless overridden below).
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* degraded arrays always get every superblock considered */
		nospares = 0;

	/* remember in_sync so a change during the writes can be detected */
	sync_req = mddev->in_sync;
	mddev->utime = get_seconds();

	/*
	 * Event counter parity: after this block the count is odd while the
	 * array is dirty (not in_sync, or recovery_cp != MaxSector) and even
	 * while it is clean.  In the nospares case a clean array with an odd
	 * count steps back by one instead of forward.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && (mddev->events & 1)
	    && mddev->events != 1)
		mddev->events--;
	else {
		/* otherwise the count always moves forward */
		mddev->events ++;
		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) {
			/* dirty: make the count odd */
			if ((mddev->events&1)==0) {
				mddev->events++;
				nospares = 0;
			}
		} else {
			/* clean: make the count even */
			if ((mddev->events&1)) {
				mddev->events++;
				nospares = 0;
			}
		}
	}

	if (!mddev->events) {
		/* the counter wrapped to zero: report it and step back */
		MD_BUG();
		mddev->events --;
	}

	/* non-persistent arrays have no superblocks to write */
	if (!mddev->persistent) {
		if (!mddev->external)
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);

		spin_unlock_irq(&mddev->write_lock);
		wake_up(&mddev->sb_wait);
		return;
	}
	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	dprintk(KERN_INFO
		"md: updating %s RAID superblock on device (in sync %d)\n",
		mdname(mddev),mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, tmp, mddev) {
		char b[BDEVNAME_SIZE];
		dprintk(KERN_INFO "md: ");
		/* sync_sbs() sets sb_loaded to 1 only on members to write */
		if (rdev->sb_loaded != 1)
			continue;
		if (test_bit(Faulty, &rdev->flags))
			dprintk("(skipping faulty ");

		dprintk("%s ", bdevname(rdev->bdev,b));
		if (!test_bit(Faulty, &rdev->flags)) {
			md_super_write(mddev,rdev,
				       rdev->sb_offset<<1, rdev->sb_size,
				       rdev->sb_page);
			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
				bdevname(rdev->bdev,b),
				(unsigned long long)rdev->sb_offset);
			rdev->sb_events = mddev->events;

		} else
			dprintk(")\n");
		if (mddev->level == LEVEL_MULTIPATH)
			/* multipath: stop after the first member handled */
			break;
	}
	md_super_wait(mddev);

	/* start over if the array state changed while writing */
	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);

}
1789
1790
1791
1792
/*
 * Compare a user-supplied command @cmd against the keyword @str.
 *
 * Returns 1 if @cmd equals @str, optionally followed by exactly one
 * trailing newline; returns 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* walk the common prefix */
	for (; *cmd != '\0' && *str != '\0'; cmd++, str++) {
		if (*cmd != *str)
			break;
	}

	/* tolerate a single newline after the command */
	if (*cmd == '\n')
		cmd++;

	return (*str == '\0' && *cmd == '\0') ? 1 : 0;
}
1809
/*
 * A per-rdev sysfs attribute: the generic attribute plus typed show/store
 * callbacks that rdev_attr_show()/rdev_attr_store() dispatch to.  Either
 * callback may be NULL, in which case the dispatcher returns -EIO.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mdk_rdev_t *, char *);
	ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
};
1815
1816static ssize_t
1817state_show(mdk_rdev_t *rdev, char *page)
1818{
1819 char *sep = "";
1820 size_t len = 0;
1821
1822 if (test_bit(Faulty, &rdev->flags)) {
1823 len+= sprintf(page+len, "%sfaulty",sep);
1824 sep = ",";
1825 }
1826 if (test_bit(In_sync, &rdev->flags)) {
1827 len += sprintf(page+len, "%sin_sync",sep);
1828 sep = ",";
1829 }
1830 if (test_bit(WriteMostly, &rdev->flags)) {
1831 len += sprintf(page+len, "%swrite_mostly",sep);
1832 sep = ",";
1833 }
1834 if (test_bit(Blocked, &rdev->flags)) {
1835 len += sprintf(page+len, "%sblocked", sep);
1836 sep = ",";
1837 }
1838 if (!test_bit(Faulty, &rdev->flags) &&
1839 !test_bit(In_sync, &rdev->flags)) {
1840 len += sprintf(page+len, "%sspare", sep);
1841 sep = ",";
1842 }
1843 return len+sprintf(page+len, "\n");
1844}
1845
1846static ssize_t
1847state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1848{
1849
1850
1851
1852
1853
1854
1855
1856
1857 int err = -EINVAL;
1858 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
1859 md_error(rdev->mddev, rdev);
1860 err = 0;
1861 } else if (cmd_match(buf, "remove")) {
1862 if (rdev->raid_disk >= 0)
1863 err = -EBUSY;
1864 else {
1865 mddev_t *mddev = rdev->mddev;
1866 kick_rdev_from_array(rdev);
1867 if (mddev->pers)
1868 md_update_sb(mddev, 1);
1869 md_new_event(mddev);
1870 err = 0;
1871 }
1872 } else if (cmd_match(buf, "writemostly")) {
1873 set_bit(WriteMostly, &rdev->flags);
1874 err = 0;
1875 } else if (cmd_match(buf, "-writemostly")) {
1876 clear_bit(WriteMostly, &rdev->flags);
1877 err = 0;
1878 } else if (cmd_match(buf, "blocked")) {
1879 set_bit(Blocked, &rdev->flags);
1880 err = 0;
1881 } else if (cmd_match(buf, "-blocked")) {
1882 clear_bit(Blocked, &rdev->flags);
1883 wake_up(&rdev->blocked_wait);
1884 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
1885 md_wakeup_thread(rdev->mddev->thread);
1886
1887 err = 0;
1888 }
1889 return err ? err : len;
1890}
1891static struct rdev_sysfs_entry rdev_state =
1892__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
1893
1894static ssize_t
1895errors_show(mdk_rdev_t *rdev, char *page)
1896{
1897 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
1898}
1899
1900static ssize_t
1901errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1902{
1903 char *e;
1904 unsigned long n = simple_strtoul(buf, &e, 10);
1905 if (*buf && (*e == 0 || *e == '\n')) {
1906 atomic_set(&rdev->corrected_errors, n);
1907 return len;
1908 }
1909 return -EINVAL;
1910}
1911static struct rdev_sysfs_entry rdev_errors =
1912__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
1913
1914static ssize_t
1915slot_show(mdk_rdev_t *rdev, char *page)
1916{
1917 if (rdev->raid_disk < 0)
1918 return sprintf(page, "none\n");
1919 else
1920 return sprintf(page, "%d\n", rdev->raid_disk);
1921}
1922
1923static ssize_t
1924slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1925{
1926 char *e;
1927 int err;
1928 char nm[20];
1929 int slot = simple_strtoul(buf, &e, 10);
1930 if (strncmp(buf, "none", 4)==0)
1931 slot = -1;
1932 else if (e==buf || (*e && *e!= '\n'))
1933 return -EINVAL;
1934 if (rdev->mddev->pers) {
1935
1936
1937
1938
1939
1940
1941
1942 if (slot != -1)
1943 return -EBUSY;
1944 if (rdev->raid_disk == -1)
1945 return -EEXIST;
1946
1947 if (rdev->mddev->pers->hot_add_disk == NULL)
1948 return -EINVAL;
1949 err = rdev->mddev->pers->
1950 hot_remove_disk(rdev->mddev, rdev->raid_disk);
1951 if (err)
1952 return err;
1953 sprintf(nm, "rd%d", rdev->raid_disk);
1954 sysfs_remove_link(&rdev->mddev->kobj, nm);
1955 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
1956 md_wakeup_thread(rdev->mddev->thread);
1957 } else {
1958 if (slot >= rdev->mddev->raid_disks)
1959 return -ENOSPC;
1960 rdev->raid_disk = slot;
1961
1962 clear_bit(Faulty, &rdev->flags);
1963 clear_bit(WriteMostly, &rdev->flags);
1964 set_bit(In_sync, &rdev->flags);
1965 }
1966 return len;
1967}
1968
1969
1970static struct rdev_sysfs_entry rdev_slot =
1971__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
1972
1973static ssize_t
1974offset_show(mdk_rdev_t *rdev, char *page)
1975{
1976 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
1977}
1978
1979static ssize_t
1980offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1981{
1982 char *e;
1983 unsigned long long offset = simple_strtoull(buf, &e, 10);
1984 if (e==buf || (*e && *e != '\n'))
1985 return -EINVAL;
1986 if (rdev->mddev->pers)
1987 return -EBUSY;
1988 if (rdev->size && rdev->mddev->external)
1989
1990
1991 return -EBUSY;
1992 rdev->data_offset = offset;
1993 return len;
1994}
1995
1996static struct rdev_sysfs_entry rdev_offset =
1997__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
1998
1999static ssize_t
2000rdev_size_show(mdk_rdev_t *rdev, char *page)
2001{
2002 return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
2003}
2004
2005static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2006{
2007
2008 if (s1+l1 <= s2)
2009 return 0;
2010 if (s2+l2 <= s1)
2011 return 0;
2012 return 1;
2013}
2014
/*
 * Set the device's size from a decimal string.
 *   -EINVAL  no digits, or trailing text other than a newline
 *   -EBUSY   the array is running, or (external metadata, size growing)
 *            the new extent conflicts with another rdev
 * On success the array's size is lowered to the new value if that is
 * smaller (or if the array size was 0), and @len is returned.
 */
static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	char *e;
	unsigned long long size = simple_strtoull(buf, &e, 10);
	unsigned long long oldsize = rdev->size;
	mddev_t *my_mddev = rdev->mddev;

	if (e==buf || (*e && *e != '\n'))
		return -EINVAL;
	if (my_mddev->pers)
		return -EBUSY;
	/* set first, so the overlap test below sees the new size */
	rdev->size = size;
	if (size > oldsize && rdev->mddev->external) {
		/*
		 * Growing a device of an externally-managed array: scan the
		 * members of every array for one that has AllReserved set,
		 * or that sits on the same block device and overlaps the new
		 * extent.  Our own array lock is dropped for the scan
		 * because each array is locked in turn.
		 */
		mddev_t *mddev;
		int overlap = 0;
		struct list_head *tmp, *tmp2;

		mddev_unlock(my_mddev);
		for_each_mddev(mddev, tmp) {
			mdk_rdev_t *rdev2;

			mddev_lock(mddev);
			rdev_for_each(rdev2, tmp2, mddev)
				if (test_bit(AllReserved, &rdev2->flags) ||
				    (rdev->bdev == rdev2->bdev &&
				     rdev != rdev2 &&
				     overlaps(rdev->data_offset, rdev->size,
					    rdev2->data_offset, rdev2->size))) {
					overlap = 1;
					break;
				}
			mddev_unlock(mddev);
			if (overlap) {
				/*
				 * NOTE(review): presumably drops the reference
				 * for_each_mddev holds on the current array when
				 * the loop is left early -- confirm against the
				 * macro definition.
				 */
				mddev_put(mddev);
				break;
			}
		}
		mddev_lock(my_mddev);
		if (overlap) {
			/* conflict found: restore the previous size */
			rdev->size = oldsize;
			return -EBUSY;
		}
	}
	if (size < my_mddev->size || my_mddev->size == 0)
		my_mddev->size = size;
	return len;
}

static struct rdev_sysfs_entry rdev_size =
__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2077
/* Attributes attached to every rdev kobject through rdev_ktype. */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_size.attr,
	NULL,
};
2086static ssize_t
2087rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2088{
2089 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2090 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2091 mddev_t *mddev = rdev->mddev;
2092 ssize_t rv;
2093
2094 if (!entry->show)
2095 return -EIO;
2096
2097 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2098 if (!rv) {
2099 if (rdev->mddev == NULL)
2100 rv = -EBUSY;
2101 else
2102 rv = entry->show(rdev, page);
2103 mddev_unlock(mddev);
2104 }
2105 return rv;
2106}
2107
2108static ssize_t
2109rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2110 const char *page, size_t length)
2111{
2112 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2113 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2114 ssize_t rv;
2115 mddev_t *mddev = rdev->mddev;
2116
2117 if (!entry->store)
2118 return -EIO;
2119 if (!capable(CAP_SYS_ADMIN))
2120 return -EACCES;
2121 rv = mddev ? mddev_lock(mddev): -EBUSY;
2122 if (!rv) {
2123 if (rdev->mddev == NULL)
2124 rv = -EBUSY;
2125 else
2126 rv = entry->store(rdev, page, length);
2127 mddev_unlock(mddev);
2128 }
2129 return rv;
2130}
2131
2132static void rdev_free(struct kobject *ko)
2133{
2134 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2135 kfree(rdev);
2136}
/* sysfs read/write entry points shared by all rdev attributes. */
static struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/* kobject type of an rdev: release hook, sysfs ops and default attributes. */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
2158{
2159 char b[BDEVNAME_SIZE];
2160 int err;
2161 mdk_rdev_t *rdev;
2162 sector_t size;
2163
2164 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
2165 if (!rdev) {
2166 printk(KERN_ERR "md: could not alloc mem for new device!\n");
2167 return ERR_PTR(-ENOMEM);
2168 }
2169
2170 if ((err = alloc_disk_sb(rdev)))
2171 goto abort_free;
2172
2173 err = lock_rdev(rdev, newdev, super_format == -2);
2174 if (err)
2175 goto abort_free;
2176
2177 kobject_init(&rdev->kobj, &rdev_ktype);
2178
2179 rdev->desc_nr = -1;
2180 rdev->saved_raid_disk = -1;
2181 rdev->raid_disk = -1;
2182 rdev->flags = 0;
2183 rdev->data_offset = 0;
2184 rdev->sb_events = 0;
2185 atomic_set(&rdev->nr_pending, 0);
2186 atomic_set(&rdev->read_errors, 0);
2187 atomic_set(&rdev->corrected_errors, 0);
2188
2189 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2190 if (!size) {
2191 printk(KERN_WARNING
2192 "md: %s has zero or unknown size, marking faulty!\n",
2193 bdevname(rdev->bdev,b));
2194 err = -EINVAL;
2195 goto abort_free;
2196 }
2197
2198 if (super_format >= 0) {
2199 err = super_types[super_format].
2200 load_super(rdev, NULL, super_minor);
2201 if (err == -EINVAL) {
2202 printk(KERN_WARNING
2203 "md: %s does not have a valid v%d.%d "
2204 "superblock, not importing!\n",
2205 bdevname(rdev->bdev,b),
2206 super_format, super_minor);
2207 goto abort_free;
2208 }
2209 if (err < 0) {
2210 printk(KERN_WARNING
2211 "md: could not read %s's sb, not importing!\n",
2212 bdevname(rdev->bdev,b));
2213 goto abort_free;
2214 }
2215 }
2216
2217 INIT_LIST_HEAD(&rdev->same_set);
2218 init_waitqueue_head(&rdev->blocked_wait);
2219
2220 return rdev;
2221
2222abort_free:
2223 if (rdev->sb_page) {
2224 if (rdev->bdev)
2225 unlock_rdev(rdev);
2226 free_disk_sb(rdev);
2227 }
2228 kfree(rdev);
2229 return ERR_PTR(err);
2230}
2231
2232
2233
2234
2235
2236
2237static void analyze_sbs(mddev_t * mddev)
2238{
2239 int i;
2240 struct list_head *tmp;
2241 mdk_rdev_t *rdev, *freshest;
2242 char b[BDEVNAME_SIZE];
2243
2244 freshest = NULL;
2245 rdev_for_each(rdev, tmp, mddev)
2246 switch (super_types[mddev->major_version].
2247 load_super(rdev, freshest, mddev->minor_version)) {
2248 case 1:
2249 freshest = rdev;
2250 break;
2251 case 0:
2252 break;
2253 default:
2254 printk( KERN_ERR \
2255 "md: fatal superblock inconsistency in %s"
2256 " -- removing from array\n",
2257 bdevname(rdev->bdev,b));
2258 kick_rdev_from_array(rdev);
2259 }
2260
2261
2262 super_types[mddev->major_version].
2263 validate_super(mddev, freshest);
2264
2265 i = 0;
2266 rdev_for_each(rdev, tmp, mddev) {
2267 if (rdev != freshest)
2268 if (super_types[mddev->major_version].
2269 validate_super(mddev, rdev)) {
2270 printk(KERN_WARNING "md: kicking non-fresh %s"
2271 " from array!\n",
2272 bdevname(rdev->bdev,b));
2273 kick_rdev_from_array(rdev);
2274 continue;
2275 }
2276 if (mddev->level == LEVEL_MULTIPATH) {
2277 rdev->desc_nr = i++;
2278 rdev->raid_disk = rdev->desc_nr;
2279 set_bit(In_sync, &rdev->flags);
2280 } else if (rdev->raid_disk >= mddev->raid_disks) {
2281 rdev->raid_disk = -1;
2282 clear_bit(In_sync, &rdev->flags);
2283 }
2284 }
2285
2286
2287
2288 if (mddev->recovery_cp != MaxSector &&
2289 mddev->level >= 1)
2290 printk(KERN_ERR "md: %s: raid array is not clean"
2291 " -- starting background reconstruction\n",
2292 mdname(mddev));
2293
2294}
2295
2296static ssize_t
2297safe_delay_show(mddev_t *mddev, char *page)
2298{
2299 int msec = (mddev->safemode_delay*1000)/HZ;
2300 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2301}
2302static ssize_t
2303safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2304{
2305 int scale=1;
2306 int dot=0;
2307 int i;
2308 unsigned long msec;
2309 char buf[30];
2310 char *e;
2311
2312 if (len >= sizeof(buf))
2313 return -EINVAL;
2314 strlcpy(buf, cbuf, len);
2315 buf[len] = 0;
2316 for (i=0; i<len; i++) {
2317 if (dot) {
2318 if (isdigit(buf[i])) {
2319 buf[i-1] = buf[i];
2320 scale *= 10;
2321 }
2322 buf[i] = 0;
2323 } else if (buf[i] == '.') {
2324 dot=1;
2325 buf[i] = 0;
2326 }
2327 }
2328 msec = simple_strtoul(buf, &e, 10);
2329 if (e == buf || (*e && *e != '\n'))
2330 return -EINVAL;
2331 msec = (msec * 1000) / scale;
2332 if (msec == 0)
2333 mddev->safemode_delay = 0;
2334 else {
2335 mddev->safemode_delay = (msec*HZ)/1000;
2336 if (mddev->safemode_delay == 0)
2337 mddev->safemode_delay = 1;
2338 }
2339 return len;
2340}
2341static struct md_sysfs_entry md_safe_delay =
2342__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
2343
2344static ssize_t
2345level_show(mddev_t *mddev, char *page)
2346{
2347 struct mdk_personality *p = mddev->pers;
2348 if (p)
2349 return sprintf(page, "%s\n", p->name);
2350 else if (mddev->clevel[0])
2351 return sprintf(page, "%s\n", mddev->clevel);
2352 else if (mddev->level != LEVEL_NONE)
2353 return sprintf(page, "%d\n", mddev->level);
2354 else
2355 return 0;
2356}
2357
2358static ssize_t
2359level_store(mddev_t *mddev, const char *buf, size_t len)
2360{
2361 ssize_t rv = len;
2362 if (mddev->pers)
2363 return -EBUSY;
2364 if (len == 0)
2365 return 0;
2366 if (len >= sizeof(mddev->clevel))
2367 return -ENOSPC;
2368 strncpy(mddev->clevel, buf, len);
2369 if (mddev->clevel[len-1] == '\n')
2370 len--;
2371 mddev->clevel[len] = 0;
2372 mddev->level = LEVEL_NONE;
2373 return rv;
2374}
2375
2376static struct md_sysfs_entry md_level =
2377__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
2378
2379
2380static ssize_t
2381layout_show(mddev_t *mddev, char *page)
2382{
2383
2384 if (mddev->reshape_position != MaxSector &&
2385 mddev->layout != mddev->new_layout)
2386 return sprintf(page, "%d (%d)\n",
2387 mddev->new_layout, mddev->layout);
2388 return sprintf(page, "%d\n", mddev->layout);
2389}
2390
2391static ssize_t
2392layout_store(mddev_t *mddev, const char *buf, size_t len)
2393{
2394 char *e;
2395 unsigned long n = simple_strtoul(buf, &e, 10);
2396
2397 if (!*buf || (*e && *e != '\n'))
2398 return -EINVAL;
2399
2400 if (mddev->pers)
2401 return -EBUSY;
2402 if (mddev->reshape_position != MaxSector)
2403 mddev->new_layout = n;
2404 else
2405 mddev->layout = n;
2406 return len;
2407}
2408static struct md_sysfs_entry md_layout =
2409__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
2410
2411
2412static ssize_t
2413raid_disks_show(mddev_t *mddev, char *page)
2414{
2415 if (mddev->raid_disks == 0)
2416 return 0;
2417 if (mddev->reshape_position != MaxSector &&
2418 mddev->delta_disks != 0)
2419 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
2420 mddev->raid_disks - mddev->delta_disks);
2421 return sprintf(page, "%d\n", mddev->raid_disks);
2422}
2423
2424static int update_raid_disks(mddev_t *mddev, int raid_disks);
2425
2426static ssize_t
2427raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
2428{
2429 char *e;
2430 int rv = 0;
2431 unsigned long n = simple_strtoul(buf, &e, 10);
2432
2433 if (!*buf || (*e && *e != '\n'))
2434 return -EINVAL;
2435
2436 if (mddev->pers)
2437 rv = update_raid_disks(mddev, n);
2438 else if (mddev->reshape_position != MaxSector) {
2439 int olddisks = mddev->raid_disks - mddev->delta_disks;
2440 mddev->delta_disks = n - olddisks;
2441 mddev->raid_disks = n;
2442 } else
2443 mddev->raid_disks = n;
2444 return rv ? rv : len;
2445}
2446static struct md_sysfs_entry md_raid_disks =
2447__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
2448
2449static ssize_t
2450chunk_size_show(mddev_t *mddev, char *page)
2451{
2452 if (mddev->reshape_position != MaxSector &&
2453 mddev->chunk_size != mddev->new_chunk)
2454 return sprintf(page, "%d (%d)\n", mddev->new_chunk,
2455 mddev->chunk_size);
2456 return sprintf(page, "%d\n", mddev->chunk_size);
2457}
2458
2459static ssize_t
2460chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2461{
2462
2463 char *e;
2464 unsigned long n = simple_strtoul(buf, &e, 10);
2465
2466 if (!*buf || (*e && *e != '\n'))
2467 return -EINVAL;
2468
2469 if (mddev->pers)
2470 return -EBUSY;
2471 else if (mddev->reshape_position != MaxSector)
2472 mddev->new_chunk = n;
2473 else
2474 mddev->chunk_size = n;
2475 return len;
2476}
2477static struct md_sysfs_entry md_chunk_size =
2478__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2479
2480static ssize_t
2481resync_start_show(mddev_t *mddev, char *page)
2482{
2483 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2484}
2485
2486static ssize_t
2487resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2488{
2489 char *e;
2490 unsigned long long n = simple_strtoull(buf, &e, 10);
2491
2492 if (mddev->pers)
2493 return -EBUSY;
2494 if (!*buf || (*e && *e != '\n'))
2495 return -EINVAL;
2496
2497 mddev->recovery_cp = n;
2498 return len;
2499}
2500static struct md_sysfs_entry md_resync_start =
2501__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
/*
 * States of the "array_state" attribute.  The enumerators index the
 * array_states[] name table, so the order must match it.  bad_word is the
 * index of that table's NULL terminator, i.e. what match_word() returns
 * for an unrecognised word.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
/* Names of the array states, in enum array_state order, NULL-terminated. */
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
2544
2545static int match_word(const char *word, char **list)
2546{
2547 int n;
2548 for (n=0; list[n]; n++)
2549 if (cmd_match(word, list[n]))
2550 break;
2551 return n;
2552}
2553
2554static ssize_t
2555array_state_show(mddev_t *mddev, char *page)
2556{
2557 enum array_state st = inactive;
2558
2559 if (mddev->pers)
2560 switch(mddev->ro) {
2561 case 1:
2562 st = readonly;
2563 break;
2564 case 2:
2565 st = read_auto;
2566 break;
2567 case 0:
2568 if (mddev->in_sync)
2569 st = clean;
2570 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
2571 st = write_pending;
2572 else if (mddev->safemode)
2573 st = active_idle;
2574 else
2575 st = active;
2576 }
2577 else {
2578 if (list_empty(&mddev->disks) &&
2579 mddev->raid_disks == 0 &&
2580 mddev->size == 0)
2581 st = clear;
2582 else
2583 st = inactive;
2584 }
2585 return sprintf(page, "%s\n", array_states[st]);
2586}
2587
static int do_md_stop(mddev_t * mddev, int ro);
static int do_md_run(mddev_t * mddev);
static int restart_array(mddev_t *mddev);

/*
 * Handle a write to the "array_state" attribute.  @buf is matched against
 * array_states[]; an unknown word, and the words "suspended",
 * "write-pending" and "active-idle", leave err at -EINVAL.
 *
 * Returns @len on success, -EBUSY where noted below, or the error from
 * do_md_stop()/do_md_run()/restart_array().
 */
static ssize_t
array_state_store(mddev_t *mddev, const char *buf, size_t len)
{
	int err = -EINVAL;
	enum array_state st = match_word(buf, array_states);
	switch(st) {
	case bad_word:
		break;
	case clear:
		/* stop the array with mode 0; refused while it has other users */
		if (atomic_read(&mddev->active) > 1)
			return -EBUSY;
		err = do_md_stop(mddev, 0);
		break;
	case inactive:
		/* stop a running array with mode 2; a no-op if already stopped */
		if (mddev->pers) {
			if (atomic_read(&mddev->active) > 1)
				return -EBUSY;
			err = do_md_stop(mddev, 2);
		} else
			err = 0;
		break;
	case suspended:
		break; /* not supported yet */
	case readonly:
		/* running: stop with mode 1; not running: start it read-only */
		if (mddev->pers)
			err = do_md_stop(mddev, 1);
		else {
			mddev->ro = 1;
			set_disk_ro(mddev->gendisk, 1);
			err = do_md_run(mddev);
		}
		break;
	case read_auto:
		if (mddev->pers) {
			/*
			 * Get to ro == 2 from either direction: stop with
			 * mode 1 if not read-only, else restart the array.
			 */
			if (mddev->ro != 1)
				err = do_md_stop(mddev, 1);
			else
				err = restart_array(mddev);
			if (err == 0) {
				mddev->ro = 2;
				set_disk_ro(mddev->gendisk, 0);
			}
		} else {
			mddev->ro = 2;
			err = do_md_run(mddev);
		}
		break;
	case clean:
		if (mddev->pers) {
			/* the result of restart_array() is not checked here */
			restart_array(mddev);
			spin_lock_irq(&mddev->write_lock);
			/* may only be marked clean while no writes are pending */
			if (atomic_read(&mddev->writes_pending) == 0) {
				if (mddev->in_sync == 0) {
					mddev->in_sync = 1;
					if (mddev->safemode == 1)
						mddev->safemode = 0;
					if (mddev->persistent)
						set_bit(MD_CHANGE_CLEAN,
							&mddev->flags);
				}
				err = 0;
			} else
				err = -EBUSY;
			spin_unlock_irq(&mddev->write_lock);
		} else {
			/* start the array with no resync outstanding */
			mddev->ro = 0;
			mddev->recovery_cp = MaxSector;
			err = do_md_run(mddev);
		}
		break;
	case active:
		if (mddev->pers) {
			/* the result of restart_array() is not checked here */
			restart_array(mddev);
			if (mddev->external)
				clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
			wake_up(&mddev->sb_wait);
			err = 0;
		} else {
			mddev->ro = 0;
			set_disk_ro(mddev->gendisk, 0);
			err = do_md_run(mddev);
		}
		break;
	case write_pending:
	case active_idle:
		/* these states can be reported but not requested */
		break;
	}
	if (err)
		return err;
	else
		return len;
}
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
2689
/* show() callback for write-only attributes: always fails with -EINVAL. */
static ssize_t
null_show(mddev_t *mddev, char *page)
{
	return -EINVAL;
}
2695
2696static ssize_t
2697new_dev_store(mddev_t *mddev, const char *buf, size_t len)
2698{
2699
2700
2701
2702
2703
2704
2705
2706 char *e;
2707 int major = simple_strtoul(buf, &e, 10);
2708 int minor;
2709 dev_t dev;
2710 mdk_rdev_t *rdev;
2711 int err;
2712
2713 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
2714 return -EINVAL;
2715 minor = simple_strtoul(e+1, &e, 10);
2716 if (*e && *e != '\n')
2717 return -EINVAL;
2718 dev = MKDEV(major, minor);
2719 if (major != MAJOR(dev) ||
2720 minor != MINOR(dev))
2721 return -EOVERFLOW;
2722
2723
2724 if (mddev->persistent) {
2725 rdev = md_import_device(dev, mddev->major_version,
2726 mddev->minor_version);
2727 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
2728 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
2729 mdk_rdev_t, same_set);
2730 err = super_types[mddev->major_version]
2731 .load_super(rdev, rdev0, mddev->minor_version);
2732 if (err < 0)
2733 goto out;
2734 }
2735 } else if (mddev->external)
2736 rdev = md_import_device(dev, -2, -1);
2737 else
2738 rdev = md_import_device(dev, -1, -1);
2739
2740 if (IS_ERR(rdev))
2741 return PTR_ERR(rdev);
2742 err = bind_rdev_to_array(rdev, mddev);
2743 out:
2744 if (err)
2745 export_rdev(rdev);
2746 return err ? err : len;
2747}
2748
2749static struct md_sysfs_entry md_new_device =
2750__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
2751
2752static ssize_t
2753bitmap_store(mddev_t *mddev, const char *buf, size_t len)
2754{
2755 char *end;
2756 unsigned long chunk, end_chunk;
2757
2758 if (!mddev->bitmap)
2759 goto out;
2760
2761 while (*buf) {
2762 chunk = end_chunk = simple_strtoul(buf, &end, 0);
2763 if (buf == end) break;
2764 if (*end == '-') {
2765 buf = end + 1;
2766 end_chunk = simple_strtoul(buf, &end, 0);
2767 if (buf == end) break;
2768 }
2769 if (*end && !isspace(*end)) break;
2770 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
2771 buf = end;
2772 while (isspace(*buf)) buf++;
2773 }
2774 bitmap_unplug(mddev->bitmap);
2775out:
2776 return len;
2777}
2778
2779static struct md_sysfs_entry md_bitmap =
2780__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
2781
2782static ssize_t
2783size_show(mddev_t *mddev, char *page)
2784{
2785 return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
2786}
2787
2788static int update_size(mddev_t *mddev, unsigned long size);
2789
2790static ssize_t
2791size_store(mddev_t *mddev, const char *buf, size_t len)
2792{
2793
2794
2795
2796
2797 char *e;
2798 int err = 0;
2799 unsigned long long size = simple_strtoull(buf, &e, 10);
2800 if (!*buf || *buf == '\n' ||
2801 (*e && *e != '\n'))
2802 return -EINVAL;
2803
2804 if (mddev->pers) {
2805 err = update_size(mddev, size);
2806 md_update_sb(mddev, 1);
2807 } else {
2808 if (mddev->size == 0 ||
2809 mddev->size > size)
2810 mddev->size = size;
2811 else
2812 err = -ENOSPC;
2813 }
2814 return err ? err : len;
2815}
2816
2817static struct md_sysfs_entry md_size =
2818__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
2819
2820
2821
2822
2823
2824
2825
2826
2827static ssize_t
2828metadata_show(mddev_t *mddev, char *page)
2829{
2830 if (mddev->persistent)
2831 return sprintf(page, "%d.%d\n",
2832 mddev->major_version, mddev->minor_version);
2833 else if (mddev->external)
2834 return sprintf(page, "external:%s\n", mddev->metadata_type);
2835 else
2836 return sprintf(page, "none\n");
2837}
2838
2839static ssize_t
2840metadata_store(mddev_t *mddev, const char *buf, size_t len)
2841{
2842 int major, minor;
2843 char *e;
2844 if (!list_empty(&mddev->disks))
2845 return -EBUSY;
2846
2847 if (cmd_match(buf, "none")) {
2848 mddev->persistent = 0;
2849 mddev->external = 0;
2850 mddev->major_version = 0;
2851 mddev->minor_version = 90;
2852 return len;
2853 }
2854 if (strncmp(buf, "external:", 9) == 0) {
2855 size_t namelen = len-9;
2856 if (namelen >= sizeof(mddev->metadata_type))
2857 namelen = sizeof(mddev->metadata_type)-1;
2858 strncpy(mddev->metadata_type, buf+9, namelen);
2859 mddev->metadata_type[namelen] = 0;
2860 if (namelen && mddev->metadata_type[namelen-1] == '\n')
2861 mddev->metadata_type[--namelen] = 0;
2862 mddev->persistent = 0;
2863 mddev->external = 1;
2864 mddev->major_version = 0;
2865 mddev->minor_version = 90;
2866 return len;
2867 }
2868 major = simple_strtoul(buf, &e, 10);
2869 if (e==buf || *e != '.')
2870 return -EINVAL;
2871 buf = e+1;
2872 minor = simple_strtoul(buf, &e, 10);
2873 if (e==buf || (*e && *e != '\n') )
2874 return -EINVAL;
2875 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
2876 return -ENOENT;
2877 mddev->major_version = major;
2878 mddev->minor_version = minor;
2879 mddev->persistent = 1;
2880 mddev->external = 0;
2881 return len;
2882}
2883
2884static struct md_sysfs_entry md_metadata =
2885__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2886
2887static ssize_t
2888action_show(mddev_t *mddev, char *page)
2889{
2890 char *type = "idle";
2891 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
2892 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
2893 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2894 type = "reshape";
2895 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2896 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
2897 type = "resync";
2898 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
2899 type = "check";
2900 else
2901 type = "repair";
2902 } else
2903 type = "recover";
2904 }
2905 return sprintf(page, "%s\n", type);
2906}
2907
2908static ssize_t
2909action_store(mddev_t *mddev, const char *page, size_t len)
2910{
2911 if (!mddev->pers || !mddev->pers->sync_request)
2912 return -EINVAL;
2913
2914 if (cmd_match(page, "idle")) {
2915 if (mddev->sync_thread) {
2916 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2917 md_unregister_thread(mddev->sync_thread);
2918 mddev->sync_thread = NULL;
2919 mddev->recovery = 0;
2920 }
2921 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
2922 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
2923 return -EBUSY;
2924 else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
2925 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2926 else if (cmd_match(page, "reshape")) {
2927 int err;
2928 if (mddev->pers->start_reshape == NULL)
2929 return -EINVAL;
2930 err = mddev->pers->start_reshape(mddev);
2931 if (err)
2932 return err;
2933 } else {
2934 if (cmd_match(page, "check"))
2935 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
2936 else if (!cmd_match(page, "repair"))
2937 return -EINVAL;
2938 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
2939 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
2940 }
2941 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2942 md_wakeup_thread(mddev->thread);
2943 return len;
2944}
2945
2946static ssize_t
2947mismatch_cnt_show(mddev_t *mddev, char *page)
2948{
2949 return sprintf(page, "%llu\n",
2950 (unsigned long long) mddev->resync_mismatches);
2951}
2952
2953static struct md_sysfs_entry md_scan_mode =
2954__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
2955
2956
2957static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
2958
2959static ssize_t
2960sync_min_show(mddev_t *mddev, char *page)
2961{
2962 return sprintf(page, "%d (%s)\n", speed_min(mddev),
2963 mddev->sync_speed_min ? "local": "system");
2964}
2965
2966static ssize_t
2967sync_min_store(mddev_t *mddev, const char *buf, size_t len)
2968{
2969 int min;
2970 char *e;
2971 if (strncmp(buf, "system", 6)==0) {
2972 mddev->sync_speed_min = 0;
2973 return len;
2974 }
2975 min = simple_strtoul(buf, &e, 10);
2976 if (buf == e || (*e && *e != '\n') || min <= 0)
2977 return -EINVAL;
2978 mddev->sync_speed_min = min;
2979 return len;
2980}
2981
2982static struct md_sysfs_entry md_sync_min =
2983__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
2984
2985static ssize_t
2986sync_max_show(mddev_t *mddev, char *page)
2987{
2988 return sprintf(page, "%d (%s)\n", speed_max(mddev),
2989 mddev->sync_speed_max ? "local": "system");
2990}
2991
2992static ssize_t
2993sync_max_store(mddev_t *mddev, const char *buf, size_t len)
2994{
2995 int max;
2996 char *e;
2997 if (strncmp(buf, "system", 6)==0) {
2998 mddev->sync_speed_max = 0;
2999 return len;
3000 }
3001 max = simple_strtoul(buf, &e, 10);
3002 if (buf == e || (*e && *e != '\n') || max <= 0)
3003 return -EINVAL;
3004 mddev->sync_speed_max = max;
3005 return len;
3006}
3007
3008static struct md_sysfs_entry md_sync_max =
3009__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
3010
3011static ssize_t
3012degraded_show(mddev_t *mddev, char *page)
3013{
3014 return sprintf(page, "%d\n", mddev->degraded);
3015}
3016static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3017
3018static ssize_t
3019sync_force_parallel_show(mddev_t *mddev, char *page)
3020{
3021 return sprintf(page, "%d\n", mddev->parallel_resync);
3022}
3023
3024static ssize_t
3025sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3026{
3027 long n;
3028
3029 if (strict_strtol(buf, 10, &n))
3030 return -EINVAL;
3031
3032 if (n != 0 && n != 1)
3033 return -EINVAL;
3034
3035 mddev->parallel_resync = n;
3036
3037 if (mddev->sync_thread)
3038 wake_up(&resync_wait);
3039
3040 return len;
3041}
3042
3043
3044static struct md_sysfs_entry md_sync_force_parallel =
3045__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3046 sync_force_parallel_show, sync_force_parallel_store);
3047
3048static ssize_t
3049sync_speed_show(mddev_t *mddev, char *page)
3050{
3051 unsigned long resync, dt, db;
3052 resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active));
3053 dt = ((jiffies - mddev->resync_mark) / HZ);
3054 if (!dt) dt++;
3055 db = resync - (mddev->resync_mark_cnt);
3056 return sprintf(page, "%ld\n", db/dt/2);
3057}
3058
3059static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3060
3061static ssize_t
3062sync_completed_show(mddev_t *mddev, char *page)
3063{
3064 unsigned long max_blocks, resync;
3065
3066 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3067 max_blocks = mddev->resync_max_sectors;
3068 else
3069 max_blocks = mddev->size << 1;
3070
3071 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
3072 return sprintf(page, "%lu / %lu\n", resync, max_blocks);
3073}
3074
3075static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3076
3077static ssize_t
3078max_sync_show(mddev_t *mddev, char *page)
3079{
3080 if (mddev->resync_max == MaxSector)
3081 return sprintf(page, "max\n");
3082 else
3083 return sprintf(page, "%llu\n",
3084 (unsigned long long)mddev->resync_max);
3085}
3086static ssize_t
3087max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3088{
3089 if (strncmp(buf, "max", 3) == 0)
3090 mddev->resync_max = MaxSector;
3091 else {
3092 char *ep;
3093 unsigned long long max = simple_strtoull(buf, &ep, 10);
3094 if (ep == buf || (*ep != 0 && *ep != '\n'))
3095 return -EINVAL;
3096 if (max < mddev->resync_max &&
3097 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3098 return -EBUSY;
3099
3100
3101 if (mddev->chunk_size) {
3102 if (max & (sector_t)((mddev->chunk_size>>9)-1))
3103 return -EINVAL;
3104 }
3105 mddev->resync_max = max;
3106 }
3107 wake_up(&mddev->recovery_wait);
3108 return len;
3109}
3110
3111static struct md_sysfs_entry md_max_sync =
3112__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
3113
3114static ssize_t
3115suspend_lo_show(mddev_t *mddev, char *page)
3116{
3117 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
3118}
3119
3120static ssize_t
3121suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3122{
3123 char *e;
3124 unsigned long long new = simple_strtoull(buf, &e, 10);
3125
3126 if (mddev->pers->quiesce == NULL)
3127 return -EINVAL;
3128 if (buf == e || (*e && *e != '\n'))
3129 return -EINVAL;
3130 if (new >= mddev->suspend_hi ||
3131 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
3132 mddev->suspend_lo = new;
3133 mddev->pers->quiesce(mddev, 2);
3134 return len;
3135 } else
3136 return -EINVAL;
3137}
3138static struct md_sysfs_entry md_suspend_lo =
3139__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
3140
3141
3142static ssize_t
3143suspend_hi_show(mddev_t *mddev, char *page)
3144{
3145 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
3146}
3147
3148static ssize_t
3149suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3150{
3151 char *e;
3152 unsigned long long new = simple_strtoull(buf, &e, 10);
3153
3154 if (mddev->pers->quiesce == NULL)
3155 return -EINVAL;
3156 if (buf == e || (*e && *e != '\n'))
3157 return -EINVAL;
3158 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
3159 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
3160 mddev->suspend_hi = new;
3161 mddev->pers->quiesce(mddev, 1);
3162 mddev->pers->quiesce(mddev, 0);
3163 return len;
3164 } else
3165 return -EINVAL;
3166}
3167static struct md_sysfs_entry md_suspend_hi =
3168__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
3169
3170static ssize_t
3171reshape_position_show(mddev_t *mddev, char *page)
3172{
3173 if (mddev->reshape_position != MaxSector)
3174 return sprintf(page, "%llu\n",
3175 (unsigned long long)mddev->reshape_position);
3176 strcpy(page, "none\n");
3177 return 5;
3178}
3179
3180static ssize_t
3181reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
3182{
3183 char *e;
3184 unsigned long long new = simple_strtoull(buf, &e, 10);
3185 if (mddev->pers)
3186 return -EBUSY;
3187 if (buf == e || (*e && *e != '\n'))
3188 return -EINVAL;
3189 mddev->reshape_position = new;
3190 mddev->delta_disks = 0;
3191 mddev->new_level = mddev->level;
3192 mddev->new_layout = mddev->layout;
3193 mddev->new_chunk = mddev->chunk_size;
3194 return len;
3195}
3196
3197static struct md_sysfs_entry md_reshape_position =
3198__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3199 reshape_position_store);
3200
3201
/*
 * Attributes installed as the default set of md_ktype, i.e. for every
 * md kobject.  NULL-terminated.
 */
static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
	&md_metadata.attr,
	&md_new_device.attr,
	&md_safe_delay.attr,
	&md_array_state.attr,
	&md_reshape_position.attr,
	NULL,
};
3216
/*
 * Attributes that make sense only for personalities with a
 * sync_request method; do_md_run() adds this group and do_md_stop()
 * removes it under that condition.  NULL-terminated.
 */
static struct attribute *md_redundancy_attrs[] = {
	&md_scan_mode.attr,
	&md_mismatches.attr,
	&md_sync_min.attr,
	&md_sync_max.attr,
	&md_sync_speed.attr,
	&md_sync_force_parallel.attr,
	&md_sync_completed.attr,
	&md_max_sync.attr,
	&md_suspend_lo.attr,
	&md_suspend_hi.attr,
	&md_bitmap.attr,
	&md_degraded.attr,
	NULL,
};
/* Unnamed group: the attributes appear directly in the md directory. */
static struct attribute_group md_redundancy_group = {
	.name = NULL,
	.attrs = md_redundancy_attrs,
};
3236
3237
3238static ssize_t
3239md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3240{
3241 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3242 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3243 ssize_t rv;
3244
3245 if (!entry->show)
3246 return -EIO;
3247 rv = mddev_lock(mddev);
3248 if (!rv) {
3249 rv = entry->show(mddev, page);
3250 mddev_unlock(mddev);
3251 }
3252 return rv;
3253}
3254
3255static ssize_t
3256md_attr_store(struct kobject *kobj, struct attribute *attr,
3257 const char *page, size_t length)
3258{
3259 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3260 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3261 ssize_t rv;
3262
3263 if (!entry->store)
3264 return -EIO;
3265 if (!capable(CAP_SYS_ADMIN))
3266 return -EACCES;
3267 rv = mddev_lock(mddev);
3268 if (!rv) {
3269 rv = entry->store(mddev, page, length);
3270 mddev_unlock(mddev);
3271 }
3272 return rv;
3273}
3274
3275static void md_free(struct kobject *ko)
3276{
3277 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3278 kfree(mddev);
3279}
3280
/* sysfs read/write entry points shared by all md attributes. */
static struct sysfs_ops md_sysfs_ops = {
	.show = md_attr_show,
	.store = md_attr_store,
};
/*
 * kobject type of an mddev: freed through md_free(), accessed through
 * md_sysfs_ops, and populated with md_default_attrs.
 */
static struct kobj_type md_ktype = {
	.release = md_free,
	.sysfs_ops = &md_sysfs_ops,
	.default_attrs = md_default_attrs,
};
3290
/*
 * Major number used for partitionable md devices (see its use with
 * MKDEV() in autorun_devices()).  Starts at 0; nothing in this part of
 * the file assigns it.
 */
int mdp_major = 0;

/*
 * md_probe() - make sure a gendisk and an "md" sysfs kobject exist for
 * the md device @dev.
 *
 * @dev:  device number; a major other than MD_MAJOR selects the
 *        partitionable flavour (minor shifted by MdpMinorShift).
 * @part: unused here.
 * @data: unused here.
 *
 * Looks up the mddev via mddev_find().  If the mddev already has a
 * gendisk, or alloc_disk() fails, the reference is dropped again with
 * mddev_put().  Otherwise a disk named "md%d" or "md_d%d" is created
 * and added, and the mddev's kobject is registered below the disk's
 * kobject under the name "md".  On this path the reference obtained
 * from mddev_find() is not dropped here.
 *
 * Always returns NULL.
 */
static struct kobject *md_probe(dev_t dev, int *part, void *data)
{
	/* serialises gendisk creation between concurrent callers */
	static DEFINE_MUTEX(disks_mutex);
	mddev_t *mddev = mddev_find(dev);
	struct gendisk *disk;
	int partitioned = (MAJOR(dev) != MD_MAJOR);
	int shift = partitioned ? MdpMinorShift : 0;
	int unit = MINOR(dev) >> shift;
	int error;

	if (!mddev)
		return NULL;

	mutex_lock(&disks_mutex);
	if (mddev->gendisk) {
		/* already set up: nothing to do */
		mutex_unlock(&disks_mutex);
		mddev_put(mddev);
		return NULL;
	}
	/* one minor for a plain array, 1 << MdpMinorShift for a partitionable one */
	disk = alloc_disk(1 << shift);
	if (!disk) {
		mutex_unlock(&disks_mutex);
		mddev_put(mddev);
		return NULL;
	}
	disk->major = MAJOR(dev);
	disk->first_minor = unit << shift;
	if (partitioned)
		sprintf(disk->disk_name, "md_d%d", unit);
	else
		sprintf(disk->disk_name, "md%d", unit);
	disk->fops = &md_fops;
	disk->private_data = mddev;
	disk->queue = mddev->queue;
	add_disk(disk);
	mddev->gendisk = disk;
	error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
				     "%s", "md");
	mutex_unlock(&disks_mutex);
	if (error)
		/* registration failure is only reported, not propagated */
		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
		       disk->disk_name);
	else
		kobject_uevent(&mddev->kobj, KOBJ_ADD);
	return NULL;
}
3339
3340static void md_safemode_timeout(unsigned long data)
3341{
3342 mddev_t *mddev = (mddev_t *) data;
3343
3344 mddev->safemode = 1;
3345 md_wakeup_thread(mddev->thread);
3346}
3347
/*
 * Copied into mddev->ok_start_degraded by do_md_run() each time an
 * array is started.
 */
static int start_dirty_degraded;

/*
 * do_md_run() - validate an assembled array and start its personality.
 *
 * Steps, in order:
 *   1. reject arrays without devices (-EINVAL) or already running (-EBUSY);
 *   2. if raid_disks is still 0, require persistent metadata and call
 *      analyze_sbs();
 *   3. validate chunk_size (upper bound, single bit set, at least
 *      PAGE_SIZE, not larger than any non-faulty device);
 *   4. optionally request the personality module (CONFIG_KMOD);
 *   5. flush and invalidate each non-faulty member and check that data
 *      and metadata areas do not overlap;
 *   6. make sure the gendisk exists (md_probe()), look up the
 *      personality and take a module reference;
 *   7. run the personality, create the bitmap where applicable, and
 *      publish sysfs attributes and "rd%d" links;
 *   8. set capacity and request handling, possibly start a recovery
 *      thread for a degraded array, wake the threads and emit events.
 *
 * Returns 0 on success, -EINVAL/-EBUSY/-ENOMEM for the checks above,
 * or the error from pers->run() / bitmap_create().
 */
static int do_md_run(mddev_t * mddev)
{
	int err;
	int chunk_size;
	struct list_head *tmp;
	mdk_rdev_t *rdev;
	struct gendisk *disk;
	struct mdk_personality *pers;
	char b[BDEVNAME_SIZE];

	if (list_empty(&mddev->disks))
		/* an array without member devices cannot be started */
		return -EINVAL;

	if (mddev->pers)
		return -EBUSY;

	/*
	 * raid_disks not set yet: the geometry has to come from the
	 * superblocks, which requires persistent metadata.
	 */
	if (!mddev->raid_disks) {
		if (!mddev->persistent)
			return -EINVAL;
		analyze_sbs(mddev);
	}

	chunk_size = mddev->chunk_size;

	if (chunk_size) {
		if (chunk_size > MAX_CHUNK_SIZE) {
			printk(KERN_ERR "too big chunk_size: %d > %d\n",
				chunk_size, MAX_CHUNK_SIZE);
			return -EINVAL;
		}
		/*
		 * The chunk size must have exactly one bit set, i.e. be
		 * a power of two.
		 */
		if ( (1 << ffz(~chunk_size)) != chunk_size) {
			printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
			return -EINVAL;
		}
		if (chunk_size < PAGE_SIZE) {
			printk(KERN_ERR "too small chunk_size: %d < %ld\n",
				chunk_size, PAGE_SIZE);
			return -EINVAL;
		}

		/* every usable device must hold at least one chunk */
		rdev_for_each(rdev, tmp, mddev) {
			if (test_bit(Faulty, &rdev->flags))
				continue;
			if (rdev->size < chunk_size / 1024) {
				printk(KERN_WARNING
					"md: Dev %s smaller than chunk_size:"
					" %lluk < %dk\n",
					bdevname(rdev->bdev,b),
					(unsigned long long)rdev->size,
					chunk_size / 1024);
				return -EINVAL;
			}
		}
	}

#ifdef CONFIG_KMOD
	/* try to load the personality module, by level number or by name */
	if (mddev->level != LEVEL_NONE)
		request_module("md-level-%d", mddev->level);
	else if (mddev->clevel[0])
		request_module("md-%s", mddev->clevel);
#endif

	/*
	 * Flush and drop cached data of every usable member, then verify
	 * that its data area and its metadata area do not overlap.
	 */
	rdev_for_each(rdev, tmp, mddev) {
		if (test_bit(Faulty, &rdev->flags))
			continue;
		sync_blockdev(rdev->bdev);
		invalidate_bdev(rdev->bdev);

		/*
		 * NOTE(review): the factors of 2 and the /512 suggest
		 * sb_offset and mddev->size are in 1K units while
		 * data_offset is in 512-byte sectors -- confirm.
		 */
		if (rdev->data_offset < rdev->sb_offset) {
			/* data precedes the superblock */
			if (mddev->size &&
			    rdev->data_offset + mddev->size*2
			    > rdev->sb_offset*2) {
				printk("md: %s: data overlaps metadata\n",
				       mdname(mddev));
				return -EINVAL;
			}
		} else {
			/* superblock precedes the data */
			if (rdev->sb_offset*2 + rdev->sb_size/512
			    > rdev->data_offset) {
				printk("md: %s: metadata overlaps data\n",
				       mdname(mddev));
				return -EINVAL;
			}
		}
	}

	/* make sure the gendisk exists before going any further */
	md_probe(mddev->unit, NULL, NULL);
	disk = mddev->gendisk;
	if (!disk)
		return -ENOMEM;

	/* look up the personality and pin its module */
	spin_lock(&pers_lock);
	pers = find_pers(mddev->level, mddev->clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		if (mddev->level != LEVEL_NONE)
			printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
			       mddev->level);
		else
			printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
			       mddev->clevel);
		return -EINVAL;
	}
	mddev->pers = pers;
	spin_unlock(&pers_lock);
	/* record both the numeric and the textual form of the level */
	mddev->level = pers->level;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
		/* a reshape is recorded but this personality cannot reshape */
		mddev->pers = NULL;
		module_put(pers->owner);
		return -EINVAL;
	}

	if (pers->sync_request) {
		/*
		 * Warn if two members share the same underlying whole
		 * device (bd_contains).  The rdev < rdev2 comparison makes
		 * each pair get reported once.
		 */
		char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
		mdk_rdev_t *rdev2;
		struct list_head *tmp2;
		int warned = 0;
		rdev_for_each(rdev, tmp, mddev) {
			rdev_for_each(rdev2, tmp2, mddev) {
				if (rdev < rdev2 &&
				    rdev->bdev->bd_contains ==
				    rdev2->bdev->bd_contains) {
					printk(KERN_WARNING
					       "%s: WARNING: %s appears to be"
					       " on the same physical disk as"
					       " %s.\n",
					       mdname(mddev),
					       bdevname(rdev->bdev,b),
					       bdevname(rdev2->bdev,b2));
					warned = 1;
				}
			}
		}
		if (warned)
			printk(KERN_WARNING
			       "True protection against single-disk"
			       " failure might be compromised.\n");
	}

	mddev->recovery = 0;
	mddev->resync_max_sectors = mddev->size << 1;
	mddev->barriers_work = 1;
	mddev->ok_start_degraded = start_dirty_degraded;

	if (start_readonly)
		mddev->ro = 2;

	err = mddev->pers->run(mddev);
	if (!err && mddev->pers->sync_request) {
		err = bitmap_create(mddev);
		if (err) {
			printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
			       mdname(mddev), err);
			/* undo the successful run() before bailing out below */
			mddev->pers->stop(mddev);
		}
	}
	if (err) {
		printk(KERN_ERR "md: pers->run() failed ...\n");
		module_put(mddev->pers->owner);
		mddev->pers = NULL;
		bitmap_destroy(mddev);
		return err;
	}
	if (mddev->pers->sync_request) {
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			printk(KERN_WARNING
			       "md: cannot register extra attributes for %s\n",
			       mdname(mddev));
	} else if (mddev->ro == 2)
		/* ro == 2 is kept only for personalities with sync_request */
		mddev->ro = 0;

	atomic_set(&mddev->writes_pending,0);
	mddev->safemode = 0;
	mddev->safemode_timer.function = md_safemode_timeout;
	mddev->safemode_timer.data = (unsigned long) mddev;
	mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec, in jiffies, plus one */
	mddev->in_sync = 1;

	/* publish an "rd<N>" link for every device that holds a slot */
	rdev_for_each(rdev, tmp, mddev)
		if (rdev->raid_disk >= 0) {
			char nm[20];
			sprintf(nm, "rd%d", rdev->raid_disk);
			if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
				printk("md: cannot register %s for %s\n",
				       nm, mdname(mddev));
		}

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

	if (mddev->flags)
		md_update_sb(mddev, 0);

	set_capacity(disk, mddev->array_size<<1);

	/* route requests on the queue to the personality */
	mddev->queue->queuedata = mddev;
	mddev->queue->make_request_fn = mddev->pers->make_request;

	/*
	 * Degraded array without a sync thread: count devices that hold
	 * a slot but are neither In_sync nor Faulty, and start a sync
	 * thread right away if there are any.
	 */
	if (mddev->degraded && !mddev->sync_thread) {
		struct list_head *rtmp;
		int spares = 0;
		rdev_for_each(rdev, rtmp, mddev)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(In_sync, &rdev->flags) &&
			    !test_bit(Faulty, &rdev->flags))
				/* occupies a slot but is not yet in sync */
				spares++;
		if (spares && mddev->pers->sync_request) {
			mddev->recovery = 0;
			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			mddev->sync_thread = md_register_thread(md_do_sync,
								mddev,
								"%s_resync");
			if (!mddev->sync_thread) {
				printk(KERN_ERR "%s: could not start resync"
				       " thread...\n",
				       mdname(mddev));
				/* leave the array running without recovery */
				mddev->recovery = 0;
			}
		}
	}
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);

	mddev->changed = 1;
	md_new_event(mddev);
	kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
	return 0;
}
3614
3615static int restart_array(mddev_t *mddev)
3616{
3617 struct gendisk *disk = mddev->gendisk;
3618 int err;
3619
3620
3621
3622
3623 err = -ENXIO;
3624 if (list_empty(&mddev->disks))
3625 goto out;
3626
3627 if (mddev->pers) {
3628 err = -EBUSY;
3629 if (!mddev->ro)
3630 goto out;
3631
3632 mddev->safemode = 0;
3633 mddev->ro = 0;
3634 set_disk_ro(disk, 0);
3635
3636 printk(KERN_INFO "md: %s switched to read-write mode.\n",
3637 mdname(mddev));
3638
3639
3640
3641 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3642 md_wakeup_thread(mddev->thread);
3643 md_wakeup_thread(mddev->sync_thread);
3644 err = 0;
3645 } else
3646 err = -EINVAL;
3647
3648out:
3649 return err;
3650}
3651
3652
3653
3654static int deny_bitmap_write_access(struct file * file)
3655{
3656 struct inode *inode = file->f_mapping->host;
3657
3658 spin_lock(&inode->i_lock);
3659 if (atomic_read(&inode->i_writecount) > 1) {
3660 spin_unlock(&inode->i_lock);
3661 return -ETXTBSY;
3662 }
3663 atomic_set(&inode->i_writecount, -1);
3664 spin_unlock(&inode->i_lock);
3665
3666 return 0;
3667}
3668
3669static void restore_bitmap_write_access(struct file *file)
3670{
3671 struct inode *inode = file->f_mapping->host;
3672
3673 spin_lock(&inode->i_lock);
3674 atomic_set(&inode->i_writecount, 1);
3675 spin_unlock(&inode->i_lock);
3676}
3677
3678
3679
3680
3681
3682
/*
 * do_md_stop() - stop an array or switch it to read-only.
 *
 * @mode selects how far the teardown goes:
 *   0 - stop the personality and fully disassemble the array (bitmap
 *       destroyed, devices exported, configuration fields reset);
 *   1 - keep the personality and switch the array to read-only;
 *   2 - stop the personality but keep the devices and configuration.
 *
 * Returns 0 on success, -EBUSY if the array is running and has more
 * than two active references, and -ENXIO if mode 1 is requested on an
 * array that is already read-only.
 */
static int do_md_stop(mddev_t * mddev, int mode)
{
	int err = 0;
	struct gendisk *disk = mddev->gendisk;

	if (mddev->pers) {
		if (atomic_read(&mddev->active)>2) {
			printk("md: %s still in use.\n",mdname(mddev));
			return -EBUSY;
		}

		if (mddev->sync_thread) {
			/* freeze recovery, then interrupt and reap the sync thread */
			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_unregister_thread(mddev->sync_thread);
			mddev->sync_thread = NULL;
		}

		del_timer_sync(&mddev->safemode_timer);

		invalidate_partition(disk, 0);

		switch(mode) {
		case 1: /* switch to read-only */
			err = -ENXIO;
			if (mddev->ro==1)
				goto out;
			mddev->ro = 1;
			break;
		case 0: /* disassemble */
		case 2: /* stop, keeping devices and configuration */
			bitmap_flush(mddev);
			md_super_wait(mddev);
			if (mddev->ro)
				set_disk_ro(disk, 0);
			/* from here on, requests go to md_fail_request */
			blk_queue_make_request(mddev->queue, md_fail_request);
			mddev->pers->stop(mddev);
			mddev->queue->merge_bvec_fn = NULL;
			mddev->queue->unplug_fn = NULL;
			mddev->queue->backing_dev_info.congested_fn = NULL;
			if (mddev->pers->sync_request)
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);

			module_put(mddev->pers->owner);
			mddev->pers = NULL;
			/* tell pollers of array_state that the array changed */
			sysfs_notify(&mddev->kobj, NULL, "array_state");

			set_capacity(disk, 0);
			mddev->changed = 1;

			if (mddev->ro)
				mddev->ro = 0;
		}
		if (!mddev->in_sync || mddev->flags) {
			/* mark the array clean and write the superblock out */
			mddev->in_sync = 1;
			md_update_sb(mddev, 1);
		}
		if (mode == 1)
			set_disk_ro(disk, 1);
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	}

	/*
	 * Mode 0 only: release every resource and reset the configuration.
	 */
	if (mode == 0) {
		mdk_rdev_t *rdev;
		struct list_head *tmp;

		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));

		bitmap_destroy(mddev);
		if (mddev->bitmap_file) {
			restore_bitmap_write_access(mddev->bitmap_file);
			fput(mddev->bitmap_file);
			mddev->bitmap_file = NULL;
		}
		mddev->bitmap_offset = 0;

		/* remove the "rd<N>" links created by do_md_run() */
		rdev_for_each(rdev, tmp, mddev)
			if (rdev->raid_disk >= 0) {
				char nm[20];
				sprintf(nm, "rd%d", rdev->raid_disk);
				sysfs_remove_link(&mddev->kobj, nm);
			}

		/* let queued work finish before the devices are exported */
		flush_scheduled_work();

		export_array(mddev);

		mddev->array_size = 0;
		mddev->size = 0;
		mddev->raid_disks = 0;
		mddev->recovery_cp = 0;
		mddev->resync_max = MaxSector;
		mddev->reshape_position = MaxSector;
		mddev->external = 0;
		mddev->persistent = 0;
		mddev->level = LEVEL_NONE;
		mddev->clevel[0] = 0;
		mddev->flags = 0;
		mddev->ro = 0;
		mddev->metadata_type[0] = 0;
		mddev->chunk_size = 0;
		mddev->ctime = mddev->utime = 0;
		mddev->layout = 0;
		mddev->max_disks = 0;
		mddev->events = 0;
		mddev->delta_disks = 0;
		mddev->new_level = LEVEL_NONE;
		mddev->new_layout = 0;
		mddev->new_chunk = 0;
		mddev->curr_resync = 0;
		mddev->resync_mismatches = 0;
		mddev->suspend_lo = mddev->suspend_hi = 0;
		mddev->sync_speed_min = mddev->sync_speed_max = 0;
		mddev->recovery = 0;
		mddev->in_sync = 0;
		mddev->changed = 0;
		mddev->degraded = 0;
		mddev->barriers_work = 0;
		mddev->safemode = 0;

	} else if (mddev->pers)
		/* only mode 1 leaves the personality attached at this point */
		printk(KERN_INFO "md: %s switched to read-only mode.\n",
			mdname(mddev));
	err = 0;
	md_new_event(mddev);
out:
	return err;
}
3817
3818#ifndef MODULE
3819static void autorun_array(mddev_t *mddev)
3820{
3821 mdk_rdev_t *rdev;
3822 struct list_head *tmp;
3823 int err;
3824
3825 if (list_empty(&mddev->disks))
3826 return;
3827
3828 printk(KERN_INFO "md: running: ");
3829
3830 rdev_for_each(rdev, tmp, mddev) {
3831 char b[BDEVNAME_SIZE];
3832 printk("<%s>", bdevname(rdev->bdev,b));
3833 }
3834 printk("\n");
3835
3836 err = do_md_run (mddev);
3837 if (err) {
3838 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
3839 do_md_stop (mddev, 0);
3840 }
3841}
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
/*
 * autorun_devices() - assemble and start arrays from the devices queued
 * on pending_raid_disks.
 *
 * @part: non-zero to create partitionable arrays (major mdp_major,
 *        minor shifted by MdpMinorShift); zero for plain MD_MAJOR
 *        arrays.
 *
 * Each pass takes the first pending device, moves every pending device
 * for which super_90_load() against it succeeds onto a local
 * "candidates" list, and tries to bind those candidates to the array
 * named by the first device's preferred minor and run it.  Candidates
 * that were not bound are exported at the end of the pass.
 */
static void autorun_devices(int part)
{
	struct list_head *tmp;
	mdk_rdev_t *rdev0, *rdev;
	mddev_t *mddev;
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "md: autorun ...\n");
	while (!list_empty(&pending_raid_disks)) {
		int unit;
		dev_t dev;
		LIST_HEAD(candidates);
		rdev0 = list_entry(pending_raid_disks.next,
					 mdk_rdev_t, same_set);

		printk(KERN_INFO "md: considering %s ...\n",
			bdevname(rdev0->bdev,b));
		INIT_LIST_HEAD(&candidates);
		/* collect every pending device that loads against rdev0 */
		rdev_for_each_list(rdev, tmp, pending_raid_disks)
			if (super_90_load(rdev, rdev0, 0) >= 0) {
				printk(KERN_INFO "md:  adding %s ...\n",
					bdevname(rdev->bdev,b));
				list_move(&rdev->same_set, &candidates);
			}
		/*
		 * Build the device number from the preferred minor and
		 * check that the minor survives the round trip through
		 * MKDEV().
		 */
		if (part) {
			dev = MKDEV(mdp_major,
				    rdev0->preferred_minor << MdpMinorShift);
			unit = MINOR(dev) >> MdpMinorShift;
		} else {
			dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
			unit = MINOR(dev);
		}
		if (rdev0->preferred_minor != unit) {
			printk(KERN_INFO "md: unit number in %s is bad: %d\n",
			       bdevname(rdev0->bdev, b), rdev0->preferred_minor);
			/*
			 * NOTE(review): this break leaves the loop without
			 * exporting the rdevs already moved to "candidates"
			 * -- confirm whether that is intended.
			 */
			break;
		}

		md_probe(dev, NULL, NULL);
		mddev = mddev_find(dev);
		if (!mddev || !mddev->gendisk) {
			if (mddev)
				mddev_put(mddev);
			printk(KERN_ERR
				"md: cannot allocate memory for md drive.\n");
			break;
		}
		if (mddev_lock(mddev))
			printk(KERN_WARNING "md: %s locked, cannot run\n",
			       mdname(mddev));
		else if (mddev->raid_disks || mddev->major_version
			 || !list_empty(&mddev->disks)) {
			/* the target array is already configured */
			printk(KERN_WARNING
				"md: %s already running, cannot run %s\n",
				mdname(mddev), bdevname(rdev0->bdev,b));
			mddev_unlock(mddev);
		} else {
			printk(KERN_INFO "md: created %s\n", mdname(mddev));
			mddev->persistent = 1;
			rdev_for_each_list(rdev, tmp, candidates) {
				list_del_init(&rdev->same_set);
				if (bind_rdev_to_array(rdev, mddev))
					export_rdev(rdev);
			}
			autorun_array(mddev);
			mddev_unlock(mddev);
		}
		/*
		 * Whatever is still on the candidates list was not bound
		 * to the array above; release it.
		 */
		rdev_for_each_list(rdev, tmp, candidates)
			export_rdev(rdev);
		mddev_put(mddev);
	}
	printk(KERN_INFO "md: ... autorun DONE.\n");
}
3936#endif
3937
3938static int get_version(void __user * arg)
3939{
3940 mdu_version_t ver;
3941
3942 ver.major = MD_MAJOR_VERSION;
3943 ver.minor = MD_MINOR_VERSION;
3944 ver.patchlevel = MD_PATCHLEVEL_VERSION;
3945
3946 if (copy_to_user(arg, &ver, sizeof(ver)))
3947 return -EFAULT;
3948
3949 return 0;
3950}
3951
3952static int get_array_info(mddev_t * mddev, void __user * arg)
3953{
3954 mdu_array_info_t info;
3955 int nr,working,active,failed,spare;
3956 mdk_rdev_t *rdev;
3957 struct list_head *tmp;
3958
3959 nr=working=active=failed=spare=0;
3960 rdev_for_each(rdev, tmp, mddev) {
3961 nr++;
3962 if (test_bit(Faulty, &rdev->flags))
3963 failed++;
3964 else {
3965 working++;
3966 if (test_bit(In_sync, &rdev->flags))
3967 active++;
3968 else
3969 spare++;
3970 }
3971 }
3972
3973 info.major_version = mddev->major_version;
3974 info.minor_version = mddev->minor_version;
3975 info.patch_version = MD_PATCHLEVEL_VERSION;
3976 info.ctime = mddev->ctime;
3977 info.level = mddev->level;
3978 info.size = mddev->size;
3979 if (info.size != mddev->size)
3980 info.size = -1;
3981 info.nr_disks = nr;
3982 info.raid_disks = mddev->raid_disks;
3983 info.md_minor = mddev->md_minor;
3984 info.not_persistent= !mddev->persistent;
3985
3986 info.utime = mddev->utime;
3987 info.state = 0;
3988 if (mddev->in_sync)
3989 info.state = (1<<MD_SB_CLEAN);
3990 if (mddev->bitmap && mddev->bitmap_offset)
3991 info.state = (1<<MD_SB_BITMAP_PRESENT);
3992 info.active_disks = active;
3993 info.working_disks = working;
3994 info.failed_disks = failed;
3995 info.spare_disks = spare;
3996
3997 info.layout = mddev->layout;
3998 info.chunk_size = mddev->chunk_size;
3999
4000 if (copy_to_user(arg, &info, sizeof(info)))
4001 return -EFAULT;
4002
4003 return 0;
4004}
4005
4006static int get_bitmap_file(mddev_t * mddev, void __user * arg)
4007{
4008 mdu_bitmap_file_t *file = NULL;
4009 char *ptr, *buf = NULL;
4010 int err = -ENOMEM;
4011
4012 md_allow_write(mddev);
4013
4014 file = kmalloc(sizeof(*file), GFP_KERNEL);
4015 if (!file)
4016 goto out;
4017
4018
4019 if (!mddev->bitmap || !mddev->bitmap->file) {
4020 file->pathname[0] = '\0';
4021 goto copy_out;
4022 }
4023
4024 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
4025 if (!buf)
4026 goto out;
4027
4028 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
4029 if (IS_ERR(ptr))
4030 goto out;
4031
4032 strcpy(file->pathname, ptr);
4033
4034copy_out:
4035 err = 0;
4036 if (copy_to_user(arg, file, sizeof(*file)))
4037 err = -EFAULT;
4038out:
4039 kfree(buf);
4040 kfree(file);
4041 return err;
4042}
4043
4044static int get_disk_info(mddev_t * mddev, void __user * arg)
4045{
4046 mdu_disk_info_t info;
4047 unsigned int nr;
4048 mdk_rdev_t *rdev;
4049
4050 if (copy_from_user(&info, arg, sizeof(info)))
4051 return -EFAULT;
4052
4053 nr = info.number;
4054
4055 rdev = find_rdev_nr(mddev, nr);
4056 if (rdev) {
4057 info.major = MAJOR(rdev->bdev->bd_dev);
4058 info.minor = MINOR(rdev->bdev->bd_dev);
4059 info.raid_disk = rdev->raid_disk;
4060 info.state = 0;
4061 if (test_bit(Faulty, &rdev->flags))
4062 info.state |= (1<<MD_DISK_FAULTY);
4063 else if (test_bit(In_sync, &rdev->flags)) {
4064 info.state |= (1<<MD_DISK_ACTIVE);
4065 info.state |= (1<<MD_DISK_SYNC);
4066 }
4067 if (test_bit(WriteMostly, &rdev->flags))
4068 info.state |= (1<<MD_DISK_WRITEMOSTLY);
4069 } else {
4070 info.major = info.minor = 0;
4071 info.raid_disk = -1;
4072 info.state = (1<<MD_DISK_REMOVED);
4073 }
4074
4075 if (copy_to_user(arg, &info, sizeof(info)))
4076 return -EFAULT;
4077
4078 return 0;
4079}
4080
/*
 * ADD_NEW_DISK helper: attach the device described by @info to @mddev.
 *
 * Three situations are handled, in this order:
 *  1. mddev->raid_disks == 0: the device is imported with the array's
 *     superblock version; if the array already has members its superblock
 *     is checked against the first member via load_super().
 *  2. mddev->pers is set: the device is imported and bound to the array;
 *     it is passed to the personality's hot_add_disk() right away only
 *     when the personality provides no hot_remove_disk().
 *  3. otherwise: only major_version 0 is accepted and the device is
 *     configured from the fields of @info.
 *
 * Returns 0 on success or a negative errno.
 */
static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdk_rdev_t *rdev;
	dev_t dev = MKDEV(info->major,info->minor);

	/* reject major/minor values that do not survive the dev_t encoding */
	if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
		return -EOVERFLOW;

	/* case 1: array geometry not set yet */
	if (!mddev->raid_disks) {
		int err;

		rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		if (!list_empty(&mddev->disks)) {
			/* compare against the first device already in the array */
			mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
							mdk_rdev_t, same_set);
			/* note: this 'err' shadows the outer one on purpose-less
			 * grounds; it is only used inside this block */
			int err = super_types[mddev->major_version]
				.load_super(rdev, rdev0, mddev->minor_version);
			if (err < 0) {
				printk(KERN_WARNING
					"md: %s has different UUID to %s\n",
					bdevname(rdev->bdev,b),
					bdevname(rdev0->bdev,b2));
				export_rdev(rdev);
				return -EINVAL;
			}
		}
		err = bind_rdev_to_array(rdev, mddev);
		if (err)
			export_rdev(rdev);
		return err;
	}

	/* case 2: a personality is attached to the array */
	if (mddev->pers) {
		int err;
		if (!mddev->pers->hot_add_disk) {
			printk(KERN_WARNING
				"%s: personality does not support diskops!\n",
			       mdname(mddev));
			return -EINVAL;
		}
		if (mddev->persistent)
			rdev = md_import_device(dev, mddev->major_version,
						mddev->minor_version);
		else
			rdev = md_import_device(dev, -1, -1);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}

		/* work out raid_disk so it can be remembered in saved_raid_disk */
		if (!mddev->persistent) {
			if (info->state & (1<<MD_DISK_SYNC)  &&
			    info->raid_disk < mddev->raid_disks)
				rdev->raid_disk = info->raid_disk;
			else
				rdev->raid_disk = -1;
		} else
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
		rdev->saved_raid_disk = rdev->raid_disk;

		clear_bit(In_sync, &rdev->flags);
		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);

		/* the device is bound with no slot assigned */
		rdev->raid_disk = -1;
		err = bind_rdev_to_array(rdev, mddev);
		if (!err && !mddev->pers->hot_remove_disk) {
			/* personality has hot_add_disk but no hot_remove_disk:
			 * hand the device over immediately */
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
			err = mddev->pers->hot_add_disk(mddev, rdev);
			if (err)
				unbind_rdev_from_array(rdev);
		}
		if (err)
			export_rdev(rdev);

		/* superblock update and recovery kick happen on success and failure */
		md_update_sb(mddev, 1);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		return err;
	}

	/* case 3: geometry set, no personality; only version-0 arrays */
	if (mddev->major_version != 0) {
		printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
		       mdname(mddev));
		return -EINVAL;
	}

	/* devices flagged MD_DISK_FAULTY are skipped and 0 is returned */
	if (!(info->state & (1<<MD_DISK_FAULTY))) {
		int err;
		rdev = md_import_device (dev, -1, 0);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: error, md_import_device() returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		rdev->desc_nr = info->number;
		if (info->raid_disk < mddev->raid_disks)
			rdev->raid_disk = info->raid_disk;
		else
			rdev->raid_disk = -1;

		if (rdev->raid_disk < mddev->raid_disks)
			if (info->state & (1<<MD_DISK_SYNC))
				set_bit(In_sync, &rdev->flags);

		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);

		if (!mddev->persistent) {
			printk(KERN_INFO "md: nonpersistent superblock ...\n");
			rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
		} else
			rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
		rdev->size = calc_dev_size(rdev, mddev->chunk_size);

		err = bind_rdev_to_array(rdev, mddev);
		if (err) {
			export_rdev(rdev);
			return err;
		}
	}

	return 0;
}
4229
4230static int hot_remove_disk(mddev_t * mddev, dev_t dev)
4231{
4232 char b[BDEVNAME_SIZE];
4233 mdk_rdev_t *rdev;
4234
4235 if (!mddev->pers)
4236 return -ENODEV;
4237
4238 rdev = find_rdev(mddev, dev);
4239 if (!rdev)
4240 return -ENXIO;
4241
4242 if (rdev->raid_disk >= 0)
4243 goto busy;
4244
4245 kick_rdev_from_array(rdev);
4246 md_update_sb(mddev, 1);
4247 md_new_event(mddev);
4248
4249 return 0;
4250busy:
4251 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
4252 bdevname(rdev->bdev,b), mdname(mddev));
4253 return -EBUSY;
4254}
4255
4256static int hot_add_disk(mddev_t * mddev, dev_t dev)
4257{
4258 char b[BDEVNAME_SIZE];
4259 int err;
4260 unsigned int size;
4261 mdk_rdev_t *rdev;
4262
4263 if (!mddev->pers)
4264 return -ENODEV;
4265
4266 if (mddev->major_version != 0) {
4267 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
4268 " version-0 superblocks.\n",
4269 mdname(mddev));
4270 return -EINVAL;
4271 }
4272 if (!mddev->pers->hot_add_disk) {
4273 printk(KERN_WARNING
4274 "%s: personality does not support diskops!\n",
4275 mdname(mddev));
4276 return -EINVAL;
4277 }
4278
4279 rdev = md_import_device (dev, -1, 0);
4280 if (IS_ERR(rdev)) {
4281 printk(KERN_WARNING
4282 "md: error, md_import_device() returned %ld\n",
4283 PTR_ERR(rdev));
4284 return -EINVAL;
4285 }
4286
4287 if (mddev->persistent)
4288 rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
4289 else
4290 rdev->sb_offset =
4291 rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
4292
4293 size = calc_dev_size(rdev, mddev->chunk_size);
4294 rdev->size = size;
4295
4296 if (test_bit(Faulty, &rdev->flags)) {
4297 printk(KERN_WARNING
4298 "md: can not hot-add faulty %s disk to %s!\n",
4299 bdevname(rdev->bdev,b), mdname(mddev));
4300 err = -EINVAL;
4301 goto abort_export;
4302 }
4303 clear_bit(In_sync, &rdev->flags);
4304 rdev->desc_nr = -1;
4305 rdev->saved_raid_disk = -1;
4306 err = bind_rdev_to_array(rdev, mddev);
4307 if (err)
4308 goto abort_export;
4309
4310
4311
4312
4313
4314
4315 if (rdev->desc_nr == mddev->max_disks) {
4316 printk(KERN_WARNING "%s: can not hot-add to full array!\n",
4317 mdname(mddev));
4318 err = -EBUSY;
4319 goto abort_unbind_export;
4320 }
4321
4322 rdev->raid_disk = -1;
4323
4324 md_update_sb(mddev, 1);
4325
4326
4327
4328
4329
4330 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4331 md_wakeup_thread(mddev->thread);
4332 md_new_event(mddev);
4333 return 0;
4334
4335abort_unbind_export:
4336 unbind_rdev_from_array(rdev);
4337
4338abort_export:
4339 export_rdev(rdev);
4340 return err;
4341}
4342
/*
 * SET_BITMAP_FILE helper: attach (fd >= 0) or remove (fd < 0) a
 * file-backed bitmap.
 *
 * On a running array the personality must provide quiesce() and no
 * recovery may be flagged or running.  Adding fails with -EEXIST when a
 * bitmap is already present; removing fails with -ENOENT when there is
 * none.
 *
 * Note that 'fd' doubles as a state flag: it is forced to -1 when bitmap
 * creation fails so that the common tail drops the file reference.
 *
 * Returns 0 on success or a negative errno.
 */
static int set_bitmap_file(mddev_t *mddev, int fd)
{
	int err;

	if (mddev->pers) {
		if (!mddev->pers->quiesce)
			return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;

	}


	if (fd >= 0) {
		/* cannot add a bitmap when one is already present */
		if (mddev->bitmap)
			return -EEXIST;
		mddev->bitmap_file = fget(fd);

		if (mddev->bitmap_file == NULL) {
			printk(KERN_ERR "%s: error: failed to get bitmap file\n",
			       mdname(mddev));
			return -EBADF;
		}

		err = deny_bitmap_write_access(mddev->bitmap_file);
		if (err) {
			printk(KERN_ERR "%s: error: bitmap file is already in use\n",
			       mdname(mddev));
			fput(mddev->bitmap_file);
			mddev->bitmap_file = NULL;
			return err;
		}
		/* a file-backed bitmap uses no on-device offset */
		mddev->bitmap_offset = 0;
	} else if (mddev->bitmap == NULL)
		return -ENOENT;
	err = 0;
	if (mddev->pers) {
		/* create/destroy the bitmap while the array is quiesced */
		mddev->pers->quiesce(mddev, 1);
		if (fd >= 0)
			err = bitmap_create(mddev);
		if (fd < 0 || err) {
			bitmap_destroy(mddev);
			/* force the file reference to be dropped below */
			fd = -1;
		}
		mddev->pers->quiesce(mddev, 0);
	}
	if (fd < 0) {
		if (mddev->bitmap_file) {
			restore_bitmap_write_access(mddev->bitmap_file);
			fput(mddev->bitmap_file);
		}
		mddev->bitmap_file = NULL;
	}

	return err;
}
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4414{
4415
4416 if (info->raid_disks == 0) {
4417
4418 if (info->major_version < 0 ||
4419 info->major_version >= ARRAY_SIZE(super_types) ||
4420 super_types[info->major_version].name == NULL) {
4421
4422 printk(KERN_INFO
4423 "md: superblock version %d not known\n",
4424 info->major_version);
4425 return -EINVAL;
4426 }
4427 mddev->major_version = info->major_version;
4428 mddev->minor_version = info->minor_version;
4429 mddev->patch_version = info->patch_version;
4430 mddev->persistent = !info->not_persistent;
4431 return 0;
4432 }
4433 mddev->major_version = MD_MAJOR_VERSION;
4434 mddev->minor_version = MD_MINOR_VERSION;
4435 mddev->patch_version = MD_PATCHLEVEL_VERSION;
4436 mddev->ctime = get_seconds();
4437
4438 mddev->level = info->level;
4439 mddev->clevel[0] = 0;
4440 mddev->size = info->size;
4441 mddev->raid_disks = info->raid_disks;
4442
4443
4444
4445 if (info->state & (1<<MD_SB_CLEAN))
4446 mddev->recovery_cp = MaxSector;
4447 else
4448 mddev->recovery_cp = 0;
4449 mddev->persistent = ! info->not_persistent;
4450 mddev->external = 0;
4451
4452 mddev->layout = info->layout;
4453 mddev->chunk_size = info->chunk_size;
4454
4455 mddev->max_disks = MD_SB_DISKS;
4456
4457 if (mddev->persistent)
4458 mddev->flags = 0;
4459 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4460
4461 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
4462 mddev->bitmap_offset = 0;
4463
4464 mddev->reshape_position = MaxSector;
4465
4466
4467
4468
4469 get_random_bytes(mddev->uuid, 16);
4470
4471 mddev->new_level = mddev->level;
4472 mddev->new_chunk = mddev->chunk_size;
4473 mddev->new_layout = mddev->layout;
4474 mddev->delta_disks = 0;
4475
4476 return 0;
4477}
4478
4479static int update_size(mddev_t *mddev, unsigned long size)
4480{
4481 mdk_rdev_t * rdev;
4482 int rv;
4483 struct list_head *tmp;
4484 int fit = (size == 0);
4485
4486 if (mddev->pers->resize == NULL)
4487 return -EINVAL;
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498 if (mddev->sync_thread)
4499 return -EBUSY;
4500 rdev_for_each(rdev, tmp, mddev) {
4501 sector_t avail;
4502 avail = rdev->size * 2;
4503
4504 if (fit && (size == 0 || size > avail/2))
4505 size = avail/2;
4506 if (avail < ((sector_t)size << 1))
4507 return -ENOSPC;
4508 }
4509 rv = mddev->pers->resize(mddev, (sector_t)size *2);
4510 if (!rv) {
4511 struct block_device *bdev;
4512
4513 bdev = bdget_disk(mddev->gendisk, 0);
4514 if (bdev) {
4515 mutex_lock(&bdev->bd_inode->i_mutex);
4516 i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10);
4517 mutex_unlock(&bdev->bd_inode->i_mutex);
4518 bdput(bdev);
4519 }
4520 }
4521 return rv;
4522}
4523
4524static int update_raid_disks(mddev_t *mddev, int raid_disks)
4525{
4526 int rv;
4527
4528 if (mddev->pers->check_reshape == NULL)
4529 return -EINVAL;
4530 if (raid_disks <= 0 ||
4531 raid_disks >= mddev->max_disks)
4532 return -EINVAL;
4533 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
4534 return -EBUSY;
4535 mddev->delta_disks = raid_disks - mddev->raid_disks;
4536
4537 rv = mddev->pers->check_reshape(mddev);
4538 return rv;
4539}
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
/*
 * SET_ARRAY_INFO helper for an array that already has a personality.
 *
 * @info must match the current array in version, ctime, level,
 * persistence and chunk size.  Exactly one of the following may differ
 * and is then applied: size, raid_disks, layout, or the presence of an
 * internal bitmap (MD_SB_BITMAP_PRESENT in info->state).
 *
 * Returns 0 when nothing changes or the change succeeds, -EINVAL for an
 * unsupported or multiple change, or the error of the operation invoked.
 */
static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
{
	int rv = 0;
	int cnt = 0;
	int state = 0;

	/* compute the state bit describing the current bitmap situation */
	if (mddev->bitmap && mddev->bitmap_offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);

	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||

	    mddev->ctime         != info->ctime         ||
	    mddev->level         != info->level         ||

	    !mddev->persistent	 != info->not_persistent||
	    mddev->chunk_size    != info->chunk_size    ||
	    /* differences in the low 9 state bits are ignored by this mask */
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;

	/* count the requested changes: exactly one is allowed per call */
	if (info->size >= 0 && mddev->size != info->size) cnt++;
	if (mddev->raid_disks != info->raid_disks) cnt++;
	if (mddev->layout != info->layout) cnt++;
	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
	if (cnt == 0) return 0;
	if (cnt > 1) return -EINVAL;

	if (mddev->layout != info->layout) {
		/* layout change is delegated entirely to the personality;
		 * note this path returns without calling md_update_sb() */
		if (mddev->pers->reconfig == NULL)
			return -EINVAL;
		else
			return mddev->pers->reconfig(mddev, info->layout, -1);
	}
	if (info->size >= 0 && mddev->size != info->size)
		rv = update_size(mddev, info->size);

	if (mddev->raid_disks != info->raid_disks)
		rv = update_raid_disks(mddev, info->raid_disks);

	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
		if (mddev->pers->quiesce == NULL)
			return -EINVAL;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			/* add an internal bitmap at the default offset */
			if (mddev->bitmap)
				return -EEXIST;
			if (mddev->default_bitmap_offset == 0)
				return -EINVAL;
			mddev->bitmap_offset = mddev->default_bitmap_offset;
			mddev->pers->quiesce(mddev, 1);
			rv = bitmap_create(mddev);
			if (rv)
				bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		} else {
			/* remove the bitmap; file-backed bitmaps are refused here */
			if (!mddev->bitmap)
				return -ENOENT;
			if (mddev->bitmap->file)
				return -EINVAL;
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
			mddev->bitmap_offset = 0;
		}
	}
	/* the superblock is rewritten even when rv holds an error */
	md_update_sb(mddev, 1);
	return rv;
}
4628
4629static int set_disk_faulty(mddev_t *mddev, dev_t dev)
4630{
4631 mdk_rdev_t *rdev;
4632
4633 if (mddev->pers == NULL)
4634 return -ENODEV;
4635
4636 rdev = find_rdev(mddev, dev);
4637 if (!rdev)
4638 return -ENODEV;
4639
4640 md_error(mddev, rdev);
4641 return 0;
4642}
4643
4644static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
4645{
4646 mddev_t *mddev = bdev->bd_disk->private_data;
4647
4648 geo->heads = 2;
4649 geo->sectors = 4;
4650 geo->cylinders = get_capacity(mddev->gendisk) / 8;
4651 return 0;
4652}
4653
/*
 * ioctl entry point for md block devices.
 *
 * Requires CAP_SYS_ADMIN.  Commands are processed in stages:
 *  1. driver-wide commands that need no array (RAID_VERSION,
 *     PRINT_RAID_DEBUG and, when built in, RAID_AUTORUN);
 *  2. SET_ARRAY_INFO, handled under the array lock;
 *  3. query/stop commands, valid on read-only arrays too;
 *  4. modifying commands, which first switch an auto-read-only array
 *     (ro == 2) to read-write or fail with -EROFS on a read-only one.
 *
 * Returns 0 on success or a negative errno.
 */
static int md_ioctl(struct inode *inode, struct file *file,
			unsigned int cmd, unsigned long arg)
{
	int err = 0;
	void __user *argp = (void __user *)arg;
	mddev_t *mddev = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	/*
	 * Stage 1: commands dealing with the driver as a whole; no mddev
	 * is looked up and no lock is taken.
	 */
	switch (cmd)
	{
		case RAID_VERSION:
			err = get_version(argp);
			goto done;

		case PRINT_RAID_DEBUG:
			err = 0;
			md_print_devices();
			goto done;

#ifndef MODULE
		case RAID_AUTORUN:
			err = 0;
			autostart_arrays(arg);
			goto done;
#endif
		default:;
	}

	/*
	 * Everything below operates on the array behind this block device.
	 */
	mddev = inode->i_bdev->bd_disk->private_data;

	if (!mddev) {
		BUG();
		goto abort;
	}

	err = mddev_lock(mddev);
	if (err) {
		printk(KERN_INFO
			"md: ioctl lock interrupted, reason %d, cmd %d\n",
			err, cmd);
		goto abort;
	}

	/* Stage 2: SET_ARRAY_INFO */
	switch (cmd)
	{
		case SET_ARRAY_INFO:
			{
				mdu_array_info_t info;
				/* a NULL argument is treated as an all-zero request */
				if (!arg)
					memset(&info, 0, sizeof(info));
				else if (copy_from_user(&info, argp, sizeof(info))) {
					err = -EFAULT;
					goto abort_unlock;
				}
				/* running array: this is an update request */
				if (mddev->pers) {
					err = update_array_info(mddev, &info);
					if (err) {
						printk(KERN_WARNING "md: couldn't update"
						       " array info. %d\n", err);
						goto abort_unlock;
					}
					goto done_unlock;
				}
				if (!list_empty(&mddev->disks)) {
					printk(KERN_WARNING
					       "md: array %s already has disks!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				if (mddev->raid_disks) {
					printk(KERN_WARNING
					       "md: array %s already initialised!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				err = set_array_info(mddev, &info);
				if (err) {
					printk(KERN_WARNING "md: couldn't set"
					       " array info. %d\n", err);
					goto abort_unlock;
				}
			}
			goto done_unlock;

		default:;
	}

	/*
	 * While the array is not initialised (no raid_disks and not
	 * external) only ADD_NEW_DISK, STOP_ARRAY, RUN_ARRAY and
	 * SET_/GET_BITMAP_FILE are accepted.
	 */
	if ((!mddev->raid_disks && !mddev->external)
	    && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
	    && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
	    && cmd != GET_BITMAP_FILE) {
		err = -ENODEV;
		goto abort_unlock;
	}

	/*
	 * Stage 3: commands handled before the read-only check below.
	 */
	switch (cmd)
	{
		case GET_ARRAY_INFO:
			err = get_array_info(mddev, argp);
			goto done_unlock;

		case GET_BITMAP_FILE:
			err = get_bitmap_file(mddev, argp);
			goto done_unlock;

		case GET_DISK_INFO:
			err = get_disk_info(mddev, argp);
			goto done_unlock;

		case RESTART_ARRAY_RW:
			err = restart_array(mddev);
			goto done_unlock;

		case STOP_ARRAY:
			err = do_md_stop (mddev, 0);
			goto done_unlock;

		case STOP_ARRAY_RO:
			err = do_md_stop (mddev, 1);
			goto done_unlock;

	/* no default: unmatched commands fall through to stage 4 */
	}

	/*
	 * Stage 4 precondition: for md ioctls on a running array with 'ro'
	 * set, ro == 2 is switched to read-write (and recovery is kicked);
	 * any other non-zero ro value yields -EROFS.
	 */
	if (_IOC_TYPE(cmd) == MD_MAJOR &&
	    mddev->ro && mddev->pers) {
		if (mddev->ro == 2) {
			mddev->ro = 0;
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			md_wakeup_thread(mddev->thread);

		} else {
			err = -EROFS;
			goto abort_unlock;
		}
	}

	switch (cmd)
	{
		case ADD_NEW_DISK:
		{
			mdu_disk_info_t info;
			if (copy_from_user(&info, argp, sizeof(info)))
				err = -EFAULT;
			else
				err = add_new_disk(mddev, &info);
			goto done_unlock;
		}

		case HOT_REMOVE_DISK:
			err = hot_remove_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case HOT_ADD_DISK:
			err = hot_add_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case SET_DISK_FAULTY:
			err = set_disk_faulty(mddev, new_decode_dev(arg));
			goto done_unlock;

		case RUN_ARRAY:
			err = do_md_run (mddev);
			goto done_unlock;

		case SET_BITMAP_FILE:
			err = set_bitmap_file(mddev, (int)arg);
			goto done_unlock;

		default:
			err = -EINVAL;
			goto abort_unlock;
	}

	/* both labels share the same exit: drop the array lock */
done_unlock:
abort_unlock:
	mddev_unlock(mddev);

	return err;
done:
	/* a failing driver-wide command is reported through MD_BUG() */
	if (err)
		MD_BUG();
abort:
	return err;
}
4871
4872static int md_open(struct inode *inode, struct file *file)
4873{
4874
4875
4876
4877
4878 mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
4879 int err;
4880
4881 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
4882 goto out;
4883
4884 err = 0;
4885 mddev_get(mddev);
4886 mddev_unlock(mddev);
4887
4888 check_disk_change(inode->i_bdev);
4889 out:
4890 return err;
4891}
4892
4893static int md_release(struct inode *inode, struct file * file)
4894{
4895 mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
4896
4897 BUG_ON(!mddev);
4898 mddev_put(mddev);
4899
4900 return 0;
4901}
4902
4903static int md_media_changed(struct gendisk *disk)
4904{
4905 mddev_t *mddev = disk->private_data;
4906
4907 return mddev->changed;
4908}
4909
4910static int md_revalidate(struct gendisk *disk)
4911{
4912 mddev_t *mddev = disk->private_data;
4913
4914 mddev->changed = 0;
4915 return 0;
4916}
/* Block-device operations installed for md devices. */
static struct block_device_operations md_fops =
{
	.owner		= THIS_MODULE,
	.open		= md_open,		/* takes an mddev reference */
	.release	= md_release,		/* drops that reference */
	.ioctl		= md_ioctl,
	.getgeo		= md_getgeo,
	.media_changed	= md_media_changed,	/* reads mddev->changed */
	.revalidate_disk= md_revalidate,	/* clears mddev->changed */
};
4927
4928static int md_thread(void * arg)
4929{
4930 mdk_thread_t *thread = arg;
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944 allow_signal(SIGKILL);
4945 while (!kthread_should_stop()) {
4946
4947
4948
4949
4950
4951
4952 if (signal_pending(current))
4953 flush_signals(current);
4954
4955 wait_event_interruptible_timeout
4956 (thread->wqueue,
4957 test_bit(THREAD_WAKEUP, &thread->flags)
4958 || kthread_should_stop(),
4959 thread->timeout);
4960
4961 clear_bit(THREAD_WAKEUP, &thread->flags);
4962
4963 thread->run(thread->mddev);
4964 }
4965
4966 return 0;
4967}
4968
4969void md_wakeup_thread(mdk_thread_t *thread)
4970{
4971 if (thread) {
4972 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
4973 set_bit(THREAD_WAKEUP, &thread->flags);
4974 wake_up(&thread->wqueue);
4975 }
4976}
4977
4978mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
4979 const char *name)
4980{
4981 mdk_thread_t *thread;
4982
4983 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
4984 if (!thread)
4985 return NULL;
4986
4987 init_waitqueue_head(&thread->wqueue);
4988
4989 thread->run = run;
4990 thread->mddev = mddev;
4991 thread->timeout = MAX_SCHEDULE_TIMEOUT;
4992 thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
4993 if (IS_ERR(thread->tsk)) {
4994 kfree(thread);
4995 return NULL;
4996 }
4997 return thread;
4998}
4999
5000void md_unregister_thread(mdk_thread_t *thread)
5001{
5002 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
5003
5004 kthread_stop(thread->tsk);
5005 kfree(thread);
5006}
5007
5008void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
5009{
5010 if (!mddev) {
5011 MD_BUG();
5012 return;
5013 }
5014
5015 if (!rdev || test_bit(Faulty, &rdev->flags))
5016 return;
5017
5018 if (mddev->external)
5019 set_bit(Blocked, &rdev->flags);
5020
5021
5022
5023
5024
5025
5026
5027 if (!mddev->pers)
5028 return;
5029 if (!mddev->pers->error_handler)
5030 return;
5031 mddev->pers->error_handler(mddev,rdev);
5032 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5033 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5034 md_wakeup_thread(mddev->thread);
5035 md_new_event_inintr(mddev);
5036}
5037
5038
5039
5040static void status_unused(struct seq_file *seq)
5041{
5042 int i = 0;
5043 mdk_rdev_t *rdev;
5044 struct list_head *tmp;
5045
5046 seq_printf(seq, "unused devices: ");
5047
5048 rdev_for_each_list(rdev, tmp, pending_raid_disks) {
5049 char b[BDEVNAME_SIZE];
5050 i++;
5051 seq_printf(seq, "%s ",
5052 bdevname(rdev->bdev,b));
5053 }
5054 if (!i)
5055 seq_printf(seq, "<none>");
5056
5057 seq_printf(seq, "\n");
5058}
5059
5060
/*
 * Print the resync/recovery progress of @mddev into @seq: a 20-step
 * progress bar, the operation name (reshape/check/resync/recovery), the
 * completed fraction in tenths of a percent, the done/total counts, an
 * estimated time to finish and the current speed.
 */
static void status_resync(struct seq_file *seq, mddev_t * mddev)
{
	sector_t max_blocks, resync, res;
	unsigned long dt, db, rt;
	int scale;
	unsigned int per_milli;

	/* position reached, minus I/O still in flight, halved */
	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;

	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
		max_blocks = mddev->resync_max_sectors >> 1;
	else
		max_blocks = mddev->size;

	/*
	 * A zero total would make the division below meaningless; it is
	 * reported through MD_BUG().
	 */
	if (!max_blocks) {
		MD_BUG();
		return;
	}
	/*
	 * 'scale' shifts both operands down so that (resync>>scale)*1000
	 * still fits in a sector_t and (max_blocks>>scale) fits in the u32
	 * divisor that sector_div() is given below.  It starts at 10 and is
	 * only increased when sector_t is wider than unsigned long.
	 */
	scale = 10;
	if (sizeof(sector_t) > sizeof(unsigned long)) {
		while ( max_blocks/2 > (1ULL<<(scale+32)))
			scale++;
	}
	res = (resync>>scale)*1000;
	sector_div(res, (u32)((max_blocks>>scale)+1));

	/* progress in thousandths */
	per_milli = res;
	{
		/* one '=' per 5% done, then '>', then '.' up to 20 steps */
		int i, x = per_milli/50, y = 20-x;
		seq_printf(seq, "[");
		for (i = 0; i < x; i++)
			seq_printf(seq, "=");
		seq_printf(seq, ">");
		for (i = 0; i < y; i++)
			seq_printf(seq, ".");
		seq_printf(seq, "] ");
	}
	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
		    "reshape" :
		    (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
		     "check" :
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		      "resync" : "recovery"))),
		   per_milli/10, per_milli % 10,
		   (unsigned long long) resync,
		   (unsigned long long) max_blocks);

	/*
	 * dt: seconds since the last resync mark (at least 1).
	 * db: amount processed since that mark, excluding in-flight I/O.
	 * rt: remaining amount divided by the recent rate, in seconds; the
	 *     divisions by 100 are arranged so the intermediate values stay
	 *     within an unsigned long -- NOTE(review): the cast of
	 *     (max_blocks-resync) to unsigned long can truncate on 32-bit
	 *     builds with a 64-bit sector_t; confirm this is acceptable.
	 */
	dt = ((jiffies - mddev->resync_mark) / HZ);
	if (!dt) dt++;
	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
		- mddev->resync_mark_cnt;
	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;

	/* minutes and tenths of a minute */
	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);

	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
5136
5137static void *md_seq_start(struct seq_file *seq, loff_t *pos)
5138{
5139 struct list_head *tmp;
5140 loff_t l = *pos;
5141 mddev_t *mddev;
5142
5143 if (l >= 0x10000)
5144 return NULL;
5145 if (!l--)
5146
5147 return (void*)1;
5148
5149 spin_lock(&all_mddevs_lock);
5150 list_for_each(tmp,&all_mddevs)
5151 if (!l--) {
5152 mddev = list_entry(tmp, mddev_t, all_mddevs);
5153 mddev_get(mddev);
5154 spin_unlock(&all_mddevs_lock);
5155 return mddev;
5156 }
5157 spin_unlock(&all_mddevs_lock);
5158 if (!l--)
5159 return (void*)2;
5160 return NULL;
5161}
5162
5163static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
5164{
5165 struct list_head *tmp;
5166 mddev_t *next_mddev, *mddev = v;
5167
5168 ++*pos;
5169 if (v == (void*)2)
5170 return NULL;
5171
5172 spin_lock(&all_mddevs_lock);
5173 if (v == (void*)1)
5174 tmp = all_mddevs.next;
5175 else
5176 tmp = mddev->all_mddevs.next;
5177 if (tmp != &all_mddevs)
5178 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
5179 else {
5180 next_mddev = (void*)2;
5181 *pos = 0x10000;
5182 }
5183 spin_unlock(&all_mddevs_lock);
5184
5185 if (v != (void*)1)
5186 mddev_put(mddev);
5187 return next_mddev;
5188
5189}
5190
5191static void md_seq_stop(struct seq_file *seq, void *v)
5192{
5193 mddev_t *mddev = v;
5194
5195 if (mddev && v != (void*)1 && v != (void*)2)
5196 mddev_put(mddev);
5197}
5198
/*
 * Per-open state of the md status file, stored in seq_file->private.
 */
struct mdstat_info {
	int event;	/* md_event_count value last seen by this reader */
};
5202
/*
 * seq_file show operation for the md status file.
 *
 * @v is (void *)1 for the header (list of registered personalities),
 * (void *)2 for the trailer (unused devices), or an mddev_t whose status
 * is printed while holding the array lock.
 *
 * Returns 0, or -EINTR if taking the array lock fails.
 */
static int md_seq_show(struct seq_file *seq, void *v)
{
	mddev_t *mddev = v;
	sector_t size;
	struct list_head *tmp2;
	mdk_rdev_t *rdev;
	struct mdstat_info *mi = seq->private;
	struct bitmap *bitmap;

	/* header record */
	if (v == (void*)1) {
		struct mdk_personality *pers;
		seq_printf(seq, "Personalities : ");
		spin_lock(&pers_lock);
		list_for_each_entry(pers, &pers_list, list)
			seq_printf(seq, "[%s] ", pers->name);

		spin_unlock(&pers_lock);
		seq_printf(seq, "\n");
		/* remember the event count at the time the header was shown */
		mi->event = atomic_read(&md_event_count);
		return 0;
	}
	/* trailer record */
	if (v == (void*)2) {
		status_unused(seq);
		return 0;
	}

	if (mddev_lock(mddev) < 0)
		return -EINTR;

	/* arrays with no personality, geometry or devices print nothing */
	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
		seq_printf(seq, "%s : %sactive", mdname(mddev),
						mddev->pers ? "" : "in");
		if (mddev->pers) {
			if (mddev->ro==1)
				seq_printf(seq, " (read-only)");
			if (mddev->ro==2)
				seq_printf(seq, " (auto-read-only)");
			seq_printf(seq, " %s", mddev->pers->name);
		}

		/* list members; 'size' sums the sizes of non-faulty ones */
		size = 0;
		rdev_for_each(rdev, tmp2, mddev) {
			char b[BDEVNAME_SIZE];
			seq_printf(seq, " %s[%d]",
				bdevname(rdev->bdev,b), rdev->desc_nr);
			if (test_bit(WriteMostly, &rdev->flags))
				seq_printf(seq, "(W)");
			if (test_bit(Faulty, &rdev->flags)) {
				seq_printf(seq, "(F)");
				continue;
			} else if (rdev->raid_disk < 0)
				seq_printf(seq, "(S)");
			size += rdev->size;
		}

		if (!list_empty(&mddev->disks)) {
			/* running arrays report array_size, others the sum */
			if (mddev->pers)
				seq_printf(seq, "\n %llu blocks",
					(unsigned long long)mddev->array_size);
			else
				seq_printf(seq, "\n %llu blocks",
					(unsigned long long)size);
		}
		if (mddev->persistent) {
			/* version 0.90 is not printed */
			if (mddev->major_version != 0 ||
			    mddev->minor_version != 90) {
				seq_printf(seq," super %d.%d",
					   mddev->major_version,
					   mddev->minor_version);
			}
		} else if (mddev->external)
			seq_printf(seq, " super external:%s",
				   mddev->metadata_type);
		else
			seq_printf(seq, " super non-persistent");

		if (mddev->pers) {
			mddev->pers->status (seq, mddev);
			seq_printf(seq, "\n ");
			/* resync state only for personalities that can sync */
			if (mddev->pers->sync_request) {
				if (mddev->curr_resync > 2) {
					status_resync (seq, mddev);
					seq_printf(seq, "\n ");
				} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
					seq_printf(seq, "\tresync=DELAYED\n ");
				else if (mddev->recovery_cp < MaxSector)
					seq_printf(seq, "\tresync=PENDING\n ");
			}
		} else
			seq_printf(seq, "\n ");

		if ((bitmap = mddev->bitmap)) {
			unsigned long chunk_kb;
			unsigned long flags;
			/* bitmap counters are read under the bitmap lock */
			spin_lock_irqsave(&bitmap->lock, flags);
			chunk_kb = bitmap->chunksize >> 10;
			seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
				"%lu%s chunk",
				bitmap->pages - bitmap->missing_pages,
				bitmap->pages,
				(bitmap->pages - bitmap->missing_pages)
					<< (PAGE_SHIFT - 10),
				chunk_kb ? chunk_kb : bitmap->chunksize,
				chunk_kb ? "KB" : "B");
			if (bitmap->file) {
				seq_printf(seq, ", file: ");
				seq_path(seq, &bitmap->file->f_path, " \t\n");
			}

			seq_printf(seq, "\n");
			spin_unlock_irqrestore(&bitmap->lock, flags);
		}

		seq_printf(seq, "\n");
	}
	mddev_unlock(mddev);

	return 0;
}
5322
/* seq_file iterator used by md_seq_open() for the md status file. */
static struct seq_operations md_seq_ops = {
	.start  = md_seq_start,
	.next   = md_seq_next,
	.stop   = md_seq_stop,
	.show   = md_seq_show,
};
5329
5330static int md_seq_open(struct inode *inode, struct file *file)
5331{
5332 int error;
5333 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
5334 if (mi == NULL)
5335 return -ENOMEM;
5336
5337 error = seq_open(file, &md_seq_ops);
5338 if (error)
5339 kfree(mi);
5340 else {
5341 struct seq_file *p = file->private_data;
5342 p->private = mi;
5343 mi->event = atomic_read(&md_event_count);
5344 }
5345 return error;
5346}
5347
5348static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
5349{
5350 struct seq_file *m = filp->private_data;
5351 struct mdstat_info *mi = m->private;
5352 int mask;
5353
5354 poll_wait(filp, &md_event_waiters, wait);
5355
5356
5357 mask = POLLIN | POLLRDNORM;
5358
5359 if (mi->event != atomic_read(&md_event_count))
5360 mask |= POLLERR | POLLPRI;
5361 return mask;
5362}
5363
5364static const struct file_operations md_seq_fops = {
5365 .owner = THIS_MODULE,
5366 .open = md_seq_open,
5367 .read = seq_read,
5368 .llseek = seq_lseek,
5369 .release = seq_release_private,
5370 .poll = mdstat_poll,
5371};
5372
5373int register_md_personality(struct mdk_personality *p)
5374{
5375 spin_lock(&pers_lock);
5376 list_add_tail(&p->list, &pers_list);
5377 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
5378 spin_unlock(&pers_lock);
5379 return 0;
5380}
5381
5382int unregister_md_personality(struct mdk_personality *p)
5383{
5384 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
5385 spin_lock(&pers_lock);
5386 list_del_init(&p->list);
5387 spin_unlock(&pers_lock);
5388 return 0;
5389}
5390
5391static int is_mddev_idle(mddev_t *mddev)
5392{
5393 mdk_rdev_t * rdev;
5394 struct list_head *tmp;
5395 int idle;
5396 long curr_events;
5397
5398 idle = 1;
5399 rdev_for_each(rdev, tmp, mddev) {
5400 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
5401 curr_events = disk_stat_read(disk, sectors[0]) +
5402 disk_stat_read(disk, sectors[1]) -
5403 atomic_read(&disk->sync_io);
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426 if (curr_events - rdev->last_events > 4096) {
5427 rdev->last_events = curr_events;
5428 idle = 0;
5429 }
5430 }
5431 return idle;
5432}
5433
5434void md_done_sync(mddev_t *mddev, int blocks, int ok)
5435{
5436
5437 atomic_sub(blocks, &mddev->recovery_active);
5438 wake_up(&mddev->recovery_wait);
5439 if (!ok) {
5440 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5441 md_wakeup_thread(mddev->thread);
5442
5443 }
5444}
5445
5446
5447
5448
5449
5450
5451
5452void md_write_start(mddev_t *mddev, struct bio *bi)
5453{
5454 if (bio_data_dir(bi) != WRITE)
5455 return;
5456
5457 BUG_ON(mddev->ro == 1);
5458 if (mddev->ro == 2) {
5459
5460 mddev->ro = 0;
5461 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5462 md_wakeup_thread(mddev->thread);
5463 md_wakeup_thread(mddev->sync_thread);
5464 }
5465 atomic_inc(&mddev->writes_pending);
5466 if (mddev->safemode == 1)
5467 mddev->safemode = 0;
5468 if (mddev->in_sync) {
5469 spin_lock_irq(&mddev->write_lock);
5470 if (mddev->in_sync) {
5471 mddev->in_sync = 0;
5472 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5473 md_wakeup_thread(mddev->thread);
5474 }
5475 spin_unlock_irq(&mddev->write_lock);
5476 sysfs_notify(&mddev->kobj, NULL, "array_state");
5477 }
5478 wait_event(mddev->sb_wait,
5479 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
5480 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5481}
5482
5483void md_write_end(mddev_t *mddev)
5484{
5485 if (atomic_dec_and_test(&mddev->writes_pending)) {
5486 if (mddev->safemode == 2)
5487 md_wakeup_thread(mddev->thread);
5488 else if (mddev->safemode_delay)
5489 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
5490 }
5491}
5492
5493
5494
5495
5496
5497
5498
5499void md_allow_write(mddev_t *mddev)
5500{
5501 if (!mddev->pers)
5502 return;
5503 if (mddev->ro)
5504 return;
5505
5506 spin_lock_irq(&mddev->write_lock);
5507 if (mddev->in_sync) {
5508 mddev->in_sync = 0;
5509 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5510 if (mddev->safemode_delay &&
5511 mddev->safemode == 0)
5512 mddev->safemode = 1;
5513 spin_unlock_irq(&mddev->write_lock);
5514 md_update_sb(mddev, 0);
5515
5516 sysfs_notify(&mddev->kobj, NULL, "array_state");
5517
5518 wait_event(mddev->sb_wait,
5519 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
5520 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5521 } else
5522 spin_unlock_irq(&mddev->write_lock);
5523}
5524EXPORT_SYMBOL_GPL(md_allow_write);
5525
5526#define SYNC_MARKS 10
5527#define SYNC_MARK_STEP (3*HZ)
5528void md_do_sync(mddev_t *mddev)
5529{
5530 mddev_t *mddev2;
5531 unsigned int currspeed = 0,
5532 window;
5533 sector_t max_sectors,j, io_sectors;
5534 unsigned long mark[SYNC_MARKS];
5535 sector_t mark_cnt[SYNC_MARKS];
5536 int last_mark,m;
5537 struct list_head *tmp;
5538 sector_t last_check;
5539 int skipped = 0;
5540 struct list_head *rtmp;
5541 mdk_rdev_t *rdev;
5542 char *desc;
5543
5544
5545 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
5546 return;
5547 if (mddev->ro)
5548 return;
5549
5550 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
5551 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
5552 desc = "data-check";
5553 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
5554 desc = "requested-resync";
5555 else
5556 desc = "resync";
5557 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5558 desc = "reshape";
5559 else
5560 desc = "recovery";
5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575
5576
5577
5578 do {
5579 mddev->curr_resync = 2;
5580
5581 try_again:
5582 if (kthread_should_stop()) {
5583 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5584 goto skip;
5585 }
5586 for_each_mddev(mddev2, tmp) {
5587 if (mddev2 == mddev)
5588 continue;
5589 if (!mddev->parallel_resync
5590 && mddev2->curr_resync
5591 && match_mddev_units(mddev, mddev2)) {
5592 DEFINE_WAIT(wq);
5593 if (mddev < mddev2 && mddev->curr_resync == 2) {
5594
5595 mddev->curr_resync = 1;
5596 wake_up(&resync_wait);
5597 }
5598 if (mddev > mddev2 && mddev->curr_resync == 1)
5599
5600
5601
5602 continue;
5603 prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE);
5604 if (!kthread_should_stop() &&
5605 mddev2->curr_resync >= mddev->curr_resync) {
5606 printk(KERN_INFO "md: delaying %s of %s"
5607 " until %s has finished (they"
5608 " share one or more physical units)\n",
5609 desc, mdname(mddev), mdname(mddev2));
5610 mddev_put(mddev2);
5611 schedule();
5612 finish_wait(&resync_wait, &wq);
5613 goto try_again;
5614 }
5615 finish_wait(&resync_wait, &wq);
5616 }
5617 }
5618 } while (mddev->curr_resync < 2);
5619
5620 j = 0;
5621 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
5622
5623
5624
5625 max_sectors = mddev->resync_max_sectors;
5626 mddev->resync_mismatches = 0;
5627
5628 if (!mddev->bitmap &&
5629 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
5630 j = mddev->recovery_cp;
5631 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5632 max_sectors = mddev->size << 1;
5633 else {
5634
5635 max_sectors = mddev->size << 1;
5636 j = MaxSector;
5637 rdev_for_each(rdev, rtmp, mddev)
5638 if (rdev->raid_disk >= 0 &&
5639 !test_bit(Faulty, &rdev->flags) &&
5640 !test_bit(In_sync, &rdev->flags) &&
5641 rdev->recovery_offset < j)
5642 j = rdev->recovery_offset;
5643 }
5644
5645 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
5646 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
5647 " %d KB/sec/disk.\n", speed_min(mddev));
5648 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
5649 "(but not more than %d KB/sec) for %s.\n",
5650 speed_max(mddev), desc);
5651
5652 is_mddev_idle(mddev);
5653
5654 io_sectors = 0;
5655 for (m = 0; m < SYNC_MARKS; m++) {
5656 mark[m] = jiffies;
5657 mark_cnt[m] = io_sectors;
5658 }
5659 last_mark = 0;
5660 mddev->resync_mark = mark[last_mark];
5661 mddev->resync_mark_cnt = mark_cnt[last_mark];
5662
5663
5664
5665
5666 window = 32*(PAGE_SIZE/512);
5667 printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
5668 window/2,(unsigned long long) max_sectors/2);
5669
5670 atomic_set(&mddev->recovery_active, 0);
5671 last_check = 0;
5672
5673 if (j>2) {
5674 printk(KERN_INFO
5675 "md: resuming %s of %s from checkpoint.\n",
5676 desc, mdname(mddev));
5677 mddev->curr_resync = j;
5678 }
5679
5680 while (j < max_sectors) {
5681 sector_t sectors;
5682
5683 skipped = 0;
5684 if (j >= mddev->resync_max) {
5685 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5686 wait_event(mddev->recovery_wait,
5687 mddev->resync_max > j
5688 || kthread_should_stop());
5689 }
5690 if (kthread_should_stop())
5691 goto interrupted;
5692 sectors = mddev->pers->sync_request(mddev, j, &skipped,
5693 currspeed < speed_min(mddev));
5694 if (sectors == 0) {
5695 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5696 goto out;
5697 }
5698
5699 if (!skipped) {
5700 io_sectors += sectors;
5701 atomic_add(sectors, &mddev->recovery_active);
5702 }
5703
5704 j += sectors;
5705 if (j>1) mddev->curr_resync = j;
5706 mddev->curr_mark_cnt = io_sectors;
5707 if (last_check == 0)
5708
5709
5710
5711 md_new_event(mddev);
5712
5713 if (last_check + window > io_sectors || j == max_sectors)
5714 continue;
5715
5716 last_check = io_sectors;
5717
5718 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5719 break;
5720
5721 repeat:
5722 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
5723
5724 int next = (last_mark+1) % SYNC_MARKS;
5725
5726 mddev->resync_mark = mark[next];
5727 mddev->resync_mark_cnt = mark_cnt[next];
5728 mark[next] = jiffies;
5729 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
5730 last_mark = next;
5731 }
5732
5733
5734 if (kthread_should_stop())
5735 goto interrupted;
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746 blk_unplug(mddev->queue);
5747 cond_resched();
5748
5749 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
5750 /((jiffies-mddev->resync_mark)/HZ +1) +1;
5751
5752 if (currspeed > speed_min(mddev)) {
5753 if ((currspeed > speed_max(mddev)) ||
5754 !is_mddev_idle(mddev)) {
5755 msleep(500);
5756 goto repeat;
5757 }
5758 }
5759 }
5760 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
5761
5762
5763
5764 out:
5765 blk_unplug(mddev->queue);
5766
5767 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
5768
5769
5770 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
5771
5772 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
5773 mddev->curr_resync > 2) {
5774 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
5775 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5776 if (mddev->curr_resync >= mddev->recovery_cp) {
5777 printk(KERN_INFO
5778 "md: checkpointing %s of %s.\n",
5779 desc, mdname(mddev));
5780 mddev->recovery_cp = mddev->curr_resync;
5781 }
5782 } else
5783 mddev->recovery_cp = MaxSector;
5784 } else {
5785 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5786 mddev->curr_resync = MaxSector;
5787 rdev_for_each(rdev, rtmp, mddev)
5788 if (rdev->raid_disk >= 0 &&
5789 !test_bit(Faulty, &rdev->flags) &&
5790 !test_bit(In_sync, &rdev->flags) &&
5791 rdev->recovery_offset < mddev->curr_resync)
5792 rdev->recovery_offset = mddev->curr_resync;
5793 }
5794 }
5795 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5796
5797 skip:
5798 mddev->curr_resync = 0;
5799 mddev->resync_max = MaxSector;
5800 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5801 wake_up(&resync_wait);
5802 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
5803 md_wakeup_thread(mddev->thread);
5804 return;
5805
5806 interrupted:
5807
5808
5809
5810 printk(KERN_INFO
5811 "md: md_do_sync() got signal ... exiting\n");
5812 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5813 goto out;
5814
5815}
5816EXPORT_SYMBOL_GPL(md_do_sync);
5817
5818
5819static int remove_and_add_spares(mddev_t *mddev)
5820{
5821 mdk_rdev_t *rdev;
5822 struct list_head *rtmp;
5823 int spares = 0;
5824
5825 rdev_for_each(rdev, rtmp, mddev)
5826 if (rdev->raid_disk >= 0 &&
5827 !test_bit(Blocked, &rdev->flags) &&
5828 (test_bit(Faulty, &rdev->flags) ||
5829 ! test_bit(In_sync, &rdev->flags)) &&
5830 atomic_read(&rdev->nr_pending)==0) {
5831 if (mddev->pers->hot_remove_disk(
5832 mddev, rdev->raid_disk)==0) {
5833 char nm[20];
5834 sprintf(nm,"rd%d", rdev->raid_disk);
5835 sysfs_remove_link(&mddev->kobj, nm);
5836 rdev->raid_disk = -1;
5837 }
5838 }
5839
5840 if (mddev->degraded) {
5841 rdev_for_each(rdev, rtmp, mddev) {
5842 if (rdev->raid_disk >= 0 &&
5843 !test_bit(In_sync, &rdev->flags))
5844 spares++;
5845 if (rdev->raid_disk < 0
5846 && !test_bit(Faulty, &rdev->flags)) {
5847 rdev->recovery_offset = 0;
5848 if (mddev->pers->hot_add_disk(mddev,rdev)) {
5849 char nm[20];
5850 sprintf(nm, "rd%d", rdev->raid_disk);
5851 if (sysfs_create_link(&mddev->kobj,
5852 &rdev->kobj, nm))
5853 printk(KERN_WARNING
5854 "md: cannot register "
5855 "%s for %s\n",
5856 nm, mdname(mddev));
5857 spares++;
5858 md_new_event(mddev);
5859 } else
5860 break;
5861 }
5862 }
5863 }
5864 return spares;
5865}
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888void md_check_recovery(mddev_t *mddev)
5889{
5890 mdk_rdev_t *rdev;
5891 struct list_head *rtmp;
5892
5893
5894 if (mddev->bitmap)
5895 bitmap_daemon_work(mddev->bitmap);
5896
5897 if (mddev->ro)
5898 return;
5899
5900 if (signal_pending(current)) {
5901 if (mddev->pers->sync_request && !mddev->external) {
5902 printk(KERN_INFO "md: %s in immediate safe mode\n",
5903 mdname(mddev));
5904 mddev->safemode = 2;
5905 }
5906 flush_signals(current);
5907 }
5908
5909 if ( ! (
5910 (mddev->flags && !mddev->external) ||
5911 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
5912 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
5913 (mddev->external == 0 && mddev->safemode == 1) ||
5914 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
5915 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
5916 ))
5917 return;
5918
5919 if (mddev_trylock(mddev)) {
5920 int spares = 0;
5921
5922 if (!mddev->external) {
5923 spin_lock_irq(&mddev->write_lock);
5924 if (mddev->safemode &&
5925 !atomic_read(&mddev->writes_pending) &&
5926 !mddev->in_sync &&
5927 mddev->recovery_cp == MaxSector) {
5928 mddev->in_sync = 1;
5929 if (mddev->persistent)
5930 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5931 }
5932 if (mddev->safemode == 1)
5933 mddev->safemode = 0;
5934 spin_unlock_irq(&mddev->write_lock);
5935 }
5936
5937 if (mddev->flags)
5938 md_update_sb(mddev, 0);
5939
5940
5941 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
5942 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
5943
5944 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5945 goto unlock;
5946 }
5947 if (mddev->sync_thread) {
5948
5949 md_unregister_thread(mddev->sync_thread);
5950 mddev->sync_thread = NULL;
5951 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5952
5953
5954 mddev->pers->spare_active(mddev);
5955 }
5956 md_update_sb(mddev, 1);
5957
5958
5959
5960
5961 if (!mddev->degraded)
5962 rdev_for_each(rdev, rtmp, mddev)
5963 rdev->saved_raid_disk = -1;
5964
5965 mddev->recovery = 0;
5966
5967 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5968 md_new_event(mddev);
5969 goto unlock;
5970 }
5971
5972
5973
5974 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5975 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
5976 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
5977
5978 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
5979 goto unlock;
5980
5981
5982
5983
5984
5985
5986
5987 if (mddev->reshape_position != MaxSector) {
5988 if (mddev->pers->check_reshape(mddev) != 0)
5989
5990 goto unlock;
5991 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
5992 } else if ((spares = remove_and_add_spares(mddev))) {
5993 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
5994 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
5995 } else if (mddev->recovery_cp < MaxSector) {
5996 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
5997 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
5998
5999 goto unlock;
6000
6001 if (mddev->pers->sync_request) {
6002 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6003 if (spares && mddev->bitmap && ! mddev->bitmap->file) {
6004
6005
6006
6007
6008 bitmap_write_all(mddev->bitmap);
6009 }
6010 mddev->sync_thread = md_register_thread(md_do_sync,
6011 mddev,
6012 "%s_resync");
6013 if (!mddev->sync_thread) {
6014 printk(KERN_ERR "%s: could not start resync"
6015 " thread...\n",
6016 mdname(mddev));
6017
6018 mddev->recovery = 0;
6019 } else
6020 md_wakeup_thread(mddev->sync_thread);
6021 md_new_event(mddev);
6022 }
6023 unlock:
6024 mddev_unlock(mddev);
6025 }
6026}
6027
6028void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
6029{
6030 sysfs_notify(&rdev->kobj, NULL, "state");
6031 wait_event_timeout(rdev->blocked_wait,
6032 !test_bit(Blocked, &rdev->flags),
6033 msecs_to_jiffies(5000));
6034 rdev_dec_pending(rdev, mddev);
6035}
6036EXPORT_SYMBOL(md_wait_for_blocked_rdev);
6037
6038static int md_notify_reboot(struct notifier_block *this,
6039 unsigned long code, void *x)
6040{
6041 struct list_head *tmp;
6042 mddev_t *mddev;
6043
6044 if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
6045
6046 printk(KERN_INFO "md: stopping all md devices.\n");
6047
6048 for_each_mddev(mddev, tmp)
6049 if (mddev_trylock(mddev)) {
6050 do_md_stop (mddev, 1);
6051 mddev_unlock(mddev);
6052 }
6053
6054
6055
6056
6057
6058
6059 mdelay(1000*1);
6060 }
6061 return NOTIFY_DONE;
6062}
6063
6064static struct notifier_block md_notifier = {
6065 .notifier_call = md_notify_reboot,
6066 .next = NULL,
6067 .priority = INT_MAX,
6068};
6069
6070static void md_geninit(void)
6071{
6072 dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
6073
6074 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
6075}
6076
6077static int __init md_init(void)
6078{
6079 if (register_blkdev(MAJOR_NR, "md"))
6080 return -1;
6081 if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
6082 unregister_blkdev(MAJOR_NR, "md");
6083 return -1;
6084 }
6085 blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE,
6086 md_probe, NULL, NULL);
6087 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
6088 md_probe, NULL, NULL);
6089
6090 register_reboot_notifier(&md_notifier);
6091 raid_table_header = register_sysctl_table(raid_root_table);
6092
6093 md_geninit();
6094 return (0);
6095}
6096
6097
6098#ifndef MODULE
6099
6100
6101
6102
6103
6104
6105static LIST_HEAD(all_detected_devices);
6106struct detected_devices_node {
6107 struct list_head list;
6108 dev_t dev;
6109};
6110
6111void md_autodetect_dev(dev_t dev)
6112{
6113 struct detected_devices_node *node_detected_dev;
6114
6115 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
6116 if (node_detected_dev) {
6117 node_detected_dev->dev = dev;
6118 list_add_tail(&node_detected_dev->list, &all_detected_devices);
6119 } else {
6120 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
6121 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
6122 }
6123}
6124
6125
6126static void autostart_arrays(int part)
6127{
6128 mdk_rdev_t *rdev;
6129 struct detected_devices_node *node_detected_dev;
6130 dev_t dev;
6131 int i_scanned, i_passed;
6132
6133 i_scanned = 0;
6134 i_passed = 0;
6135
6136 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
6137
6138 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
6139 i_scanned++;
6140 node_detected_dev = list_entry(all_detected_devices.next,
6141 struct detected_devices_node, list);
6142 list_del(&node_detected_dev->list);
6143 dev = node_detected_dev->dev;
6144 kfree(node_detected_dev);
6145 rdev = md_import_device(dev,0, 90);
6146 if (IS_ERR(rdev))
6147 continue;
6148
6149 if (test_bit(Faulty, &rdev->flags)) {
6150 MD_BUG();
6151 continue;
6152 }
6153 set_bit(AutoDetected, &rdev->flags);
6154 list_add(&rdev->same_set, &pending_raid_disks);
6155 i_passed++;
6156 }
6157
6158 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
6159 i_scanned, i_passed);
6160
6161 autorun_devices(part);
6162}
6163
6164#endif
6165
6166static __exit void md_exit(void)
6167{
6168 mddev_t *mddev;
6169 struct list_head *tmp;
6170
6171 blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS);
6172 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
6173
6174 unregister_blkdev(MAJOR_NR,"md");
6175 unregister_blkdev(mdp_major, "mdp");
6176 unregister_reboot_notifier(&md_notifier);
6177 unregister_sysctl_table(raid_table_header);
6178 remove_proc_entry("mdstat", NULL);
6179 for_each_mddev(mddev, tmp) {
6180 struct gendisk *disk = mddev->gendisk;
6181 if (!disk)
6182 continue;
6183 export_array(mddev);
6184 del_gendisk(disk);
6185 put_disk(disk);
6186 mddev->gendisk = NULL;
6187 mddev_put(mddev);
6188 }
6189}
6190
6191subsys_initcall(md_init);
6192module_exit(md_exit)
6193
6194static int get_ro(char *buffer, struct kernel_param *kp)
6195{
6196 return sprintf(buffer, "%d", start_readonly);
6197}
6198static int set_ro(const char *val, struct kernel_param *kp)
6199{
6200 char *e;
6201 int num = simple_strtoul(val, &e, 10);
6202 if (*val && (*e == '\0' || *e == '\n')) {
6203 start_readonly = num;
6204 return 0;
6205 }
6206 return -EINVAL;
6207}
6208
6209module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
6210module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
6211
6212
6213EXPORT_SYMBOL(register_md_personality);
6214EXPORT_SYMBOL(unregister_md_personality);
6215EXPORT_SYMBOL(md_error);
6216EXPORT_SYMBOL(md_done_sync);
6217EXPORT_SYMBOL(md_write_start);
6218EXPORT_SYMBOL(md_write_end);
6219EXPORT_SYMBOL(md_register_thread);
6220EXPORT_SYMBOL(md_unregister_thread);
6221EXPORT_SYMBOL(md_wakeup_thread);
6222EXPORT_SYMBOL(md_check_recovery);
6223MODULE_LICENSE("GPL");
6224MODULE_ALIAS("md");
6225MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
6226