1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/raid/md.h>
37#include <linux/raid/bitmap.h>
38#include <linux/sysctl.h>
39#include <linux/buffer_head.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/hdreg.h>
43#include <linux/proc_fs.h>
44#include <linux/random.h>
45#include <linux/reboot.h>
46#include <linux/file.h>
47#include <linux/delay.h>
48
#define MAJOR_NR MD_MAJOR

/*
 * For units whose major is not MD_MAJOR, mddev_find() shifts the minor
 * number right by this many bits to obtain md_minor.
 */
#define MdpMinorShift 6

/*
 * Debug printing: dprintk() only reaches printk() when DEBUG is non-zero;
 * with DEBUG set to 0 the && short-circuits and nothing is printed.
 */
#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))
56
57
#ifndef MODULE
/* Forward declaration; only compiled when MODULE is not defined. */
static void autostart_arrays(int part);
#endif

/* List walked by find_pers(); pers_lock is declared alongside it. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

/*
 * MD_BUG() - report an internal inconsistency.
 * Prints the file and line of the call site, then calls md_print_devices().
 * Any arguments passed to the macro are ignored.
 */
#define MD_BUG(x...)							\
	{ printk("md: bug in file %s, line %d\n", __FILE__, __LINE__);	\
	  md_print_devices(); }
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
 * System-wide fallbacks returned by speed_min()/speed_max() when an array
 * has no per-array value; exported through raid_table as
 * "speed_limit_min" and "speed_limit_max".
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
86static inline int speed_min(mddev_t *mddev)
87{
88 return mddev->sync_speed_min ?
89 mddev->sync_speed_min : sysctl_speed_limit_min;
90}
91
92static inline int speed_max(mddev_t *mddev)
93{
94 return mddev->sync_speed_max ?
95 mddev->sync_speed_max : sysctl_speed_limit_max;
96}
97
98static struct ctl_table_header *raid_table_header;
99
100static ctl_table raid_table[] = {
101 {
102 .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
103 .procname = "speed_limit_min",
104 .data = &sysctl_speed_limit_min,
105 .maxlen = sizeof(int),
106 .mode = S_IRUGO|S_IWUSR,
107 .proc_handler = &proc_dointvec,
108 },
109 {
110 .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
111 .procname = "speed_limit_max",
112 .data = &sysctl_speed_limit_max,
113 .maxlen = sizeof(int),
114 .mode = S_IRUGO|S_IWUSR,
115 .proc_handler = &proc_dointvec,
116 },
117 { .ctl_name = 0 }
118};
119
120static ctl_table raid_dir_table[] = {
121 {
122 .ctl_name = DEV_RAID,
123 .procname = "raid",
124 .maxlen = 0,
125 .mode = S_IRUGO|S_IXUGO,
126 .child = raid_table,
127 },
128 { .ctl_name = 0 }
129};
130
131static ctl_table raid_root_table[] = {
132 {
133 .ctl_name = CTL_DEV,
134 .procname = "dev",
135 .maxlen = 0,
136 .mode = 0555,
137 .child = raid_dir_table,
138 },
139 { .ctl_name = 0 }
140};
141
142static struct block_device_operations md_fops;
143
144static int start_readonly;
145
146
147
148
149
150
151
152
153
154
155
156static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
157static atomic_t md_event_count;
158void md_new_event(mddev_t *mddev)
159{
160 atomic_inc(&md_event_count);
161 wake_up(&md_event_waiters);
162}
163EXPORT_SYMBOL_GPL(md_new_event);
164
165
166
167
168static void md_new_event_inintr(mddev_t *mddev)
169{
170 atomic_inc(&md_event_count);
171 wake_up(&md_event_waiters);
172}
173
174
175
176
177
/*
 * Global list of arrays (linked through mddev_t.all_mddevs) and the
 * spinlock taken around every traversal or modification of it here.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * for_each_mddev(mddev, tmp) - iterate over all_mddevs with the loop body
 * running unlocked.
 *
 *  - init:      take all_mddevs_lock, point @tmp at the first entry and
 *               clear @mddev.
 *  - condition: if @tmp is a real entry take a reference on it with
 *               mddev_get(), drop the lock, release the reference on the
 *               previously visited @mddev (if any), then make @mddev the
 *               entry at @tmp.  The loop ends when @tmp is the list head.
 *  - step:      re-take the lock and advance @tmp.
 *
 * The reference on the current @mddev is only dropped by the next
 * evaluation of the condition, so leaving the loop early with break keeps
 * that reference held.
 */
#define for_each_mddev(mddev,tmp)					\
	for (({ spin_lock(&all_mddevs_lock);				\
		tmp = all_mddevs.next;					\
		mddev = NULL;});					\
	     ({ if (tmp != &all_mddevs)					\
			mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (mddev) mddev_put(mddev);				\
		mddev = list_entry(tmp, mddev_t, all_mddevs);		\
		tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		tmp = tmp->next;})					\
		)
203
204
/*
 * md_fail_request - make_request handler that rejects everything.
 * Installed by mddev_find() on a freshly allocated queue: every bio is
 * completed with an I/O error and 0 is returned.
 */
static int md_fail_request(struct request_queue *q, struct bio *bio)
{
	bio_io_error(bio);

	return 0;
}
210
211static inline mddev_t *mddev_get(mddev_t *mddev)
212{
213 atomic_inc(&mddev->active);
214 return mddev;
215}
216
217static void mddev_put(mddev_t *mddev)
218{
219 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
220 return;
221 if (!mddev->raid_disks && list_empty(&mddev->disks)) {
222 list_del(&mddev->all_mddevs);
223 spin_unlock(&all_mddevs_lock);
224 blk_cleanup_queue(mddev->queue);
225 if (mddev->sysfs_state)
226 sysfs_put(mddev->sysfs_state);
227 mddev->sysfs_state = NULL;
228 kobject_put(&mddev->kobj);
229 } else
230 spin_unlock(&all_mddevs_lock);
231}
232
233static mddev_t * mddev_find(dev_t unit)
234{
235 mddev_t *mddev, *new = NULL;
236
237 retry:
238 spin_lock(&all_mddevs_lock);
239 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
240 if (mddev->unit == unit) {
241 mddev_get(mddev);
242 spin_unlock(&all_mddevs_lock);
243 kfree(new);
244 return mddev;
245 }
246
247 if (new) {
248 list_add(&new->all_mddevs, &all_mddevs);
249 spin_unlock(&all_mddevs_lock);
250 return new;
251 }
252 spin_unlock(&all_mddevs_lock);
253
254 new = kzalloc(sizeof(*new), GFP_KERNEL);
255 if (!new)
256 return NULL;
257
258 new->unit = unit;
259 if (MAJOR(unit) == MD_MAJOR)
260 new->md_minor = MINOR(unit);
261 else
262 new->md_minor = MINOR(unit) >> MdpMinorShift;
263
264 mutex_init(&new->reconfig_mutex);
265 INIT_LIST_HEAD(&new->disks);
266 INIT_LIST_HEAD(&new->all_mddevs);
267 init_timer(&new->safemode_timer);
268 atomic_set(&new->active, 1);
269 atomic_set(&new->openers, 0);
270 spin_lock_init(&new->write_lock);
271 init_waitqueue_head(&new->sb_wait);
272 init_waitqueue_head(&new->recovery_wait);
273 new->reshape_position = MaxSector;
274 new->resync_min = 0;
275 new->resync_max = MaxSector;
276 new->level = LEVEL_NONE;
277
278 new->queue = blk_alloc_queue(GFP_KERNEL);
279 if (!new->queue) {
280 kfree(new);
281 return NULL;
282 }
283
284 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
285
286 blk_queue_make_request(new->queue, md_fail_request);
287
288 goto retry;
289}
290
291static inline int mddev_lock(mddev_t * mddev)
292{
293 return mutex_lock_interruptible(&mddev->reconfig_mutex);
294}
295
296static inline int mddev_trylock(mddev_t * mddev)
297{
298 return mutex_trylock(&mddev->reconfig_mutex);
299}
300
301static inline void mddev_unlock(mddev_t * mddev)
302{
303 mutex_unlock(&mddev->reconfig_mutex);
304
305 md_wakeup_thread(mddev->thread);
306}
307
308static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
309{
310 mdk_rdev_t * rdev;
311 struct list_head *tmp;
312
313 rdev_for_each(rdev, tmp, mddev) {
314 if (rdev->desc_nr == nr)
315 return rdev;
316 }
317 return NULL;
318}
319
320static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
321{
322 struct list_head *tmp;
323 mdk_rdev_t *rdev;
324
325 rdev_for_each(rdev, tmp, mddev) {
326 if (rdev->bdev->bd_dev == dev)
327 return rdev;
328 }
329 return NULL;
330}
331
332static struct mdk_personality *find_pers(int level, char *clevel)
333{
334 struct mdk_personality *pers;
335 list_for_each_entry(pers, &pers_list, list) {
336 if (level != LEVEL_NONE && pers->level == level)
337 return pers;
338 if (strcmp(pers->name, clevel)==0)
339 return pers;
340 }
341 return NULL;
342}
343
344
345static inline sector_t calc_dev_sboffset(struct block_device *bdev)
346{
347 sector_t num_sectors = bdev->bd_inode->i_size / 512;
348 return MD_NEW_SIZE_SECTORS(num_sectors);
349}
350
351static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size)
352{
353 sector_t num_sectors = rdev->sb_start;
354
355 if (chunk_size)
356 num_sectors &= ~((sector_t)chunk_size/512 - 1);
357 return num_sectors;
358}
359
360static int alloc_disk_sb(mdk_rdev_t * rdev)
361{
362 if (rdev->sb_page)
363 MD_BUG();
364
365 rdev->sb_page = alloc_page(GFP_KERNEL);
366 if (!rdev->sb_page) {
367 printk(KERN_ALERT "md: out of memory.\n");
368 return -ENOMEM;
369 }
370
371 return 0;
372}
373
374static void free_disk_sb(mdk_rdev_t * rdev)
375{
376 if (rdev->sb_page) {
377 put_page(rdev->sb_page);
378 rdev->sb_loaded = 0;
379 rdev->sb_page = NULL;
380 rdev->sb_start = 0;
381 rdev->size = 0;
382 }
383}
384
385
386static void super_written(struct bio *bio, int error)
387{
388 mdk_rdev_t *rdev = bio->bi_private;
389 mddev_t *mddev = rdev->mddev;
390
391 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
392 printk("md: super_written gets error=%d, uptodate=%d\n",
393 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
394 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
395 md_error(mddev, rdev);
396 }
397
398 if (atomic_dec_and_test(&mddev->pending_writes))
399 wake_up(&mddev->sb_wait);
400 bio_put(bio);
401}
402
403static void super_written_barrier(struct bio *bio, int error)
404{
405 struct bio *bio2 = bio->bi_private;
406 mdk_rdev_t *rdev = bio2->bi_private;
407 mddev_t *mddev = rdev->mddev;
408
409 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
410 error == -EOPNOTSUPP) {
411 unsigned long flags;
412
413 set_bit(BarriersNotsupp, &rdev->flags);
414 mddev->barriers_work = 0;
415 spin_lock_irqsave(&mddev->write_lock, flags);
416 bio2->bi_next = mddev->biolist;
417 mddev->biolist = bio2;
418 spin_unlock_irqrestore(&mddev->write_lock, flags);
419 wake_up(&mddev->sb_wait);
420 bio_put(bio);
421 } else {
422 bio_put(bio2);
423 bio->bi_private = rdev;
424 super_written(bio, error);
425 }
426}
427
428void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
429 sector_t sector, int size, struct page *page)
430{
431
432
433
434
435
436
437
438
439
440 struct bio *bio = bio_alloc(GFP_NOIO, 1);
441 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
442
443 bio->bi_bdev = rdev->bdev;
444 bio->bi_sector = sector;
445 bio_add_page(bio, page, size, 0);
446 bio->bi_private = rdev;
447 bio->bi_end_io = super_written;
448 bio->bi_rw = rw;
449
450 atomic_inc(&mddev->pending_writes);
451 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
452 struct bio *rbio;
453 rw |= (1<<BIO_RW_BARRIER);
454 rbio = bio_clone(bio, GFP_NOIO);
455 rbio->bi_private = bio;
456 rbio->bi_end_io = super_written_barrier;
457 submit_bio(rw, rbio);
458 } else
459 submit_bio(rw, bio);
460}
461
462void md_super_wait(mddev_t *mddev)
463{
464
465
466
467 DEFINE_WAIT(wq);
468 for(;;) {
469 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
470 if (atomic_read(&mddev->pending_writes)==0)
471 break;
472 while (mddev->biolist) {
473 struct bio *bio;
474 spin_lock_irq(&mddev->write_lock);
475 bio = mddev->biolist;
476 mddev->biolist = bio->bi_next ;
477 bio->bi_next = NULL;
478 spin_unlock_irq(&mddev->write_lock);
479 submit_bio(bio->bi_rw, bio);
480 }
481 schedule();
482 }
483 finish_wait(&mddev->sb_wait, &wq);
484}
485
486static void bi_complete(struct bio *bio, int error)
487{
488 complete((struct completion*)bio->bi_private);
489}
490
491int sync_page_io(struct block_device *bdev, sector_t sector, int size,
492 struct page *page, int rw)
493{
494 struct bio *bio = bio_alloc(GFP_NOIO, 1);
495 struct completion event;
496 int ret;
497
498 rw |= (1 << BIO_RW_SYNC);
499
500 bio->bi_bdev = bdev;
501 bio->bi_sector = sector;
502 bio_add_page(bio, page, size, 0);
503 init_completion(&event);
504 bio->bi_private = &event;
505 bio->bi_end_io = bi_complete;
506 submit_bio(rw, bio);
507 wait_for_completion(&event);
508
509 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
510 bio_put(bio);
511 return ret;
512}
513EXPORT_SYMBOL_GPL(sync_page_io);
514
515static int read_disk_sb(mdk_rdev_t * rdev, int size)
516{
517 char b[BDEVNAME_SIZE];
518 if (!rdev->sb_page) {
519 MD_BUG();
520 return -EINVAL;
521 }
522 if (rdev->sb_loaded)
523 return 0;
524
525
526 if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
527 goto fail;
528 rdev->sb_loaded = 1;
529 return 0;
530
531fail:
532 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
533 bdevname(rdev->bdev,b));
534 return -EINVAL;
535}
536
537static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
538{
539 return sb1->set_uuid0 == sb2->set_uuid0 &&
540 sb1->set_uuid1 == sb2->set_uuid1 &&
541 sb1->set_uuid2 == sb2->set_uuid2 &&
542 sb1->set_uuid3 == sb2->set_uuid3;
543}
544
545static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
546{
547 int ret;
548 mdp_super_t *tmp1, *tmp2;
549
550 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
551 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
552
553 if (!tmp1 || !tmp2) {
554 ret = 0;
555 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
556 goto abort;
557 }
558
559 *tmp1 = *sb1;
560 *tmp2 = *sb2;
561
562
563
564
565 tmp1->nr_disks = 0;
566 tmp2->nr_disks = 0;
567
568 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
569abort:
570 kfree(tmp1);
571 kfree(tmp2);
572 return ret;
573}
574
575
/*
 * md_csum_fold - fold a 32-bit checksum towards 16 bits.
 * Adds the high half to the low half, twice, so the carry out of the
 * first addition is folded back in as well.
 */
static u32 md_csum_fold(u32 csum)
{
	u32 once = (csum >> 16) + (csum & 0xffff);
	u32 twice = (once >> 16) + (once & 0xffff);

	return twice;
}
581
582static unsigned int calc_sb_csum(mdp_super_t * sb)
583{
584 u64 newcsum = 0;
585 u32 *sb32 = (u32*)sb;
586 int i;
587 unsigned int disk_csum, csum;
588
589 disk_csum = sb->sb_csum;
590 sb->sb_csum = 0;
591
592 for (i = 0; i < MD_SB_BYTES/4 ; i++)
593 newcsum += sb32[i];
594 csum = (newcsum & 0xffffffff) + (newcsum>>32);
595
596
597#ifdef CONFIG_ALPHA
598
599
600
601
602
603
604
605
606 sb->sb_csum = md_csum_fold(disk_csum);
607#else
608 sb->sb_csum = disk_csum;
609#endif
610 return csum;
611}
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644struct super_type {
645 char *name;
646 struct module *owner;
647 int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
648 int minor_version);
649 int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
650 void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
651 unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev,
652 sector_t num_sectors);
653};
654
655
656
657
658static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
659{
660 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
661 mdp_super_t *sb;
662 int ret;
663
664
665
666
667
668
669
670 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
671
672 ret = read_disk_sb(rdev, MD_SB_BYTES);
673 if (ret) return ret;
674
675 ret = -EINVAL;
676
677 bdevname(rdev->bdev, b);
678 sb = (mdp_super_t*)page_address(rdev->sb_page);
679
680 if (sb->md_magic != MD_SB_MAGIC) {
681 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
682 b);
683 goto abort;
684 }
685
686 if (sb->major_version != 0 ||
687 sb->minor_version < 90 ||
688 sb->minor_version > 91) {
689 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
690 sb->major_version, sb->minor_version,
691 b);
692 goto abort;
693 }
694
695 if (sb->raid_disks <= 0)
696 goto abort;
697
698 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
699 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
700 b);
701 goto abort;
702 }
703
704 rdev->preferred_minor = sb->md_minor;
705 rdev->data_offset = 0;
706 rdev->sb_size = MD_SB_BYTES;
707
708 if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
709 if (sb->level != 1 && sb->level != 4
710 && sb->level != 5 && sb->level != 6
711 && sb->level != 10) {
712
713 printk(KERN_WARNING
714 "md: bitmaps not supported for this level.\n");
715 goto abort;
716 }
717 }
718
719 if (sb->level == LEVEL_MULTIPATH)
720 rdev->desc_nr = -1;
721 else
722 rdev->desc_nr = sb->this_disk.number;
723
724 if (!refdev) {
725 ret = 1;
726 } else {
727 __u64 ev1, ev2;
728 mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
729 if (!uuid_equal(refsb, sb)) {
730 printk(KERN_WARNING "md: %s has different UUID to %s\n",
731 b, bdevname(refdev->bdev,b2));
732 goto abort;
733 }
734 if (!sb_equal(refsb, sb)) {
735 printk(KERN_WARNING "md: %s has same UUID"
736 " but different superblock to %s\n",
737 b, bdevname(refdev->bdev, b2));
738 goto abort;
739 }
740 ev1 = md_event(sb);
741 ev2 = md_event(refsb);
742 if (ev1 > ev2)
743 ret = 1;
744 else
745 ret = 0;
746 }
747 rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
748
749 if (rdev->size < sb->size && sb->level > 1)
750
751 ret = -EINVAL;
752
753 abort:
754 return ret;
755}
756
757
758
759
760static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
761{
762 mdp_disk_t *desc;
763 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
764 __u64 ev1 = md_event(sb);
765
766 rdev->raid_disk = -1;
767 clear_bit(Faulty, &rdev->flags);
768 clear_bit(In_sync, &rdev->flags);
769 clear_bit(WriteMostly, &rdev->flags);
770 clear_bit(BarriersNotsupp, &rdev->flags);
771
772 if (mddev->raid_disks == 0) {
773 mddev->major_version = 0;
774 mddev->minor_version = sb->minor_version;
775 mddev->patch_version = sb->patch_version;
776 mddev->external = 0;
777 mddev->chunk_size = sb->chunk_size;
778 mddev->ctime = sb->ctime;
779 mddev->utime = sb->utime;
780 mddev->level = sb->level;
781 mddev->clevel[0] = 0;
782 mddev->layout = sb->layout;
783 mddev->raid_disks = sb->raid_disks;
784 mddev->size = sb->size;
785 mddev->events = ev1;
786 mddev->bitmap_offset = 0;
787 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
788
789 if (mddev->minor_version >= 91) {
790 mddev->reshape_position = sb->reshape_position;
791 mddev->delta_disks = sb->delta_disks;
792 mddev->new_level = sb->new_level;
793 mddev->new_layout = sb->new_layout;
794 mddev->new_chunk = sb->new_chunk;
795 } else {
796 mddev->reshape_position = MaxSector;
797 mddev->delta_disks = 0;
798 mddev->new_level = mddev->level;
799 mddev->new_layout = mddev->layout;
800 mddev->new_chunk = mddev->chunk_size;
801 }
802
803 if (sb->state & (1<<MD_SB_CLEAN))
804 mddev->recovery_cp = MaxSector;
805 else {
806 if (sb->events_hi == sb->cp_events_hi &&
807 sb->events_lo == sb->cp_events_lo) {
808 mddev->recovery_cp = sb->recovery_cp;
809 } else
810 mddev->recovery_cp = 0;
811 }
812
813 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
814 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
815 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
816 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
817
818 mddev->max_disks = MD_SB_DISKS;
819
820 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
821 mddev->bitmap_file == NULL)
822 mddev->bitmap_offset = mddev->default_bitmap_offset;
823
824 } else if (mddev->pers == NULL) {
825
826 ++ev1;
827 if (ev1 < mddev->events)
828 return -EINVAL;
829 } else if (mddev->bitmap) {
830
831
832
833 if (ev1 < mddev->bitmap->events_cleared)
834 return 0;
835 } else {
836 if (ev1 < mddev->events)
837
838 return 0;
839 }
840
841 if (mddev->level != LEVEL_MULTIPATH) {
842 desc = sb->disks + rdev->desc_nr;
843
844 if (desc->state & (1<<MD_DISK_FAULTY))
845 set_bit(Faulty, &rdev->flags);
846 else if (desc->state & (1<<MD_DISK_SYNC)
847) {
848 set_bit(In_sync, &rdev->flags);
849 rdev->raid_disk = desc->raid_disk;
850 }
851 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
852 set_bit(WriteMostly, &rdev->flags);
853 } else
854 set_bit(In_sync, &rdev->flags);
855 return 0;
856}
857
858
859
860
861static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
862{
863 mdp_super_t *sb;
864 struct list_head *tmp;
865 mdk_rdev_t *rdev2;
866 int next_spare = mddev->raid_disks;
867
868
869
870
871
872
873
874
875
876
877
878
879 int i;
880 int active=0, working=0,failed=0,spare=0,nr_disks=0;
881
882 rdev->sb_size = MD_SB_BYTES;
883
884 sb = (mdp_super_t*)page_address(rdev->sb_page);
885
886 memset(sb, 0, sizeof(*sb));
887
888 sb->md_magic = MD_SB_MAGIC;
889 sb->major_version = mddev->major_version;
890 sb->patch_version = mddev->patch_version;
891 sb->gvalid_words = 0;
892 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
893 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
894 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
895 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
896
897 sb->ctime = mddev->ctime;
898 sb->level = mddev->level;
899 sb->size = mddev->size;
900 sb->raid_disks = mddev->raid_disks;
901 sb->md_minor = mddev->md_minor;
902 sb->not_persistent = 0;
903 sb->utime = mddev->utime;
904 sb->state = 0;
905 sb->events_hi = (mddev->events>>32);
906 sb->events_lo = (u32)mddev->events;
907
908 if (mddev->reshape_position == MaxSector)
909 sb->minor_version = 90;
910 else {
911 sb->minor_version = 91;
912 sb->reshape_position = mddev->reshape_position;
913 sb->new_level = mddev->new_level;
914 sb->delta_disks = mddev->delta_disks;
915 sb->new_layout = mddev->new_layout;
916 sb->new_chunk = mddev->new_chunk;
917 }
918 mddev->minor_version = sb->minor_version;
919 if (mddev->in_sync)
920 {
921 sb->recovery_cp = mddev->recovery_cp;
922 sb->cp_events_hi = (mddev->events>>32);
923 sb->cp_events_lo = (u32)mddev->events;
924 if (mddev->recovery_cp == MaxSector)
925 sb->state = (1<< MD_SB_CLEAN);
926 } else
927 sb->recovery_cp = 0;
928
929 sb->layout = mddev->layout;
930 sb->chunk_size = mddev->chunk_size;
931
932 if (mddev->bitmap && mddev->bitmap_file == NULL)
933 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
934
935 sb->disks[0].state = (1<<MD_DISK_REMOVED);
936 rdev_for_each(rdev2, tmp, mddev) {
937 mdp_disk_t *d;
938 int desc_nr;
939 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
940 && !test_bit(Faulty, &rdev2->flags))
941 desc_nr = rdev2->raid_disk;
942 else
943 desc_nr = next_spare++;
944 rdev2->desc_nr = desc_nr;
945 d = &sb->disks[rdev2->desc_nr];
946 nr_disks++;
947 d->number = rdev2->desc_nr;
948 d->major = MAJOR(rdev2->bdev->bd_dev);
949 d->minor = MINOR(rdev2->bdev->bd_dev);
950 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
951 && !test_bit(Faulty, &rdev2->flags))
952 d->raid_disk = rdev2->raid_disk;
953 else
954 d->raid_disk = rdev2->desc_nr;
955 if (test_bit(Faulty, &rdev2->flags))
956 d->state = (1<<MD_DISK_FAULTY);
957 else if (test_bit(In_sync, &rdev2->flags)) {
958 d->state = (1<<MD_DISK_ACTIVE);
959 d->state |= (1<<MD_DISK_SYNC);
960 active++;
961 working++;
962 } else {
963 d->state = 0;
964 spare++;
965 working++;
966 }
967 if (test_bit(WriteMostly, &rdev2->flags))
968 d->state |= (1<<MD_DISK_WRITEMOSTLY);
969 }
970
971 for (i=0 ; i < mddev->raid_disks ; i++) {
972 mdp_disk_t *d = &sb->disks[i];
973 if (d->state == 0 && d->number == 0) {
974 d->number = i;
975 d->raid_disk = i;
976 d->state = (1<<MD_DISK_REMOVED);
977 d->state |= (1<<MD_DISK_FAULTY);
978 failed++;
979 }
980 }
981 sb->nr_disks = nr_disks;
982 sb->active_disks = active;
983 sb->working_disks = working;
984 sb->failed_disks = failed;
985 sb->spare_disks = spare;
986
987 sb->this_disk = sb->disks[rdev->desc_nr];
988 sb->sb_csum = calc_sb_csum(sb);
989}
990
991
992
993
994static unsigned long long
995super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
996{
997 if (num_sectors && num_sectors < rdev->mddev->size * 2)
998 return 0;
999 if (rdev->mddev->bitmap_offset)
1000 return 0;
1001 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
1002 if (!num_sectors || num_sectors > rdev->sb_start)
1003 num_sectors = rdev->sb_start;
1004 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1005 rdev->sb_page);
1006 md_super_wait(rdev->mddev);
1007 return num_sectors / 2;
1008}
1009
1010
1011
1012
1013
1014
1015static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1016{
1017 __le32 disk_csum;
1018 u32 csum;
1019 unsigned long long newcsum;
1020 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1021 __le32 *isuper = (__le32*)sb;
1022 int i;
1023
1024 disk_csum = sb->sb_csum;
1025 sb->sb_csum = 0;
1026 newcsum = 0;
1027 for (i=0; size>=4; size -= 4 )
1028 newcsum += le32_to_cpu(*isuper++);
1029
1030 if (size == 2)
1031 newcsum += le16_to_cpu(*(__le16*) isuper);
1032
1033 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1034 sb->sb_csum = disk_csum;
1035 return cpu_to_le32(csum);
1036}
1037
1038static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1039{
1040 struct mdp_superblock_1 *sb;
1041 int ret;
1042 sector_t sb_start;
1043 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1044 int bmask;
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054 switch(minor_version) {
1055 case 0:
1056 sb_start = rdev->bdev->bd_inode->i_size >> 9;
1057 sb_start -= 8*2;
1058 sb_start &= ~(sector_t)(4*2-1);
1059 break;
1060 case 1:
1061 sb_start = 0;
1062 break;
1063 case 2:
1064 sb_start = 8;
1065 break;
1066 default:
1067 return -EINVAL;
1068 }
1069 rdev->sb_start = sb_start;
1070
1071
1072
1073
1074 ret = read_disk_sb(rdev, 4096);
1075 if (ret) return ret;
1076
1077
1078 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1079
1080 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1081 sb->major_version != cpu_to_le32(1) ||
1082 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1083 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1084 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1085 return -EINVAL;
1086
1087 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1088 printk("md: invalid superblock checksum on %s\n",
1089 bdevname(rdev->bdev,b));
1090 return -EINVAL;
1091 }
1092 if (le64_to_cpu(sb->data_size) < 10) {
1093 printk("md: data_size too small on %s\n",
1094 bdevname(rdev->bdev,b));
1095 return -EINVAL;
1096 }
1097 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
1098 if (sb->level != cpu_to_le32(1) &&
1099 sb->level != cpu_to_le32(4) &&
1100 sb->level != cpu_to_le32(5) &&
1101 sb->level != cpu_to_le32(6) &&
1102 sb->level != cpu_to_le32(10)) {
1103 printk(KERN_WARNING
1104 "md: bitmaps not supported for this level.\n");
1105 return -EINVAL;
1106 }
1107 }
1108
1109 rdev->preferred_minor = 0xffff;
1110 rdev->data_offset = le64_to_cpu(sb->data_offset);
1111 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1112
1113 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1114 bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
1115 if (rdev->sb_size & bmask)
1116 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1117
1118 if (minor_version
1119 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1120 return -EINVAL;
1121
1122 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1123 rdev->desc_nr = -1;
1124 else
1125 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1126
1127 if (!refdev) {
1128 ret = 1;
1129 } else {
1130 __u64 ev1, ev2;
1131 struct mdp_superblock_1 *refsb =
1132 (struct mdp_superblock_1*)page_address(refdev->sb_page);
1133
1134 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1135 sb->level != refsb->level ||
1136 sb->layout != refsb->layout ||
1137 sb->chunksize != refsb->chunksize) {
1138 printk(KERN_WARNING "md: %s has strangely different"
1139 " superblock to %s\n",
1140 bdevname(rdev->bdev,b),
1141 bdevname(refdev->bdev,b2));
1142 return -EINVAL;
1143 }
1144 ev1 = le64_to_cpu(sb->events);
1145 ev2 = le64_to_cpu(refsb->events);
1146
1147 if (ev1 > ev2)
1148 ret = 1;
1149 else
1150 ret = 0;
1151 }
1152 if (minor_version)
1153 rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
1154 else
1155 rdev->size = rdev->sb_start / 2;
1156 if (rdev->size < le64_to_cpu(sb->data_size)/2)
1157 return -EINVAL;
1158 rdev->size = le64_to_cpu(sb->data_size)/2;
1159 if (le32_to_cpu(sb->chunksize))
1160 rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
1161
1162 if (le64_to_cpu(sb->size) > rdev->size*2)
1163 return -EINVAL;
1164 return ret;
1165}
1166
1167static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1168{
1169 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1170 __u64 ev1 = le64_to_cpu(sb->events);
1171
1172 rdev->raid_disk = -1;
1173 clear_bit(Faulty, &rdev->flags);
1174 clear_bit(In_sync, &rdev->flags);
1175 clear_bit(WriteMostly, &rdev->flags);
1176 clear_bit(BarriersNotsupp, &rdev->flags);
1177
1178 if (mddev->raid_disks == 0) {
1179 mddev->major_version = 1;
1180 mddev->patch_version = 0;
1181 mddev->external = 0;
1182 mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
1183 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1184 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1185 mddev->level = le32_to_cpu(sb->level);
1186 mddev->clevel[0] = 0;
1187 mddev->layout = le32_to_cpu(sb->layout);
1188 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1189 mddev->size = le64_to_cpu(sb->size)/2;
1190 mddev->events = ev1;
1191 mddev->bitmap_offset = 0;
1192 mddev->default_bitmap_offset = 1024 >> 9;
1193
1194 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1195 memcpy(mddev->uuid, sb->set_uuid, 16);
1196
1197 mddev->max_disks = (4096-256)/2;
1198
1199 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1200 mddev->bitmap_file == NULL )
1201 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
1202
1203 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1204 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1205 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1206 mddev->new_level = le32_to_cpu(sb->new_level);
1207 mddev->new_layout = le32_to_cpu(sb->new_layout);
1208 mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
1209 } else {
1210 mddev->reshape_position = MaxSector;
1211 mddev->delta_disks = 0;
1212 mddev->new_level = mddev->level;
1213 mddev->new_layout = mddev->layout;
1214 mddev->new_chunk = mddev->chunk_size;
1215 }
1216
1217 } else if (mddev->pers == NULL) {
1218
1219 ++ev1;
1220 if (ev1 < mddev->events)
1221 return -EINVAL;
1222 } else if (mddev->bitmap) {
1223
1224
1225
1226 if (ev1 < mddev->bitmap->events_cleared)
1227 return 0;
1228 } else {
1229 if (ev1 < mddev->events)
1230
1231 return 0;
1232 }
1233 if (mddev->level != LEVEL_MULTIPATH) {
1234 int role;
1235 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1236 switch(role) {
1237 case 0xffff:
1238 break;
1239 case 0xfffe:
1240 set_bit(Faulty, &rdev->flags);
1241 break;
1242 default:
1243 if ((le32_to_cpu(sb->feature_map) &
1244 MD_FEATURE_RECOVERY_OFFSET))
1245 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1246 else
1247 set_bit(In_sync, &rdev->flags);
1248 rdev->raid_disk = role;
1249 break;
1250 }
1251 if (sb->devflags & WriteMostly1)
1252 set_bit(WriteMostly, &rdev->flags);
1253 } else
1254 set_bit(In_sync, &rdev->flags);
1255
1256 return 0;
1257}
1258
1259static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1260{
1261 struct mdp_superblock_1 *sb;
1262 struct list_head *tmp;
1263 mdk_rdev_t *rdev2;
1264 int max_dev, i;
1265
1266
1267 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1268
1269 sb->feature_map = 0;
1270 sb->pad0 = 0;
1271 sb->recovery_offset = cpu_to_le64(0);
1272 memset(sb->pad1, 0, sizeof(sb->pad1));
1273 memset(sb->pad2, 0, sizeof(sb->pad2));
1274 memset(sb->pad3, 0, sizeof(sb->pad3));
1275
1276 sb->utime = cpu_to_le64((__u64)mddev->utime);
1277 sb->events = cpu_to_le64(mddev->events);
1278 if (mddev->in_sync)
1279 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1280 else
1281 sb->resync_offset = cpu_to_le64(0);
1282
1283 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1284
1285 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1286 sb->size = cpu_to_le64(mddev->size<<1);
1287
1288 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1289 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1290 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1291 }
1292
1293 if (rdev->raid_disk >= 0 &&
1294 !test_bit(In_sync, &rdev->flags) &&
1295 rdev->recovery_offset > 0) {
1296 sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1297 sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
1298 }
1299
1300 if (mddev->reshape_position != MaxSector) {
1301 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1302 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1303 sb->new_layout = cpu_to_le32(mddev->new_layout);
1304 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1305 sb->new_level = cpu_to_le32(mddev->new_level);
1306 sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
1307 }
1308
1309 max_dev = 0;
1310 rdev_for_each(rdev2, tmp, mddev)
1311 if (rdev2->desc_nr+1 > max_dev)
1312 max_dev = rdev2->desc_nr+1;
1313
1314 if (max_dev > le32_to_cpu(sb->max_dev))
1315 sb->max_dev = cpu_to_le32(max_dev);
1316 for (i=0; i<max_dev;i++)
1317 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1318
1319 rdev_for_each(rdev2, tmp, mddev) {
1320 i = rdev2->desc_nr;
1321 if (test_bit(Faulty, &rdev2->flags))
1322 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1323 else if (test_bit(In_sync, &rdev2->flags))
1324 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1325 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
1326 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1327 else
1328 sb->dev_roles[i] = cpu_to_le16(0xffff);
1329 }
1330
1331 sb->sb_csum = calc_sb_1_csum(sb);
1332}
1333
1334static unsigned long long
1335super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1336{
1337 struct mdp_superblock_1 *sb;
1338 sector_t max_sectors;
1339 if (num_sectors && num_sectors < rdev->mddev->size * 2)
1340 return 0;
1341 if (rdev->sb_start < rdev->data_offset) {
1342
1343 max_sectors = rdev->bdev->bd_inode->i_size >> 9;
1344 max_sectors -= rdev->data_offset;
1345 if (!num_sectors || num_sectors > max_sectors)
1346 num_sectors = max_sectors;
1347 } else if (rdev->mddev->bitmap_offset) {
1348
1349 return 0;
1350 } else {
1351
1352 sector_t sb_start;
1353 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
1354 sb_start &= ~(sector_t)(4*2 - 1);
1355 max_sectors = rdev->size * 2 + sb_start - rdev->sb_start;
1356 if (!num_sectors || num_sectors > max_sectors)
1357 num_sectors = max_sectors;
1358 rdev->sb_start = sb_start;
1359 }
1360 sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
1361 sb->data_size = cpu_to_le64(num_sectors);
1362 sb->super_offset = rdev->sb_start;
1363 sb->sb_csum = calc_sb_1_csum(sb);
1364 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1365 rdev->sb_page);
1366 md_super_wait(rdev->mddev);
1367 return num_sectors / 2;
1368}
1369
1370static struct super_type super_types[] = {
1371 [0] = {
1372 .name = "0.90.0",
1373 .owner = THIS_MODULE,
1374 .load_super = super_90_load,
1375 .validate_super = super_90_validate,
1376 .sync_super = super_90_sync,
1377 .rdev_size_change = super_90_rdev_size_change,
1378 },
1379 [1] = {
1380 .name = "md-1",
1381 .owner = THIS_MODULE,
1382 .load_super = super_1_load,
1383 .validate_super = super_1_validate,
1384 .sync_super = super_1_sync,
1385 .rdev_size_change = super_1_rdev_size_change,
1386 },
1387};
1388
1389static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1390{
1391 mdk_rdev_t *rdev, *rdev2;
1392
1393 rcu_read_lock();
1394 rdev_for_each_rcu(rdev, mddev1)
1395 rdev_for_each_rcu(rdev2, mddev2)
1396 if (rdev->bdev->bd_contains ==
1397 rdev2->bdev->bd_contains) {
1398 rcu_read_unlock();
1399 return 1;
1400 }
1401 rcu_read_unlock();
1402 return 0;
1403}
1404
/* List head for pending raid disks; its users are not visible in this part of the file. */
static LIST_HEAD(pending_raid_disks);
1406
1407static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1408{
1409 char b[BDEVNAME_SIZE];
1410 struct kobject *ko;
1411 char *s;
1412 int err;
1413
1414 if (rdev->mddev) {
1415 MD_BUG();
1416 return -EINVAL;
1417 }
1418
1419
1420 if (find_rdev(mddev, rdev->bdev->bd_dev))
1421 return -EEXIST;
1422
1423
1424 if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
1425 if (mddev->pers) {
1426
1427
1428
1429
1430 if (mddev->level > 0)
1431 return -ENOSPC;
1432 } else
1433 mddev->size = rdev->size;
1434 }
1435
1436
1437
1438
1439
1440 if (rdev->desc_nr < 0) {
1441 int choice = 0;
1442 if (mddev->pers) choice = mddev->raid_disks;
1443 while (find_rdev_nr(mddev, choice))
1444 choice++;
1445 rdev->desc_nr = choice;
1446 } else {
1447 if (find_rdev_nr(mddev, rdev->desc_nr))
1448 return -EBUSY;
1449 }
1450 bdevname(rdev->bdev,b);
1451 while ( (s=strchr(b, '/')) != NULL)
1452 *s = '!';
1453
1454 rdev->mddev = mddev;
1455 printk(KERN_INFO "md: bind<%s>\n", b);
1456
1457 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
1458 goto fail;
1459
1460 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1461 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
1462 kobject_del(&rdev->kobj);
1463 goto fail;
1464 }
1465 rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
1466
1467 list_add_rcu(&rdev->same_set, &mddev->disks);
1468 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1469 return 0;
1470
1471 fail:
1472 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
1473 b, mdname(mddev));
1474 return err;
1475}
1476
1477static void md_delayed_delete(struct work_struct *ws)
1478{
1479 mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
1480 kobject_del(&rdev->kobj);
1481 kobject_put(&rdev->kobj);
1482}
1483
1484static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1485{
1486 char b[BDEVNAME_SIZE];
1487 if (!rdev->mddev) {
1488 MD_BUG();
1489 return;
1490 }
1491 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1492 list_del_rcu(&rdev->same_set);
1493 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1494 rdev->mddev = NULL;
1495 sysfs_remove_link(&rdev->kobj, "block");
1496 sysfs_put(rdev->sysfs_state);
1497 rdev->sysfs_state = NULL;
1498
1499
1500
1501
1502 synchronize_rcu();
1503 INIT_WORK(&rdev->del_work, md_delayed_delete);
1504 kobject_get(&rdev->kobj);
1505 schedule_work(&rdev->del_work);
1506}
1507
1508
1509
1510
1511
1512
1513static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1514{
1515 int err = 0;
1516 struct block_device *bdev;
1517 char b[BDEVNAME_SIZE];
1518
1519 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1520 if (IS_ERR(bdev)) {
1521 printk(KERN_ERR "md: could not open %s.\n",
1522 __bdevname(dev, b));
1523 return PTR_ERR(bdev);
1524 }
1525 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1526 if (err) {
1527 printk(KERN_ERR "md: could not bd_claim %s.\n",
1528 bdevname(bdev, b));
1529 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1530 return err;
1531 }
1532 if (!shared)
1533 set_bit(AllReserved, &rdev->flags);
1534 rdev->bdev = bdev;
1535 return err;
1536}
1537
1538static void unlock_rdev(mdk_rdev_t *rdev)
1539{
1540 struct block_device *bdev = rdev->bdev;
1541 rdev->bdev = NULL;
1542 if (!bdev)
1543 MD_BUG();
1544 bd_release(bdev);
1545 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1546}
1547
1548void md_autodetect_dev(dev_t dev);
1549
1550static void export_rdev(mdk_rdev_t * rdev)
1551{
1552 char b[BDEVNAME_SIZE];
1553 printk(KERN_INFO "md: export_rdev(%s)\n",
1554 bdevname(rdev->bdev,b));
1555 if (rdev->mddev)
1556 MD_BUG();
1557 free_disk_sb(rdev);
1558#ifndef MODULE
1559 if (test_bit(AutoDetected, &rdev->flags))
1560 md_autodetect_dev(rdev->bdev->bd_dev);
1561#endif
1562 unlock_rdev(rdev);
1563 kobject_put(&rdev->kobj);
1564}
1565
1566static void kick_rdev_from_array(mdk_rdev_t * rdev)
1567{
1568 unbind_rdev_from_array(rdev);
1569 export_rdev(rdev);
1570}
1571
1572static void export_array(mddev_t *mddev)
1573{
1574 struct list_head *tmp;
1575 mdk_rdev_t *rdev;
1576
1577 rdev_for_each(rdev, tmp, mddev) {
1578 if (!rdev->mddev) {
1579 MD_BUG();
1580 continue;
1581 }
1582 kick_rdev_from_array(rdev);
1583 }
1584 if (!list_empty(&mddev->disks))
1585 MD_BUG();
1586 mddev->raid_disks = 0;
1587 mddev->major_version = 0;
1588}
1589
1590static void print_desc(mdp_disk_t *desc)
1591{
1592 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
1593 desc->major,desc->minor,desc->raid_disk,desc->state);
1594}
1595
1596static void print_sb(mdp_super_t *sb)
1597{
1598 int i;
1599
1600 printk(KERN_INFO
1601 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
1602 sb->major_version, sb->minor_version, sb->patch_version,
1603 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
1604 sb->ctime);
1605 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
1606 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
1607 sb->md_minor, sb->layout, sb->chunk_size);
1608 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
1609 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
1610 sb->utime, sb->state, sb->active_disks, sb->working_disks,
1611 sb->failed_disks, sb->spare_disks,
1612 sb->sb_csum, (unsigned long)sb->events_lo);
1613
1614 printk(KERN_INFO);
1615 for (i = 0; i < MD_SB_DISKS; i++) {
1616 mdp_disk_t *desc;
1617
1618 desc = sb->disks + i;
1619 if (desc->number || desc->major || desc->minor ||
1620 desc->raid_disk || (desc->state && (desc->state != 4))) {
1621 printk(" D %2d: ", i);
1622 print_desc(desc);
1623 }
1624 }
1625 printk(KERN_INFO "md: THIS: ");
1626 print_desc(&sb->this_disk);
1627
1628}
1629
1630static void print_rdev(mdk_rdev_t *rdev)
1631{
1632 char b[BDEVNAME_SIZE];
1633 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
1634 bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
1635 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1636 rdev->desc_nr);
1637 if (rdev->sb_loaded) {
1638 printk(KERN_INFO "md: rdev superblock:\n");
1639 print_sb((mdp_super_t*)page_address(rdev->sb_page));
1640 } else
1641 printk(KERN_INFO "md: no rdev superblock!\n");
1642}
1643
1644static void md_print_devices(void)
1645{
1646 struct list_head *tmp, *tmp2;
1647 mdk_rdev_t *rdev;
1648 mddev_t *mddev;
1649 char b[BDEVNAME_SIZE];
1650
1651 printk("\n");
1652 printk("md: **********************************\n");
1653 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
1654 printk("md: **********************************\n");
1655 for_each_mddev(mddev, tmp) {
1656
1657 if (mddev->bitmap)
1658 bitmap_print_sb(mddev->bitmap);
1659 else
1660 printk("%s: ", mdname(mddev));
1661 rdev_for_each(rdev, tmp2, mddev)
1662 printk("<%s>", bdevname(rdev->bdev,b));
1663 printk("\n");
1664
1665 rdev_for_each(rdev, tmp2, mddev)
1666 print_rdev(rdev);
1667 }
1668 printk("md: **********************************\n");
1669 printk("\n");
1670}
1671
1672
1673static void sync_sbs(mddev_t * mddev, int nospares)
1674{
1675
1676
1677
1678
1679
1680
1681 mdk_rdev_t *rdev;
1682 struct list_head *tmp;
1683
1684 rdev_for_each(rdev, tmp, mddev) {
1685 if (rdev->sb_events == mddev->events ||
1686 (nospares &&
1687 rdev->raid_disk < 0 &&
1688 (rdev->sb_events&1)==0 &&
1689 rdev->sb_events+1 == mddev->events)) {
1690
1691 rdev->sb_loaded = 2;
1692 } else {
1693 super_types[mddev->major_version].
1694 sync_super(mddev, rdev);
1695 rdev->sb_loaded = 1;
1696 }
1697 }
1698}
1699
1700static void md_update_sb(mddev_t * mddev, int force_change)
1701{
1702 struct list_head *tmp;
1703 mdk_rdev_t *rdev;
1704 int sync_req;
1705 int nospares = 0;
1706
1707 if (mddev->external)
1708 return;
1709repeat:
1710 spin_lock_irq(&mddev->write_lock);
1711
1712 set_bit(MD_CHANGE_PENDING, &mddev->flags);
1713 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
1714 force_change = 1;
1715 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
1716
1717
1718
1719
1720 nospares = 1;
1721 if (force_change)
1722 nospares = 0;
1723 if (mddev->degraded)
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733 nospares = 0;
1734
1735 sync_req = mddev->in_sync;
1736 mddev->utime = get_seconds();
1737
1738
1739
1740 if (nospares
1741 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
1742 && (mddev->events & 1)
1743 && mddev->events != 1)
1744 mddev->events--;
1745 else {
1746
1747 mddev->events ++;
1748 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) {
1749
1750 if ((mddev->events&1)==0) {
1751 mddev->events++;
1752 nospares = 0;
1753 }
1754 } else {
1755
1756 if ((mddev->events&1)) {
1757 mddev->events++;
1758 nospares = 0;
1759 }
1760 }
1761 }
1762
1763 if (!mddev->events) {
1764
1765
1766
1767
1768
1769 MD_BUG();
1770 mddev->events --;
1771 }
1772
1773
1774
1775
1776
1777 if (!mddev->persistent) {
1778 if (!mddev->external)
1779 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
1780
1781 spin_unlock_irq(&mddev->write_lock);
1782 wake_up(&mddev->sb_wait);
1783 return;
1784 }
1785 sync_sbs(mddev, nospares);
1786 spin_unlock_irq(&mddev->write_lock);
1787
1788 dprintk(KERN_INFO
1789 "md: updating %s RAID superblock on device (in sync %d)\n",
1790 mdname(mddev),mddev->in_sync);
1791
1792 bitmap_update_sb(mddev->bitmap);
1793 rdev_for_each(rdev, tmp, mddev) {
1794 char b[BDEVNAME_SIZE];
1795 dprintk(KERN_INFO "md: ");
1796 if (rdev->sb_loaded != 1)
1797 continue;
1798 if (test_bit(Faulty, &rdev->flags))
1799 dprintk("(skipping faulty ");
1800
1801 dprintk("%s ", bdevname(rdev->bdev,b));
1802 if (!test_bit(Faulty, &rdev->flags)) {
1803 md_super_write(mddev,rdev,
1804 rdev->sb_start, rdev->sb_size,
1805 rdev->sb_page);
1806 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1807 bdevname(rdev->bdev,b),
1808 (unsigned long long)rdev->sb_start);
1809 rdev->sb_events = mddev->events;
1810
1811 } else
1812 dprintk(")\n");
1813 if (mddev->level == LEVEL_MULTIPATH)
1814
1815 break;
1816 }
1817 md_super_wait(mddev);
1818
1819
1820 spin_lock_irq(&mddev->write_lock);
1821 if (mddev->in_sync != sync_req ||
1822 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
1823
1824 spin_unlock_irq(&mddev->write_lock);
1825 goto repeat;
1826 }
1827 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
1828 spin_unlock_irq(&mddev->write_lock);
1829 wake_up(&mddev->sb_wait);
1830
1831}
1832
1833
1834
1835
/*
 * Compare a user-supplied command with an expected keyword.
 *
 * Returns 1 when @cmd equals @str exactly, or equals @str followed by a
 * single trailing newline; returns 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* skip the common prefix */
	for (; *cmd != '\0' && *str != '\0' && *cmd == *str; cmd++, str++)
		;

	/* tolerate one newline at the end of the command */
	if (*cmd == '\n')
		cmd++;

	/* both strings must now be exhausted */
	return *str == '\0' && *cmd == '\0';
}
1852
1853struct rdev_sysfs_entry {
1854 struct attribute attr;
1855 ssize_t (*show)(mdk_rdev_t *, char *);
1856 ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
1857};
1858
1859static ssize_t
1860state_show(mdk_rdev_t *rdev, char *page)
1861{
1862 char *sep = "";
1863 size_t len = 0;
1864
1865 if (test_bit(Faulty, &rdev->flags)) {
1866 len+= sprintf(page+len, "%sfaulty",sep);
1867 sep = ",";
1868 }
1869 if (test_bit(In_sync, &rdev->flags)) {
1870 len += sprintf(page+len, "%sin_sync",sep);
1871 sep = ",";
1872 }
1873 if (test_bit(WriteMostly, &rdev->flags)) {
1874 len += sprintf(page+len, "%swrite_mostly",sep);
1875 sep = ",";
1876 }
1877 if (test_bit(Blocked, &rdev->flags)) {
1878 len += sprintf(page+len, "%sblocked", sep);
1879 sep = ",";
1880 }
1881 if (!test_bit(Faulty, &rdev->flags) &&
1882 !test_bit(In_sync, &rdev->flags)) {
1883 len += sprintf(page+len, "%sspare", sep);
1884 sep = ",";
1885 }
1886 return len+sprintf(page+len, "\n");
1887}
1888
1889static ssize_t
1890state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1891{
1892
1893
1894
1895
1896
1897
1898
1899
1900 int err = -EINVAL;
1901 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
1902 md_error(rdev->mddev, rdev);
1903 err = 0;
1904 } else if (cmd_match(buf, "remove")) {
1905 if (rdev->raid_disk >= 0)
1906 err = -EBUSY;
1907 else {
1908 mddev_t *mddev = rdev->mddev;
1909 kick_rdev_from_array(rdev);
1910 if (mddev->pers)
1911 md_update_sb(mddev, 1);
1912 md_new_event(mddev);
1913 err = 0;
1914 }
1915 } else if (cmd_match(buf, "writemostly")) {
1916 set_bit(WriteMostly, &rdev->flags);
1917 err = 0;
1918 } else if (cmd_match(buf, "-writemostly")) {
1919 clear_bit(WriteMostly, &rdev->flags);
1920 err = 0;
1921 } else if (cmd_match(buf, "blocked")) {
1922 set_bit(Blocked, &rdev->flags);
1923 err = 0;
1924 } else if (cmd_match(buf, "-blocked")) {
1925 clear_bit(Blocked, &rdev->flags);
1926 wake_up(&rdev->blocked_wait);
1927 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
1928 md_wakeup_thread(rdev->mddev->thread);
1929
1930 err = 0;
1931 }
1932 if (!err && rdev->sysfs_state)
1933 sysfs_notify_dirent(rdev->sysfs_state);
1934 return err ? err : len;
1935}
1936static struct rdev_sysfs_entry rdev_state =
1937__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
1938
1939static ssize_t
1940errors_show(mdk_rdev_t *rdev, char *page)
1941{
1942 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
1943}
1944
1945static ssize_t
1946errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1947{
1948 char *e;
1949 unsigned long n = simple_strtoul(buf, &e, 10);
1950 if (*buf && (*e == 0 || *e == '\n')) {
1951 atomic_set(&rdev->corrected_errors, n);
1952 return len;
1953 }
1954 return -EINVAL;
1955}
1956static struct rdev_sysfs_entry rdev_errors =
1957__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
1958
1959static ssize_t
1960slot_show(mdk_rdev_t *rdev, char *page)
1961{
1962 if (rdev->raid_disk < 0)
1963 return sprintf(page, "none\n");
1964 else
1965 return sprintf(page, "%d\n", rdev->raid_disk);
1966}
1967
1968static ssize_t
1969slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1970{
1971 char *e;
1972 int err;
1973 char nm[20];
1974 int slot = simple_strtoul(buf, &e, 10);
1975 if (strncmp(buf, "none", 4)==0)
1976 slot = -1;
1977 else if (e==buf || (*e && *e!= '\n'))
1978 return -EINVAL;
1979 if (rdev->mddev->pers && slot == -1) {
1980
1981
1982
1983
1984
1985
1986
1987 if (rdev->raid_disk == -1)
1988 return -EEXIST;
1989
1990 if (rdev->mddev->pers->hot_add_disk == NULL)
1991 return -EINVAL;
1992 err = rdev->mddev->pers->
1993 hot_remove_disk(rdev->mddev, rdev->raid_disk);
1994 if (err)
1995 return err;
1996 sprintf(nm, "rd%d", rdev->raid_disk);
1997 sysfs_remove_link(&rdev->mddev->kobj, nm);
1998 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
1999 md_wakeup_thread(rdev->mddev->thread);
2000 } else if (rdev->mddev->pers) {
2001 mdk_rdev_t *rdev2;
2002 struct list_head *tmp;
2003
2004
2005
2006
2007 if (rdev->raid_disk != -1)
2008 return -EBUSY;
2009
2010 if (rdev->mddev->pers->hot_add_disk == NULL)
2011 return -EINVAL;
2012
2013 rdev_for_each(rdev2, tmp, rdev->mddev)
2014 if (rdev2->raid_disk == slot)
2015 return -EEXIST;
2016
2017 rdev->raid_disk = slot;
2018 if (test_bit(In_sync, &rdev->flags))
2019 rdev->saved_raid_disk = slot;
2020 else
2021 rdev->saved_raid_disk = -1;
2022 err = rdev->mddev->pers->
2023 hot_add_disk(rdev->mddev, rdev);
2024 if (err) {
2025 rdev->raid_disk = -1;
2026 return err;
2027 } else
2028 sysfs_notify_dirent(rdev->sysfs_state);
2029 sprintf(nm, "rd%d", rdev->raid_disk);
2030 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
2031 printk(KERN_WARNING
2032 "md: cannot register "
2033 "%s for %s\n",
2034 nm, mdname(rdev->mddev));
2035
2036
2037 } else {
2038 if (slot >= rdev->mddev->raid_disks)
2039 return -ENOSPC;
2040 rdev->raid_disk = slot;
2041
2042 clear_bit(Faulty, &rdev->flags);
2043 clear_bit(WriteMostly, &rdev->flags);
2044 set_bit(In_sync, &rdev->flags);
2045 sysfs_notify_dirent(rdev->sysfs_state);
2046 }
2047 return len;
2048}
2049
2050
2051static struct rdev_sysfs_entry rdev_slot =
2052__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2053
2054static ssize_t
2055offset_show(mdk_rdev_t *rdev, char *page)
2056{
2057 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2058}
2059
2060static ssize_t
2061offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2062{
2063 char *e;
2064 unsigned long long offset = simple_strtoull(buf, &e, 10);
2065 if (e==buf || (*e && *e != '\n'))
2066 return -EINVAL;
2067 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2068 return -EBUSY;
2069 if (rdev->size && rdev->mddev->external)
2070
2071
2072 return -EBUSY;
2073 rdev->data_offset = offset;
2074 return len;
2075}
2076
2077static struct rdev_sysfs_entry rdev_offset =
2078__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2079
2080static ssize_t
2081rdev_size_show(mdk_rdev_t *rdev, char *page)
2082{
2083 return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
2084}
2085
2086static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2087{
2088
2089 if (s1+l1 <= s2)
2090 return 0;
2091 if (s2+l2 <= s1)
2092 return 0;
2093 return 1;
2094}
2095
2096static ssize_t
2097rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2098{
2099 unsigned long long size;
2100 unsigned long long oldsize = rdev->size;
2101 mddev_t *my_mddev = rdev->mddev;
2102
2103 if (strict_strtoull(buf, 10, &size) < 0)
2104 return -EINVAL;
2105 if (my_mddev->pers && rdev->raid_disk >= 0) {
2106 if (my_mddev->persistent) {
2107 size = super_types[my_mddev->major_version].
2108 rdev_size_change(rdev, size * 2);
2109 if (!size)
2110 return -EBUSY;
2111 } else if (!size) {
2112 size = (rdev->bdev->bd_inode->i_size >> 10);
2113 size -= rdev->data_offset/2;
2114 }
2115 }
2116 if (size < my_mddev->size)
2117 return -EINVAL;
2118
2119 rdev->size = size;
2120 if (size > oldsize && my_mddev->external) {
2121
2122
2123
2124
2125
2126 mddev_t *mddev;
2127 int overlap = 0;
2128 struct list_head *tmp, *tmp2;
2129
2130 mddev_unlock(my_mddev);
2131 for_each_mddev(mddev, tmp) {
2132 mdk_rdev_t *rdev2;
2133
2134 mddev_lock(mddev);
2135 rdev_for_each(rdev2, tmp2, mddev)
2136 if (test_bit(AllReserved, &rdev2->flags) ||
2137 (rdev->bdev == rdev2->bdev &&
2138 rdev != rdev2 &&
2139 overlaps(rdev->data_offset, rdev->size * 2,
2140 rdev2->data_offset,
2141 rdev2->size * 2))) {
2142 overlap = 1;
2143 break;
2144 }
2145 mddev_unlock(mddev);
2146 if (overlap) {
2147 mddev_put(mddev);
2148 break;
2149 }
2150 }
2151 mddev_lock(my_mddev);
2152 if (overlap) {
2153
2154
2155
2156
2157
2158
2159 rdev->size = oldsize;
2160 return -EBUSY;
2161 }
2162 }
2163 return len;
2164}
2165
2166static struct rdev_sysfs_entry rdev_size =
2167__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2168
2169static struct attribute *rdev_default_attrs[] = {
2170 &rdev_state.attr,
2171 &rdev_errors.attr,
2172 &rdev_slot.attr,
2173 &rdev_offset.attr,
2174 &rdev_size.attr,
2175 NULL,
2176};
2177static ssize_t
2178rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2179{
2180 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2181 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2182 mddev_t *mddev = rdev->mddev;
2183 ssize_t rv;
2184
2185 if (!entry->show)
2186 return -EIO;
2187
2188 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2189 if (!rv) {
2190 if (rdev->mddev == NULL)
2191 rv = -EBUSY;
2192 else
2193 rv = entry->show(rdev, page);
2194 mddev_unlock(mddev);
2195 }
2196 return rv;
2197}
2198
2199static ssize_t
2200rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2201 const char *page, size_t length)
2202{
2203 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2204 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2205 ssize_t rv;
2206 mddev_t *mddev = rdev->mddev;
2207
2208 if (!entry->store)
2209 return -EIO;
2210 if (!capable(CAP_SYS_ADMIN))
2211 return -EACCES;
2212 rv = mddev ? mddev_lock(mddev): -EBUSY;
2213 if (!rv) {
2214 if (rdev->mddev == NULL)
2215 rv = -EBUSY;
2216 else
2217 rv = entry->store(rdev, page, length);
2218 mddev_unlock(mddev);
2219 }
2220 return rv;
2221}
2222
2223static void rdev_free(struct kobject *ko)
2224{
2225 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2226 kfree(rdev);
2227}
2228static struct sysfs_ops rdev_sysfs_ops = {
2229 .show = rdev_attr_show,
2230 .store = rdev_attr_store,
2231};
2232static struct kobj_type rdev_ktype = {
2233 .release = rdev_free,
2234 .sysfs_ops = &rdev_sysfs_ops,
2235 .default_attrs = rdev_default_attrs,
2236};
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
2249{
2250 char b[BDEVNAME_SIZE];
2251 int err;
2252 mdk_rdev_t *rdev;
2253 sector_t size;
2254
2255 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
2256 if (!rdev) {
2257 printk(KERN_ERR "md: could not alloc mem for new device!\n");
2258 return ERR_PTR(-ENOMEM);
2259 }
2260
2261 if ((err = alloc_disk_sb(rdev)))
2262 goto abort_free;
2263
2264 err = lock_rdev(rdev, newdev, super_format == -2);
2265 if (err)
2266 goto abort_free;
2267
2268 kobject_init(&rdev->kobj, &rdev_ktype);
2269
2270 rdev->desc_nr = -1;
2271 rdev->saved_raid_disk = -1;
2272 rdev->raid_disk = -1;
2273 rdev->flags = 0;
2274 rdev->data_offset = 0;
2275 rdev->sb_events = 0;
2276 atomic_set(&rdev->nr_pending, 0);
2277 atomic_set(&rdev->read_errors, 0);
2278 atomic_set(&rdev->corrected_errors, 0);
2279
2280 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2281 if (!size) {
2282 printk(KERN_WARNING
2283 "md: %s has zero or unknown size, marking faulty!\n",
2284 bdevname(rdev->bdev,b));
2285 err = -EINVAL;
2286 goto abort_free;
2287 }
2288
2289 if (super_format >= 0) {
2290 err = super_types[super_format].
2291 load_super(rdev, NULL, super_minor);
2292 if (err == -EINVAL) {
2293 printk(KERN_WARNING
2294 "md: %s does not have a valid v%d.%d "
2295 "superblock, not importing!\n",
2296 bdevname(rdev->bdev,b),
2297 super_format, super_minor);
2298 goto abort_free;
2299 }
2300 if (err < 0) {
2301 printk(KERN_WARNING
2302 "md: could not read %s's sb, not importing!\n",
2303 bdevname(rdev->bdev,b));
2304 goto abort_free;
2305 }
2306 }
2307
2308 INIT_LIST_HEAD(&rdev->same_set);
2309 init_waitqueue_head(&rdev->blocked_wait);
2310
2311 return rdev;
2312
2313abort_free:
2314 if (rdev->sb_page) {
2315 if (rdev->bdev)
2316 unlock_rdev(rdev);
2317 free_disk_sb(rdev);
2318 }
2319 kfree(rdev);
2320 return ERR_PTR(err);
2321}
2322
2323
2324
2325
2326
2327
2328static void analyze_sbs(mddev_t * mddev)
2329{
2330 int i;
2331 struct list_head *tmp;
2332 mdk_rdev_t *rdev, *freshest;
2333 char b[BDEVNAME_SIZE];
2334
2335 freshest = NULL;
2336 rdev_for_each(rdev, tmp, mddev)
2337 switch (super_types[mddev->major_version].
2338 load_super(rdev, freshest, mddev->minor_version)) {
2339 case 1:
2340 freshest = rdev;
2341 break;
2342 case 0:
2343 break;
2344 default:
2345 printk( KERN_ERR \
2346 "md: fatal superblock inconsistency in %s"
2347 " -- removing from array\n",
2348 bdevname(rdev->bdev,b));
2349 kick_rdev_from_array(rdev);
2350 }
2351
2352
2353 super_types[mddev->major_version].
2354 validate_super(mddev, freshest);
2355
2356 i = 0;
2357 rdev_for_each(rdev, tmp, mddev) {
2358 if (rdev != freshest)
2359 if (super_types[mddev->major_version].
2360 validate_super(mddev, rdev)) {
2361 printk(KERN_WARNING "md: kicking non-fresh %s"
2362 " from array!\n",
2363 bdevname(rdev->bdev,b));
2364 kick_rdev_from_array(rdev);
2365 continue;
2366 }
2367 if (mddev->level == LEVEL_MULTIPATH) {
2368 rdev->desc_nr = i++;
2369 rdev->raid_disk = rdev->desc_nr;
2370 set_bit(In_sync, &rdev->flags);
2371 } else if (rdev->raid_disk >= mddev->raid_disks) {
2372 rdev->raid_disk = -1;
2373 clear_bit(In_sync, &rdev->flags);
2374 }
2375 }
2376
2377
2378
2379 if (mddev->recovery_cp != MaxSector &&
2380 mddev->level >= 1)
2381 printk(KERN_ERR "md: %s: raid array is not clean"
2382 " -- starting background reconstruction\n",
2383 mdname(mddev));
2384
2385}
2386
2387static void md_safemode_timeout(unsigned long data);
2388
2389static ssize_t
2390safe_delay_show(mddev_t *mddev, char *page)
2391{
2392 int msec = (mddev->safemode_delay*1000)/HZ;
2393 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2394}
2395static ssize_t
2396safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2397{
2398 int scale=1;
2399 int dot=0;
2400 int i;
2401 unsigned long msec;
2402 char buf[30];
2403
2404
2405 if (len >= sizeof(buf))
2406 return -EINVAL;
2407 strlcpy(buf, cbuf, sizeof(buf));
2408 for (i=0; i<len; i++) {
2409 if (dot) {
2410 if (isdigit(buf[i])) {
2411 buf[i-1] = buf[i];
2412 scale *= 10;
2413 }
2414 buf[i] = 0;
2415 } else if (buf[i] == '.') {
2416 dot=1;
2417 buf[i] = 0;
2418 }
2419 }
2420 if (strict_strtoul(buf, 10, &msec) < 0)
2421 return -EINVAL;
2422 msec = (msec * 1000) / scale;
2423 if (msec == 0)
2424 mddev->safemode_delay = 0;
2425 else {
2426 unsigned long old_delay = mddev->safemode_delay;
2427 mddev->safemode_delay = (msec*HZ)/1000;
2428 if (mddev->safemode_delay == 0)
2429 mddev->safemode_delay = 1;
2430 if (mddev->safemode_delay < old_delay)
2431 md_safemode_timeout((unsigned long)mddev);
2432 }
2433 return len;
2434}
2435static struct md_sysfs_entry md_safe_delay =
2436__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
2437
2438static ssize_t
2439level_show(mddev_t *mddev, char *page)
2440{
2441 struct mdk_personality *p = mddev->pers;
2442 if (p)
2443 return sprintf(page, "%s\n", p->name);
2444 else if (mddev->clevel[0])
2445 return sprintf(page, "%s\n", mddev->clevel);
2446 else if (mddev->level != LEVEL_NONE)
2447 return sprintf(page, "%d\n", mddev->level);
2448 else
2449 return 0;
2450}
2451
2452static ssize_t
2453level_store(mddev_t *mddev, const char *buf, size_t len)
2454{
2455 ssize_t rv = len;
2456 if (mddev->pers)
2457 return -EBUSY;
2458 if (len == 0)
2459 return 0;
2460 if (len >= sizeof(mddev->clevel))
2461 return -ENOSPC;
2462 strncpy(mddev->clevel, buf, len);
2463 if (mddev->clevel[len-1] == '\n')
2464 len--;
2465 mddev->clevel[len] = 0;
2466 mddev->level = LEVEL_NONE;
2467 return rv;
2468}
2469
2470static struct md_sysfs_entry md_level =
2471__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
2472
2473
2474static ssize_t
2475layout_show(mddev_t *mddev, char *page)
2476{
2477
2478 if (mddev->reshape_position != MaxSector &&
2479 mddev->layout != mddev->new_layout)
2480 return sprintf(page, "%d (%d)\n",
2481 mddev->new_layout, mddev->layout);
2482 return sprintf(page, "%d\n", mddev->layout);
2483}
2484
2485static ssize_t
2486layout_store(mddev_t *mddev, const char *buf, size_t len)
2487{
2488 char *e;
2489 unsigned long n = simple_strtoul(buf, &e, 10);
2490
2491 if (!*buf || (*e && *e != '\n'))
2492 return -EINVAL;
2493
2494 if (mddev->pers)
2495 return -EBUSY;
2496 if (mddev->reshape_position != MaxSector)
2497 mddev->new_layout = n;
2498 else
2499 mddev->layout = n;
2500 return len;
2501}
2502static struct md_sysfs_entry md_layout =
2503__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
2504
2505
2506static ssize_t
2507raid_disks_show(mddev_t *mddev, char *page)
2508{
2509 if (mddev->raid_disks == 0)
2510 return 0;
2511 if (mddev->reshape_position != MaxSector &&
2512 mddev->delta_disks != 0)
2513 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
2514 mddev->raid_disks - mddev->delta_disks);
2515 return sprintf(page, "%d\n", mddev->raid_disks);
2516}
2517
2518static int update_raid_disks(mddev_t *mddev, int raid_disks);
2519
2520static ssize_t
2521raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
2522{
2523 char *e;
2524 int rv = 0;
2525 unsigned long n = simple_strtoul(buf, &e, 10);
2526
2527 if (!*buf || (*e && *e != '\n'))
2528 return -EINVAL;
2529
2530 if (mddev->pers)
2531 rv = update_raid_disks(mddev, n);
2532 else if (mddev->reshape_position != MaxSector) {
2533 int olddisks = mddev->raid_disks - mddev->delta_disks;
2534 mddev->delta_disks = n - olddisks;
2535 mddev->raid_disks = n;
2536 } else
2537 mddev->raid_disks = n;
2538 return rv ? rv : len;
2539}
2540static struct md_sysfs_entry md_raid_disks =
2541__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
2542
2543static ssize_t
2544chunk_size_show(mddev_t *mddev, char *page)
2545{
2546 if (mddev->reshape_position != MaxSector &&
2547 mddev->chunk_size != mddev->new_chunk)
2548 return sprintf(page, "%d (%d)\n", mddev->new_chunk,
2549 mddev->chunk_size);
2550 return sprintf(page, "%d\n", mddev->chunk_size);
2551}
2552
2553static ssize_t
2554chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2555{
2556
2557 char *e;
2558 unsigned long n = simple_strtoul(buf, &e, 10);
2559
2560 if (!*buf || (*e && *e != '\n'))
2561 return -EINVAL;
2562
2563 if (mddev->pers)
2564 return -EBUSY;
2565 else if (mddev->reshape_position != MaxSector)
2566 mddev->new_chunk = n;
2567 else
2568 mddev->chunk_size = n;
2569 return len;
2570}
2571static struct md_sysfs_entry md_chunk_size =
2572__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2573
2574static ssize_t
2575resync_start_show(mddev_t *mddev, char *page)
2576{
2577 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2578}
2579
2580static ssize_t
2581resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2582{
2583 char *e;
2584 unsigned long long n = simple_strtoull(buf, &e, 10);
2585
2586 if (mddev->pers)
2587 return -EBUSY;
2588 if (!*buf || (*e && *e != '\n'))
2589 return -EINVAL;
2590
2591 mddev->recovery_cp = n;
2592 return len;
2593}
2594static struct md_sysfs_entry md_resync_start =
2595__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
2634 write_pending, active_idle, bad_word};
2635static char *array_states[] = {
2636 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
2637 "write-pending", "active-idle", NULL };
2638
/*
 * Return the index of the first entry in the NULL-terminated @list that
 * cmd_match() accepts for @word, or the index of the NULL terminator when
 * nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
2647
2648static ssize_t
2649array_state_show(mddev_t *mddev, char *page)
2650{
2651 enum array_state st = inactive;
2652
2653 if (mddev->pers)
2654 switch(mddev->ro) {
2655 case 1:
2656 st = readonly;
2657 break;
2658 case 2:
2659 st = read_auto;
2660 break;
2661 case 0:
2662 if (mddev->in_sync)
2663 st = clean;
2664 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
2665 st = write_pending;
2666 else if (mddev->safemode)
2667 st = active_idle;
2668 else
2669 st = active;
2670 }
2671 else {
2672 if (list_empty(&mddev->disks) &&
2673 mddev->raid_disks == 0 &&
2674 mddev->size == 0)
2675 st = clear;
2676 else
2677 st = inactive;
2678 }
2679 return sprintf(page, "%s\n", array_states[st]);
2680}
2681
2682static int do_md_stop(mddev_t * mddev, int ro, int is_open);
2683static int do_md_run(mddev_t * mddev);
2684static int restart_array(mddev_t *mddev);
2685
2686static ssize_t
2687array_state_store(mddev_t *mddev, const char *buf, size_t len)
2688{
2689 int err = -EINVAL;
2690 enum array_state st = match_word(buf, array_states);
2691 switch(st) {
2692 case bad_word:
2693 break;
2694 case clear:
2695
2696 if (atomic_read(&mddev->openers) > 0)
2697 return -EBUSY;
2698 err = do_md_stop(mddev, 0, 0);
2699 break;
2700 case inactive:
2701
2702 if (mddev->pers) {
2703 if (atomic_read(&mddev->openers) > 0)
2704 return -EBUSY;
2705 err = do_md_stop(mddev, 2, 0);
2706 } else
2707 err = 0;
2708 break;
2709 case suspended:
2710 break;
2711 case readonly:
2712 if (mddev->pers)
2713 err = do_md_stop(mddev, 1, 0);
2714 else {
2715 mddev->ro = 1;
2716 set_disk_ro(mddev->gendisk, 1);
2717 err = do_md_run(mddev);
2718 }
2719 break;
2720 case read_auto:
2721 if (mddev->pers) {
2722 if (mddev->ro == 0)
2723 err = do_md_stop(mddev, 1, 0);
2724 else if (mddev->ro == 1)
2725 err = restart_array(mddev);
2726 if (err == 0) {
2727 mddev->ro = 2;
2728 set_disk_ro(mddev->gendisk, 0);
2729 }
2730 } else {
2731 mddev->ro = 2;
2732 err = do_md_run(mddev);
2733 }
2734 break;
2735 case clean:
2736 if (mddev->pers) {
2737 restart_array(mddev);
2738 spin_lock_irq(&mddev->write_lock);
2739 if (atomic_read(&mddev->writes_pending) == 0) {
2740 if (mddev->in_sync == 0) {
2741 mddev->in_sync = 1;
2742 if (mddev->safemode == 1)
2743 mddev->safemode = 0;
2744 if (mddev->persistent)
2745 set_bit(MD_CHANGE_CLEAN,
2746 &mddev->flags);
2747 }
2748 err = 0;
2749 } else
2750 err = -EBUSY;
2751 spin_unlock_irq(&mddev->write_lock);
2752 } else {
2753 mddev->ro = 0;
2754 mddev->recovery_cp = MaxSector;
2755 err = do_md_run(mddev);
2756 }
2757 break;
2758 case active:
2759 if (mddev->pers) {
2760 restart_array(mddev);
2761 if (mddev->external)
2762 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2763 wake_up(&mddev->sb_wait);
2764 err = 0;
2765 } else {
2766 mddev->ro = 0;
2767 set_disk_ro(mddev->gendisk, 0);
2768 err = do_md_run(mddev);
2769 }
2770 break;
2771 case write_pending:
2772 case active_idle:
2773
2774 break;
2775 }
2776 if (err)
2777 return err;
2778 else {
2779 sysfs_notify_dirent(mddev->sysfs_state);
2780 return len;
2781 }
2782}
2783static struct md_sysfs_entry md_array_state =
2784__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
2785
2786static ssize_t
2787null_show(mddev_t *mddev, char *page)
2788{
2789 return -EINVAL;
2790}
2791
2792static ssize_t
2793new_dev_store(mddev_t *mddev, const char *buf, size_t len)
2794{
2795
2796
2797
2798
2799
2800
2801
2802 char *e;
2803 int major = simple_strtoul(buf, &e, 10);
2804 int minor;
2805 dev_t dev;
2806 mdk_rdev_t *rdev;
2807 int err;
2808
2809 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
2810 return -EINVAL;
2811 minor = simple_strtoul(e+1, &e, 10);
2812 if (*e && *e != '\n')
2813 return -EINVAL;
2814 dev = MKDEV(major, minor);
2815 if (major != MAJOR(dev) ||
2816 minor != MINOR(dev))
2817 return -EOVERFLOW;
2818
2819
2820 if (mddev->persistent) {
2821 rdev = md_import_device(dev, mddev->major_version,
2822 mddev->minor_version);
2823 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
2824 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
2825 mdk_rdev_t, same_set);
2826 err = super_types[mddev->major_version]
2827 .load_super(rdev, rdev0, mddev->minor_version);
2828 if (err < 0)
2829 goto out;
2830 }
2831 } else if (mddev->external)
2832 rdev = md_import_device(dev, -2, -1);
2833 else
2834 rdev = md_import_device(dev, -1, -1);
2835
2836 if (IS_ERR(rdev))
2837 return PTR_ERR(rdev);
2838 err = bind_rdev_to_array(rdev, mddev);
2839 out:
2840 if (err)
2841 export_rdev(rdev);
2842 return err ? err : len;
2843}
2844
2845static struct md_sysfs_entry md_new_device =
2846__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
2847
2848static ssize_t
2849bitmap_store(mddev_t *mddev, const char *buf, size_t len)
2850{
2851 char *end;
2852 unsigned long chunk, end_chunk;
2853
2854 if (!mddev->bitmap)
2855 goto out;
2856
2857 while (*buf) {
2858 chunk = end_chunk = simple_strtoul(buf, &end, 0);
2859 if (buf == end) break;
2860 if (*end == '-') {
2861 buf = end + 1;
2862 end_chunk = simple_strtoul(buf, &end, 0);
2863 if (buf == end) break;
2864 }
2865 if (*end && !isspace(*end)) break;
2866 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
2867 buf = end;
2868 while (isspace(*buf)) buf++;
2869 }
2870 bitmap_unplug(mddev->bitmap);
2871out:
2872 return len;
2873}
2874
2875static struct md_sysfs_entry md_bitmap =
2876__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
2877
2878static ssize_t
2879size_show(mddev_t *mddev, char *page)
2880{
2881 return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
2882}
2883
2884static int update_size(mddev_t *mddev, sector_t num_sectors);
2885
2886static ssize_t
2887size_store(mddev_t *mddev, const char *buf, size_t len)
2888{
2889
2890
2891
2892
2893 char *e;
2894 int err = 0;
2895 unsigned long long size = simple_strtoull(buf, &e, 10);
2896 if (!*buf || *buf == '\n' ||
2897 (*e && *e != '\n'))
2898 return -EINVAL;
2899
2900 if (mddev->pers) {
2901 err = update_size(mddev, size * 2);
2902 md_update_sb(mddev, 1);
2903 } else {
2904 if (mddev->size == 0 ||
2905 mddev->size > size)
2906 mddev->size = size;
2907 else
2908 err = -ENOSPC;
2909 }
2910 return err ? err : len;
2911}
2912
2913static struct md_sysfs_entry md_size =
2914__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
2915
2916
2917
2918
2919
2920
2921
2922
2923static ssize_t
2924metadata_show(mddev_t *mddev, char *page)
2925{
2926 if (mddev->persistent)
2927 return sprintf(page, "%d.%d\n",
2928 mddev->major_version, mddev->minor_version);
2929 else if (mddev->external)
2930 return sprintf(page, "external:%s\n", mddev->metadata_type);
2931 else
2932 return sprintf(page, "none\n");
2933}
2934
2935static ssize_t
2936metadata_store(mddev_t *mddev, const char *buf, size_t len)
2937{
2938 int major, minor;
2939 char *e;
2940
2941
2942
2943
2944 if (mddev->external && strncmp(buf, "external:", 9) == 0)
2945 ;
2946 else if (!list_empty(&mddev->disks))
2947 return -EBUSY;
2948
2949 if (cmd_match(buf, "none")) {
2950 mddev->persistent = 0;
2951 mddev->external = 0;
2952 mddev->major_version = 0;
2953 mddev->minor_version = 90;
2954 return len;
2955 }
2956 if (strncmp(buf, "external:", 9) == 0) {
2957 size_t namelen = len-9;
2958 if (namelen >= sizeof(mddev->metadata_type))
2959 namelen = sizeof(mddev->metadata_type)-1;
2960 strncpy(mddev->metadata_type, buf+9, namelen);
2961 mddev->metadata_type[namelen] = 0;
2962 if (namelen && mddev->metadata_type[namelen-1] == '\n')
2963 mddev->metadata_type[--namelen] = 0;
2964 mddev->persistent = 0;
2965 mddev->external = 1;
2966 mddev->major_version = 0;
2967 mddev->minor_version = 90;
2968 return len;
2969 }
2970 major = simple_strtoul(buf, &e, 10);
2971 if (e==buf || *e != '.')
2972 return -EINVAL;
2973 buf = e+1;
2974 minor = simple_strtoul(buf, &e, 10);
2975 if (e==buf || (*e && *e != '\n') )
2976 return -EINVAL;
2977 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
2978 return -ENOENT;
2979 mddev->major_version = major;
2980 mddev->minor_version = minor;
2981 mddev->persistent = 1;
2982 mddev->external = 0;
2983 return len;
2984}
2985
2986static struct md_sysfs_entry md_metadata =
2987__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2988
2989static ssize_t
2990action_show(mddev_t *mddev, char *page)
2991{
2992 char *type = "idle";
2993 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
2994 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
2995 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2996 type = "reshape";
2997 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2998 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
2999 type = "resync";
3000 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
3001 type = "check";
3002 else
3003 type = "repair";
3004 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
3005 type = "recover";
3006 }
3007 return sprintf(page, "%s\n", type);
3008}
3009
3010static ssize_t
3011action_store(mddev_t *mddev, const char *page, size_t len)
3012{
3013 if (!mddev->pers || !mddev->pers->sync_request)
3014 return -EINVAL;
3015
3016 if (cmd_match(page, "idle")) {
3017 if (mddev->sync_thread) {
3018 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3019 md_unregister_thread(mddev->sync_thread);
3020 mddev->sync_thread = NULL;
3021 mddev->recovery = 0;
3022 }
3023 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3024 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
3025 return -EBUSY;
3026 else if (cmd_match(page, "resync"))
3027 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3028 else if (cmd_match(page, "recover")) {
3029 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
3030 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3031 } else if (cmd_match(page, "reshape")) {
3032 int err;
3033 if (mddev->pers->start_reshape == NULL)
3034 return -EINVAL;
3035 err = mddev->pers->start_reshape(mddev);
3036 if (err)
3037 return err;
3038 sysfs_notify(&mddev->kobj, NULL, "degraded");
3039 } else {
3040 if (cmd_match(page, "check"))
3041 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3042 else if (!cmd_match(page, "repair"))
3043 return -EINVAL;
3044 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
3045 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3046 }
3047 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3048 md_wakeup_thread(mddev->thread);
3049 sysfs_notify(&mddev->kobj, NULL, "sync_action");
3050 return len;
3051}
3052
3053static ssize_t
3054mismatch_cnt_show(mddev_t *mddev, char *page)
3055{
3056 return sprintf(page, "%llu\n",
3057 (unsigned long long) mddev->resync_mismatches);
3058}
3059
3060static struct md_sysfs_entry md_scan_mode =
3061__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
3062
3063
3064static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
3065
3066static ssize_t
3067sync_min_show(mddev_t *mddev, char *page)
3068{
3069 return sprintf(page, "%d (%s)\n", speed_min(mddev),
3070 mddev->sync_speed_min ? "local": "system");
3071}
3072
3073static ssize_t
3074sync_min_store(mddev_t *mddev, const char *buf, size_t len)
3075{
3076 int min;
3077 char *e;
3078 if (strncmp(buf, "system", 6)==0) {
3079 mddev->sync_speed_min = 0;
3080 return len;
3081 }
3082 min = simple_strtoul(buf, &e, 10);
3083 if (buf == e || (*e && *e != '\n') || min <= 0)
3084 return -EINVAL;
3085 mddev->sync_speed_min = min;
3086 return len;
3087}
3088
3089static struct md_sysfs_entry md_sync_min =
3090__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
3091
3092static ssize_t
3093sync_max_show(mddev_t *mddev, char *page)
3094{
3095 return sprintf(page, "%d (%s)\n", speed_max(mddev),
3096 mddev->sync_speed_max ? "local": "system");
3097}
3098
3099static ssize_t
3100sync_max_store(mddev_t *mddev, const char *buf, size_t len)
3101{
3102 int max;
3103 char *e;
3104 if (strncmp(buf, "system", 6)==0) {
3105 mddev->sync_speed_max = 0;
3106 return len;
3107 }
3108 max = simple_strtoul(buf, &e, 10);
3109 if (buf == e || (*e && *e != '\n') || max <= 0)
3110 return -EINVAL;
3111 mddev->sync_speed_max = max;
3112 return len;
3113}
3114
3115static struct md_sysfs_entry md_sync_max =
3116__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
3117
3118static ssize_t
3119degraded_show(mddev_t *mddev, char *page)
3120{
3121 return sprintf(page, "%d\n", mddev->degraded);
3122}
3123static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3124
3125static ssize_t
3126sync_force_parallel_show(mddev_t *mddev, char *page)
3127{
3128 return sprintf(page, "%d\n", mddev->parallel_resync);
3129}
3130
3131static ssize_t
3132sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3133{
3134 long n;
3135
3136 if (strict_strtol(buf, 10, &n))
3137 return -EINVAL;
3138
3139 if (n != 0 && n != 1)
3140 return -EINVAL;
3141
3142 mddev->parallel_resync = n;
3143
3144 if (mddev->sync_thread)
3145 wake_up(&resync_wait);
3146
3147 return len;
3148}
3149
3150
3151static struct md_sysfs_entry md_sync_force_parallel =
3152__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3153 sync_force_parallel_show, sync_force_parallel_store);
3154
3155static ssize_t
3156sync_speed_show(mddev_t *mddev, char *page)
3157{
3158 unsigned long resync, dt, db;
3159 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
3160 dt = (jiffies - mddev->resync_mark) / HZ;
3161 if (!dt) dt++;
3162 db = resync - mddev->resync_mark_cnt;
3163 return sprintf(page, "%lu\n", db/dt/2);
3164}
3165
3166static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3167
3168static ssize_t
3169sync_completed_show(mddev_t *mddev, char *page)
3170{
3171 unsigned long max_blocks, resync;
3172
3173 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3174 max_blocks = mddev->resync_max_sectors;
3175 else
3176 max_blocks = mddev->size << 1;
3177
3178 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
3179 return sprintf(page, "%lu / %lu\n", resync, max_blocks);
3180}
3181
3182static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3183
3184static ssize_t
3185min_sync_show(mddev_t *mddev, char *page)
3186{
3187 return sprintf(page, "%llu\n",
3188 (unsigned long long)mddev->resync_min);
3189}
3190static ssize_t
3191min_sync_store(mddev_t *mddev, const char *buf, size_t len)
3192{
3193 unsigned long long min;
3194 if (strict_strtoull(buf, 10, &min))
3195 return -EINVAL;
3196 if (min > mddev->resync_max)
3197 return -EINVAL;
3198 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3199 return -EBUSY;
3200
3201
3202 if (mddev->chunk_size) {
3203 if (min & (sector_t)((mddev->chunk_size>>9)-1))
3204 return -EINVAL;
3205 }
3206 mddev->resync_min = min;
3207
3208 return len;
3209}
3210
3211static struct md_sysfs_entry md_min_sync =
3212__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
3213
3214static ssize_t
3215max_sync_show(mddev_t *mddev, char *page)
3216{
3217 if (mddev->resync_max == MaxSector)
3218 return sprintf(page, "max\n");
3219 else
3220 return sprintf(page, "%llu\n",
3221 (unsigned long long)mddev->resync_max);
3222}
3223static ssize_t
3224max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3225{
3226 if (strncmp(buf, "max", 3) == 0)
3227 mddev->resync_max = MaxSector;
3228 else {
3229 unsigned long long max;
3230 if (strict_strtoull(buf, 10, &max))
3231 return -EINVAL;
3232 if (max < mddev->resync_min)
3233 return -EINVAL;
3234 if (max < mddev->resync_max &&
3235 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3236 return -EBUSY;
3237
3238
3239 if (mddev->chunk_size) {
3240 if (max & (sector_t)((mddev->chunk_size>>9)-1))
3241 return -EINVAL;
3242 }
3243 mddev->resync_max = max;
3244 }
3245 wake_up(&mddev->recovery_wait);
3246 return len;
3247}
3248
3249static struct md_sysfs_entry md_max_sync =
3250__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
3251
3252static ssize_t
3253suspend_lo_show(mddev_t *mddev, char *page)
3254{
3255 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
3256}
3257
3258static ssize_t
3259suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3260{
3261 char *e;
3262 unsigned long long new = simple_strtoull(buf, &e, 10);
3263
3264 if (mddev->pers->quiesce == NULL)
3265 return -EINVAL;
3266 if (buf == e || (*e && *e != '\n'))
3267 return -EINVAL;
3268 if (new >= mddev->suspend_hi ||
3269 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
3270 mddev->suspend_lo = new;
3271 mddev->pers->quiesce(mddev, 2);
3272 return len;
3273 } else
3274 return -EINVAL;
3275}
3276static struct md_sysfs_entry md_suspend_lo =
3277__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
3278
3279
3280static ssize_t
3281suspend_hi_show(mddev_t *mddev, char *page)
3282{
3283 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
3284}
3285
3286static ssize_t
3287suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3288{
3289 char *e;
3290 unsigned long long new = simple_strtoull(buf, &e, 10);
3291
3292 if (mddev->pers->quiesce == NULL)
3293 return -EINVAL;
3294 if (buf == e || (*e && *e != '\n'))
3295 return -EINVAL;
3296 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
3297 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
3298 mddev->suspend_hi = new;
3299 mddev->pers->quiesce(mddev, 1);
3300 mddev->pers->quiesce(mddev, 0);
3301 return len;
3302 } else
3303 return -EINVAL;
3304}
3305static struct md_sysfs_entry md_suspend_hi =
3306__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
3307
3308static ssize_t
3309reshape_position_show(mddev_t *mddev, char *page)
3310{
3311 if (mddev->reshape_position != MaxSector)
3312 return sprintf(page, "%llu\n",
3313 (unsigned long long)mddev->reshape_position);
3314 strcpy(page, "none\n");
3315 return 5;
3316}
3317
3318static ssize_t
3319reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
3320{
3321 char *e;
3322 unsigned long long new = simple_strtoull(buf, &e, 10);
3323 if (mddev->pers)
3324 return -EBUSY;
3325 if (buf == e || (*e && *e != '\n'))
3326 return -EINVAL;
3327 mddev->reshape_position = new;
3328 mddev->delta_disks = 0;
3329 mddev->new_level = mddev->level;
3330 mddev->new_layout = mddev->layout;
3331 mddev->new_chunk = mddev->chunk_size;
3332 return len;
3333}
3334
3335static struct md_sysfs_entry md_reshape_position =
3336__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3337 reshape_position_store);
3338
3339
3340static struct attribute *md_default_attrs[] = {
3341 &md_level.attr,
3342 &md_layout.attr,
3343 &md_raid_disks.attr,
3344 &md_chunk_size.attr,
3345 &md_size.attr,
3346 &md_resync_start.attr,
3347 &md_metadata.attr,
3348 &md_new_device.attr,
3349 &md_safe_delay.attr,
3350 &md_array_state.attr,
3351 &md_reshape_position.attr,
3352 NULL,
3353};
3354
3355static struct attribute *md_redundancy_attrs[] = {
3356 &md_scan_mode.attr,
3357 &md_mismatches.attr,
3358 &md_sync_min.attr,
3359 &md_sync_max.attr,
3360 &md_sync_speed.attr,
3361 &md_sync_force_parallel.attr,
3362 &md_sync_completed.attr,
3363 &md_min_sync.attr,
3364 &md_max_sync.attr,
3365 &md_suspend_lo.attr,
3366 &md_suspend_hi.attr,
3367 &md_bitmap.attr,
3368 &md_degraded.attr,
3369 NULL,
3370};
3371static struct attribute_group md_redundancy_group = {
3372 .name = NULL,
3373 .attrs = md_redundancy_attrs,
3374};
3375
3376
3377static ssize_t
3378md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3379{
3380 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3381 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3382 ssize_t rv;
3383
3384 if (!entry->show)
3385 return -EIO;
3386 rv = mddev_lock(mddev);
3387 if (!rv) {
3388 rv = entry->show(mddev, page);
3389 mddev_unlock(mddev);
3390 }
3391 return rv;
3392}
3393
3394static ssize_t
3395md_attr_store(struct kobject *kobj, struct attribute *attr,
3396 const char *page, size_t length)
3397{
3398 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3399 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3400 ssize_t rv;
3401
3402 if (!entry->store)
3403 return -EIO;
3404 if (!capable(CAP_SYS_ADMIN))
3405 return -EACCES;
3406 rv = mddev_lock(mddev);
3407 if (!rv) {
3408 rv = entry->store(mddev, page, length);
3409 mddev_unlock(mddev);
3410 }
3411 return rv;
3412}
3413
3414static void md_free(struct kobject *ko)
3415{
3416 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3417 kfree(mddev);
3418}
3419
3420static struct sysfs_ops md_sysfs_ops = {
3421 .show = md_attr_show,
3422 .store = md_attr_store,
3423};
3424static struct kobj_type md_ktype = {
3425 .release = md_free,
3426 .sysfs_ops = &md_sysfs_ops,
3427 .default_attrs = md_default_attrs,
3428};
3429
3430int mdp_major = 0;
3431
3432static struct kobject *md_probe(dev_t dev, int *part, void *data)
3433{
3434 static DEFINE_MUTEX(disks_mutex);
3435 mddev_t *mddev = mddev_find(dev);
3436 struct gendisk *disk;
3437 int partitioned = (MAJOR(dev) != MD_MAJOR);
3438 int shift = partitioned ? MdpMinorShift : 0;
3439 int unit = MINOR(dev) >> shift;
3440 int error;
3441
3442 if (!mddev)
3443 return NULL;
3444
3445 mutex_lock(&disks_mutex);
3446 if (mddev->gendisk) {
3447 mutex_unlock(&disks_mutex);
3448 mddev_put(mddev);
3449 return NULL;
3450 }
3451 disk = alloc_disk(1 << shift);
3452 if (!disk) {
3453 mutex_unlock(&disks_mutex);
3454 mddev_put(mddev);
3455 return NULL;
3456 }
3457 disk->major = MAJOR(dev);
3458 disk->first_minor = unit << shift;
3459 if (partitioned)
3460 sprintf(disk->disk_name, "md_d%d", unit);
3461 else
3462 sprintf(disk->disk_name, "md%d", unit);
3463 disk->fops = &md_fops;
3464 disk->private_data = mddev;
3465 disk->queue = mddev->queue;
3466
3467
3468
3469
3470 disk->flags |= GENHD_FL_EXT_DEVT;
3471 add_disk(disk);
3472 mddev->gendisk = disk;
3473 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
3474 &disk_to_dev(disk)->kobj, "%s", "md");
3475 mutex_unlock(&disks_mutex);
3476 if (error)
3477 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
3478 disk->disk_name);
3479 else {
3480 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3481 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
3482 }
3483 return NULL;
3484}
3485
3486static void md_safemode_timeout(unsigned long data)
3487{
3488 mddev_t *mddev = (mddev_t *) data;
3489
3490 if (!atomic_read(&mddev->writes_pending)) {
3491 mddev->safemode = 1;
3492 if (mddev->external)
3493 sysfs_notify_dirent(mddev->sysfs_state);
3494 }
3495 md_wakeup_thread(mddev->thread);
3496}
3497
3498static int start_dirty_degraded;
3499
3500static int do_md_run(mddev_t * mddev)
3501{
3502 int err;
3503 int chunk_size;
3504 struct list_head *tmp;
3505 mdk_rdev_t *rdev;
3506 struct gendisk *disk;
3507 struct mdk_personality *pers;
3508 char b[BDEVNAME_SIZE];
3509
3510 if (list_empty(&mddev->disks))
3511
3512 return -EINVAL;
3513
3514 if (mddev->pers)
3515 return -EBUSY;
3516
3517
3518
3519
3520 if (!mddev->raid_disks) {
3521 if (!mddev->persistent)
3522 return -EINVAL;
3523 analyze_sbs(mddev);
3524 }
3525
3526 chunk_size = mddev->chunk_size;
3527
3528 if (chunk_size) {
3529 if (chunk_size > MAX_CHUNK_SIZE) {
3530 printk(KERN_ERR "too big chunk_size: %d > %d\n",
3531 chunk_size, MAX_CHUNK_SIZE);
3532 return -EINVAL;
3533 }
3534
3535
3536
3537 if ( (1 << ffz(~chunk_size)) != chunk_size) {
3538 printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
3539 return -EINVAL;
3540 }
3541
3542
3543 rdev_for_each(rdev, tmp, mddev) {
3544 if (test_bit(Faulty, &rdev->flags))
3545 continue;
3546 if (rdev->size < chunk_size / 1024) {
3547 printk(KERN_WARNING
3548 "md: Dev %s smaller than chunk_size:"
3549 " %lluk < %dk\n",
3550 bdevname(rdev->bdev,b),
3551 (unsigned long long)rdev->size,
3552 chunk_size / 1024);
3553 return -EINVAL;
3554 }
3555 }
3556 }
3557
3558 if (mddev->level != LEVEL_NONE)
3559 request_module("md-level-%d", mddev->level);
3560 else if (mddev->clevel[0])
3561 request_module("md-%s", mddev->clevel);
3562
3563
3564
3565
3566
3567
3568 rdev_for_each(rdev, tmp, mddev) {
3569 if (test_bit(Faulty, &rdev->flags))
3570 continue;
3571 sync_blockdev(rdev->bdev);
3572 invalidate_bdev(rdev->bdev);
3573
3574
3575
3576
3577
3578 if (rdev->data_offset < rdev->sb_start) {
3579 if (mddev->size &&
3580 rdev->data_offset + mddev->size*2
3581 > rdev->sb_start) {
3582 printk("md: %s: data overlaps metadata\n",
3583 mdname(mddev));
3584 return -EINVAL;
3585 }
3586 } else {
3587 if (rdev->sb_start + rdev->sb_size/512
3588 > rdev->data_offset) {
3589 printk("md: %s: metadata overlaps data\n",
3590 mdname(mddev));
3591 return -EINVAL;
3592 }
3593 }
3594 sysfs_notify_dirent(rdev->sysfs_state);
3595 }
3596
3597 md_probe(mddev->unit, NULL, NULL);
3598 disk = mddev->gendisk;
3599 if (!disk)
3600 return -ENOMEM;
3601
3602 spin_lock(&pers_lock);
3603 pers = find_pers(mddev->level, mddev->clevel);
3604 if (!pers || !try_module_get(pers->owner)) {
3605 spin_unlock(&pers_lock);
3606 if (mddev->level != LEVEL_NONE)
3607 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
3608 mddev->level);
3609 else
3610 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
3611 mddev->clevel);
3612 return -EINVAL;
3613 }
3614 mddev->pers = pers;
3615 spin_unlock(&pers_lock);
3616 mddev->level = pers->level;
3617 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3618
3619 if (mddev->reshape_position != MaxSector &&
3620 pers->start_reshape == NULL) {
3621
3622 mddev->pers = NULL;
3623 module_put(pers->owner);
3624 return -EINVAL;
3625 }
3626
3627 if (pers->sync_request) {
3628
3629
3630
3631 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
3632 mdk_rdev_t *rdev2;
3633 struct list_head *tmp2;
3634 int warned = 0;
3635 rdev_for_each(rdev, tmp, mddev) {
3636 rdev_for_each(rdev2, tmp2, mddev) {
3637 if (rdev < rdev2 &&
3638 rdev->bdev->bd_contains ==
3639 rdev2->bdev->bd_contains) {
3640 printk(KERN_WARNING
3641 "%s: WARNING: %s appears to be"
3642 " on the same physical disk as"
3643 " %s.\n",
3644 mdname(mddev),
3645 bdevname(rdev->bdev,b),
3646 bdevname(rdev2->bdev,b2));
3647 warned = 1;
3648 }
3649 }
3650 }
3651 if (warned)
3652 printk(KERN_WARNING
3653 "True protection against single-disk"
3654 " failure might be compromised.\n");
3655 }
3656
3657 mddev->recovery = 0;
3658 mddev->resync_max_sectors = mddev->size << 1;
3659 mddev->barriers_work = 1;
3660 mddev->ok_start_degraded = start_dirty_degraded;
3661
3662 if (start_readonly)
3663 mddev->ro = 2;
3664
3665 err = mddev->pers->run(mddev);
3666 if (err)
3667 printk(KERN_ERR "md: pers->run() failed ...\n");
3668 else if (mddev->pers->sync_request) {
3669 err = bitmap_create(mddev);
3670 if (err) {
3671 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
3672 mdname(mddev), err);
3673 mddev->pers->stop(mddev);
3674 }
3675 }
3676 if (err) {
3677 module_put(mddev->pers->owner);
3678 mddev->pers = NULL;
3679 bitmap_destroy(mddev);
3680 return err;
3681 }
3682 if (mddev->pers->sync_request) {
3683 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3684 printk(KERN_WARNING
3685 "md: cannot register extra attributes for %s\n",
3686 mdname(mddev));
3687 } else if (mddev->ro == 2)
3688 mddev->ro = 0;
3689
3690 atomic_set(&mddev->writes_pending,0);
3691 mddev->safemode = 0;
3692 mddev->safemode_timer.function = md_safemode_timeout;
3693 mddev->safemode_timer.data = (unsigned long) mddev;
3694 mddev->safemode_delay = (200 * HZ)/1000 +1;
3695 mddev->in_sync = 1;
3696
3697 rdev_for_each(rdev, tmp, mddev)
3698 if (rdev->raid_disk >= 0) {
3699 char nm[20];
3700 sprintf(nm, "rd%d", rdev->raid_disk);
3701 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
3702 printk("md: cannot register %s for %s\n",
3703 nm, mdname(mddev));
3704 }
3705
3706 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3707
3708 if (mddev->flags)
3709 md_update_sb(mddev, 0);
3710
3711 set_capacity(disk, mddev->array_sectors);
3712
3713
3714
3715
3716
3717
3718
3719
3720 mddev->queue->queuedata = mddev;
3721 mddev->queue->make_request_fn = mddev->pers->make_request;
3722
3723
3724
3725
3726
3727 if (mddev->degraded && !mddev->sync_thread) {
3728 struct list_head *rtmp;
3729 int spares = 0;
3730 rdev_for_each(rdev, rtmp, mddev)
3731 if (rdev->raid_disk >= 0 &&
3732 !test_bit(In_sync, &rdev->flags) &&
3733 !test_bit(Faulty, &rdev->flags))
3734
3735 spares++;
3736 if (spares && mddev->pers->sync_request) {
3737 mddev->recovery = 0;
3738 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3739 mddev->sync_thread = md_register_thread(md_do_sync,
3740 mddev,
3741 "%s_resync");
3742 if (!mddev->sync_thread) {
3743 printk(KERN_ERR "%s: could not start resync"
3744 " thread...\n",
3745 mdname(mddev));
3746
3747 mddev->recovery = 0;
3748 }
3749 }
3750 }
3751 md_wakeup_thread(mddev->thread);
3752 md_wakeup_thread(mddev->sync_thread);
3753
3754 mddev->changed = 1;
3755 md_new_event(mddev);
3756 sysfs_notify_dirent(mddev->sysfs_state);
3757 sysfs_notify(&mddev->kobj, NULL, "sync_action");
3758 sysfs_notify(&mddev->kobj, NULL, "degraded");
3759 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
3760 return 0;
3761}
3762
3763static int restart_array(mddev_t *mddev)
3764{
3765 struct gendisk *disk = mddev->gendisk;
3766
3767
3768 if (list_empty(&mddev->disks))
3769 return -ENXIO;
3770 if (!mddev->pers)
3771 return -EINVAL;
3772 if (!mddev->ro)
3773 return -EBUSY;
3774 mddev->safemode = 0;
3775 mddev->ro = 0;
3776 set_disk_ro(disk, 0);
3777 printk(KERN_INFO "md: %s switched to read-write mode.\n",
3778 mdname(mddev));
3779
3780 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3781 md_wakeup_thread(mddev->thread);
3782 md_wakeup_thread(mddev->sync_thread);
3783 sysfs_notify_dirent(mddev->sysfs_state);
3784 return 0;
3785}
3786
3787
3788
3789static int deny_bitmap_write_access(struct file * file)
3790{
3791 struct inode *inode = file->f_mapping->host;
3792
3793 spin_lock(&inode->i_lock);
3794 if (atomic_read(&inode->i_writecount) > 1) {
3795 spin_unlock(&inode->i_lock);
3796 return -ETXTBSY;
3797 }
3798 atomic_set(&inode->i_writecount, -1);
3799 spin_unlock(&inode->i_lock);
3800
3801 return 0;
3802}
3803
3804static void restore_bitmap_write_access(struct file *file)
3805{
3806 struct inode *inode = file->f_mapping->host;
3807
3808 spin_lock(&inode->i_lock);
3809 atomic_set(&inode->i_writecount, 1);
3810 spin_unlock(&inode->i_lock);
3811}
3812
3813
3814
3815
3816
3817
/*
 * Stop, disassemble or switch to read-only the array @mddev.
 *
 * @mode:    0 - stop the personality and also discard the array configuration
 *               (bitmap file, member devices, geometry/state fields);
 *           1 - switch a running array to read-only;
 *           2 - stop the personality but keep the configuration.
 * @is_open: number of openers the caller tolerates on the device.
 *
 * Returns 0 on success, -EBUSY when the device has more than @is_open
 * openers, or -ENXIO when mode 1 is requested and mddev->ro is already 1.
 */
3818static int do_md_stop(mddev_t * mddev, int mode, int is_open)
3819{
3820 int err = 0;
3821 struct gendisk *disk = mddev->gendisk;
3822
3823 if (atomic_read(&mddev->openers) > is_open) {
3824 printk("md: %s still in use.\n",mdname(mddev));
3825 return -EBUSY;
3826 }
3827
3828 if (mddev->pers) {
3829
 /* Interrupt and reap a sync thread before touching array state. */
3830 if (mddev->sync_thread) {
3831 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3832 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3833 md_unregister_thread(mddev->sync_thread);
3834 mddev->sync_thread = NULL;
3835 }
3836
3837 del_timer_sync(&mddev->safemode_timer);
3838
3839 switch(mode) {
 /* mode 1: go read-only; fails if the array already is (ro == 1) */
3840 case 1:
3841 err = -ENXIO;
3842 if (mddev->ro==1)
3843 goto out;
3844 mddev->ro = 1;
3845 break;
 /* modes 0 and 2: flush, stop the personality and detach it */
3846 case 0:
3847 case 2:
3848 bitmap_flush(mddev);
3849 md_super_wait(mddev);
3850 if (mddev->ro)
3851 set_disk_ro(disk, 0);
 /* From here on requests are routed to md_fail_request. */
3852 blk_queue_make_request(mddev->queue, md_fail_request);
3853 mddev->pers->stop(mddev);
3854 mddev->queue->merge_bvec_fn = NULL;
3855 mddev->queue->unplug_fn = NULL;
3856 mddev->queue->backing_dev_info.congested_fn = NULL;
3857 if (mddev->pers->sync_request)
3858 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
3859
3860 module_put(mddev->pers->owner);
3861 mddev->pers = NULL;
3862
 /* Signal the sysfs state attribute now that no personality is attached. */
3863 sysfs_notify_dirent(mddev->sysfs_state);
3864
3865 set_capacity(disk, 0);
3866 mddev->changed = 1;
3867
3868 if (mddev->ro)
3869 mddev->ro = 0;
3870 }
 /* Write the superblock out with in_sync set if anything is pending. */
3871 if (!mddev->in_sync || mddev->flags) {
3872
3873 mddev->in_sync = 1;
3874 md_update_sb(mddev, 1);
3875 }
3876 if (mode == 1)
3877 set_disk_ro(disk, 1);
3878 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3879 }
3880
3881
3882
3883
 /* mode 0 only: release every resource and reset the mddev fields. */
3884 if (mode == 0) {
3885 mdk_rdev_t *rdev;
3886 struct list_head *tmp;
3887
3888 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
3889
3890 bitmap_destroy(mddev);
3891 if (mddev->bitmap_file) {
3892 restore_bitmap_write_access(mddev->bitmap_file);
3893 fput(mddev->bitmap_file);
3894 mddev->bitmap_file = NULL;
3895 }
3896 mddev->bitmap_offset = 0;
3897
 /* Remove the "rd<N>" sysfs links of devices that hold a raid slot. */
3898 rdev_for_each(rdev, tmp, mddev)
3899 if (rdev->raid_disk >= 0) {
3900 char nm[20];
3901 sprintf(nm, "rd%d", rdev->raid_disk);
3902 sysfs_remove_link(&mddev->kobj, nm);
3903 }
3904
3905
 /* NOTE(review): presumably waits for deferred work queued during device removal - confirm. */
3906 flush_scheduled_work();
3907
3908 export_array(mddev);
3909
3910 mddev->array_sectors = 0;
3911 mddev->size = 0;
3912 mddev->raid_disks = 0;
3913 mddev->recovery_cp = 0;
3914 mddev->resync_min = 0;
3915 mddev->resync_max = MaxSector;
3916 mddev->reshape_position = MaxSector;
3917 mddev->external = 0;
3918 mddev->persistent = 0;
3919 mddev->level = LEVEL_NONE;
3920 mddev->clevel[0] = 0;
3921 mddev->flags = 0;
3922 mddev->ro = 0;
3923 mddev->metadata_type[0] = 0;
3924 mddev->chunk_size = 0;
3925 mddev->ctime = mddev->utime = 0;
3926 mddev->layout = 0;
3927 mddev->max_disks = 0;
3928 mddev->events = 0;
3929 mddev->delta_disks = 0;
3930 mddev->new_level = LEVEL_NONE;
3931 mddev->new_layout = 0;
3932 mddev->new_chunk = 0;
3933 mddev->curr_resync = 0;
3934 mddev->resync_mismatches = 0;
3935 mddev->suspend_lo = mddev->suspend_hi = 0;
3936 mddev->sync_speed_min = mddev->sync_speed_max = 0;
3937 mddev->recovery = 0;
3938 mddev->in_sync = 0;
3939 mddev->changed = 0;
3940 mddev->degraded = 0;
3941 mddev->barriers_work = 0;
3942 mddev->safemode = 0;
3943 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
3944
3945 } else if (mddev->pers)
3946 printk(KERN_INFO "md: %s switched to read-only mode.\n",
3947 mdname(mddev));
 /* Success: clears the -ENXIO that the mode 1 path stored provisionally. */
3948 err = 0;
3949 md_new_event(mddev);
3950 sysfs_notify_dirent(mddev->sysfs_state);
3951out:
3952 return err;
3953}
3954
3955#ifndef MODULE
3956static void autorun_array(mddev_t *mddev)
3957{
3958 mdk_rdev_t *rdev;
3959 struct list_head *tmp;
3960 int err;
3961
3962 if (list_empty(&mddev->disks))
3963 return;
3964
3965 printk(KERN_INFO "md: running: ");
3966
3967 rdev_for_each(rdev, tmp, mddev) {
3968 char b[BDEVNAME_SIZE];
3969 printk("<%s>", bdevname(rdev->bdev,b));
3970 }
3971 printk("\n");
3972
3973 err = do_md_run(mddev);
3974 if (err) {
3975 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
3976 do_md_stop(mddev, 0, 0);
3977 }
3978}
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
/*
 * Assemble and start arrays from the devices on pending_raid_disks.
 *
 * Repeatedly takes the first pending device, collects every pending device
 * for which super_90_load() against it returns >= 0 into a candidate list,
 * looks up (creating via md_probe) the md device named by that device's
 * preferred_minor, binds the candidates to it and calls autorun_array().
 *
 * @part: non-zero selects device numbers under mdp_major with the minor
 *        shifted by MdpMinorShift; zero selects MD_MAJOR.
 *
 * The loop ends early (break) on a bad unit number or when no usable mddev
 * can be obtained.
 */
3992static void autorun_devices(int part)
3993{
3994 struct list_head *tmp;
3995 mdk_rdev_t *rdev0, *rdev;
3996 mddev_t *mddev;
3997 char b[BDEVNAME_SIZE];
3998
3999 printk(KERN_INFO "md: autorun ...\n");
4000 while (!list_empty(&pending_raid_disks)) {
4001 int unit;
4002 dev_t dev;
4003 LIST_HEAD(candidates);
4004 rdev0 = list_entry(pending_raid_disks.next,
4005 mdk_rdev_t, same_set);
4006
4007 printk(KERN_INFO "md: considering %s ...\n",
4008 bdevname(rdev0->bdev,b));
 /* Re-initialises the list head that LIST_HEAD() above already set up. */
4009 INIT_LIST_HEAD(&candidates);
 /* Move every pending device that super_90_load() accepts against rdev0. */
4010 rdev_for_each_list(rdev, tmp, pending_raid_disks)
4011 if (super_90_load(rdev, rdev0, 0) >= 0) {
4012 printk(KERN_INFO "md: adding %s ...\n",
4013 bdevname(rdev->bdev,b));
4014 list_move(&rdev->same_set, &candidates);
4015 }
4016
4017
4018
4019
4020
 /* Build the device number, then recover the unit from it to detect minors that do not round-trip. */
4021 if (part) {
4022 dev = MKDEV(mdp_major,
4023 rdev0->preferred_minor << MdpMinorShift);
4024 unit = MINOR(dev) >> MdpMinorShift;
4025 } else {
4026 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
4027 unit = MINOR(dev);
4028 }
4029 if (rdev0->preferred_minor != unit) {
4030 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
4031 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
4032 break;
4033 }
4034
4035 md_probe(dev, NULL, NULL);
4036 mddev = mddev_find(dev);
4037 if (!mddev || !mddev->gendisk) {
4038 if (mddev)
4039 mddev_put(mddev);
4040 printk(KERN_ERR
4041 "md: cannot allocate memory for md drive.\n");
4042 break;
4043 }
4044 if (mddev_lock(mddev))
4045 printk(KERN_WARNING "md: %s locked, cannot run\n",
4046 mdname(mddev));
 /* An mddev that already has disks or a version set is treated as in use. */
4047 else if (mddev->raid_disks || mddev->major_version
4048 || !list_empty(&mddev->disks)) {
4049 printk(KERN_WARNING
4050 "md: %s already running, cannot run %s\n",
4051 mdname(mddev), bdevname(rdev0->bdev,b));
4052 mddev_unlock(mddev);
4053 } else {
4054 printk(KERN_INFO "md: created %s\n", mdname(mddev));
4055 mddev->persistent = 1;
4056 rdev_for_each_list(rdev, tmp, candidates) {
4057 list_del_init(&rdev->same_set);
4058 if (bind_rdev_to_array(rdev, mddev))
4059 export_rdev(rdev);
4060 }
4061 autorun_array(mddev);
4062 mddev_unlock(mddev);
4063 }
4064
4065
4066
 /* Anything still on the candidate list was not bound above: export it. */
4067 rdev_for_each_list(rdev, tmp, candidates) {
4068 list_del_init(&rdev->same_set);
4069 export_rdev(rdev);
4070 }
4071 mddev_put(mddev);
4072 }
4073 printk(KERN_INFO "md: ... autorun DONE.\n");
4074}
4075#endif
4076
4077static int get_version(void __user * arg)
4078{
4079 mdu_version_t ver;
4080
4081 ver.major = MD_MAJOR_VERSION;
4082 ver.minor = MD_MINOR_VERSION;
4083 ver.patchlevel = MD_PATCHLEVEL_VERSION;
4084
4085 if (copy_to_user(arg, &ver, sizeof(ver)))
4086 return -EFAULT;
4087
4088 return 0;
4089}
4090
4091static int get_array_info(mddev_t * mddev, void __user * arg)
4092{
4093 mdu_array_info_t info;
4094 int nr,working,active,failed,spare;
4095 mdk_rdev_t *rdev;
4096 struct list_head *tmp;
4097
4098 nr=working=active=failed=spare=0;
4099 rdev_for_each(rdev, tmp, mddev) {
4100 nr++;
4101 if (test_bit(Faulty, &rdev->flags))
4102 failed++;
4103 else {
4104 working++;
4105 if (test_bit(In_sync, &rdev->flags))
4106 active++;
4107 else
4108 spare++;
4109 }
4110 }
4111
4112 info.major_version = mddev->major_version;
4113 info.minor_version = mddev->minor_version;
4114 info.patch_version = MD_PATCHLEVEL_VERSION;
4115 info.ctime = mddev->ctime;
4116 info.level = mddev->level;
4117 info.size = mddev->size;
4118 if (info.size != mddev->size)
4119 info.size = -1;
4120 info.nr_disks = nr;
4121 info.raid_disks = mddev->raid_disks;
4122 info.md_minor = mddev->md_minor;
4123 info.not_persistent= !mddev->persistent;
4124
4125 info.utime = mddev->utime;
4126 info.state = 0;
4127 if (mddev->in_sync)
4128 info.state = (1<<MD_SB_CLEAN);
4129 if (mddev->bitmap && mddev->bitmap_offset)
4130 info.state = (1<<MD_SB_BITMAP_PRESENT);
4131 info.active_disks = active;
4132 info.working_disks = working;
4133 info.failed_disks = failed;
4134 info.spare_disks = spare;
4135
4136 info.layout = mddev->layout;
4137 info.chunk_size = mddev->chunk_size;
4138
4139 if (copy_to_user(arg, &info, sizeof(info)))
4140 return -EFAULT;
4141
4142 return 0;
4143}
4144
4145static int get_bitmap_file(mddev_t * mddev, void __user * arg)
4146{
4147 mdu_bitmap_file_t *file = NULL;
4148 char *ptr, *buf = NULL;
4149 int err = -ENOMEM;
4150
4151 if (md_allow_write(mddev))
4152 file = kmalloc(sizeof(*file), GFP_NOIO);
4153 else
4154 file = kmalloc(sizeof(*file), GFP_KERNEL);
4155
4156 if (!file)
4157 goto out;
4158
4159
4160 if (!mddev->bitmap || !mddev->bitmap->file) {
4161 file->pathname[0] = '\0';
4162 goto copy_out;
4163 }
4164
4165 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
4166 if (!buf)
4167 goto out;
4168
4169 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
4170 if (IS_ERR(ptr))
4171 goto out;
4172
4173 strcpy(file->pathname, ptr);
4174
4175copy_out:
4176 err = 0;
4177 if (copy_to_user(arg, file, sizeof(*file)))
4178 err = -EFAULT;
4179out:
4180 kfree(buf);
4181 kfree(file);
4182 return err;
4183}
4184
4185static int get_disk_info(mddev_t * mddev, void __user * arg)
4186{
4187 mdu_disk_info_t info;
4188 mdk_rdev_t *rdev;
4189
4190 if (copy_from_user(&info, arg, sizeof(info)))
4191 return -EFAULT;
4192
4193 rdev = find_rdev_nr(mddev, info.number);
4194 if (rdev) {
4195 info.major = MAJOR(rdev->bdev->bd_dev);
4196 info.minor = MINOR(rdev->bdev->bd_dev);
4197 info.raid_disk = rdev->raid_disk;
4198 info.state = 0;
4199 if (test_bit(Faulty, &rdev->flags))
4200 info.state |= (1<<MD_DISK_FAULTY);
4201 else if (test_bit(In_sync, &rdev->flags)) {
4202 info.state |= (1<<MD_DISK_ACTIVE);
4203 info.state |= (1<<MD_DISK_SYNC);
4204 }
4205 if (test_bit(WriteMostly, &rdev->flags))
4206 info.state |= (1<<MD_DISK_WRITEMOSTLY);
4207 } else {
4208 info.major = info.minor = 0;
4209 info.raid_disk = -1;
4210 info.state = (1<<MD_DISK_REMOVED);
4211 }
4212
4213 if (copy_to_user(arg, &info, sizeof(info)))
4214 return -EFAULT;
4215
4216 return 0;
4217}
4218
/*
 * ADD_NEW_DISK helper: add the device described by @info to @mddev.
 *
 * Three situations are handled in turn:
 *  1. mddev->raid_disks == 0: the device is imported with the array's
 *     superblock version and, if the array already has devices, its
 *     superblock is checked against the first one via load_super().
 *  2. the array is running (mddev->pers set): the device is imported and
 *     bound with raid_disk = -1; recovery is then requested.
 *  3. otherwise: only arrays with major_version 0 are accepted, and the
 *     device is set up from the fields supplied in @info.
 *
 * Returns 0 on success, -EOVERFLOW if info->major/minor do not fit a dev_t,
 * -EINVAL for unsupported operations or a superblock mismatch, or the error
 * from md_import_device(), bind_rdev_to_array() or the personality's
 * hot_add_disk().
 */
4219static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4220{
4221 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
4222 mdk_rdev_t *rdev;
4223 dev_t dev = MKDEV(info->major,info->minor);
4224
 /* Reject numbers that were truncated when packed into dev. */
4225 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
4226 return -EOVERFLOW;
4227
 /* Case 1: array has no raid_disks set yet. */
4228 if (!mddev->raid_disks) {
4229 int err;
4230
4231 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
4232 if (IS_ERR(rdev)) {
4233 printk(KERN_WARNING
4234 "md: md_import_device returned %ld\n",
4235 PTR_ERR(rdev));
4236 return PTR_ERR(rdev);
4237 }
4238 if (!list_empty(&mddev->disks)) {
4239 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
4240 mdk_rdev_t, same_set);
 /* This err shadows the outer one; it is only used for the check below. */
4241 int err = super_types[mddev->major_version]
4242 .load_super(rdev, rdev0, mddev->minor_version);
4243 if (err < 0) {
4244 printk(KERN_WARNING
4245 "md: %s has different UUID to %s\n",
4246 bdevname(rdev->bdev,b),
4247 bdevname(rdev0->bdev,b2));
4248 export_rdev(rdev);
4249 return -EINVAL;
4250 }
4251 }
4252 err = bind_rdev_to_array(rdev, mddev);
4253 if (err)
4254 export_rdev(rdev);
4255 return err;
4256 }
4257
4258
4259
4260
4261
4262
 /* Case 2: array is running; this is a hot-add. */
4263 if (mddev->pers) {
4264 int err;
4265 if (!mddev->pers->hot_add_disk) {
4266 printk(KERN_WARNING
4267 "%s: personality does not support diskops!\n",
4268 mdname(mddev));
4269 return -EINVAL;
4270 }
4271 if (mddev->persistent)
4272 rdev = md_import_device(dev, mddev->major_version,
4273 mddev->minor_version);
4274 else
4275 rdev = md_import_device(dev, -1, -1);
4276 if (IS_ERR(rdev)) {
4277 printk(KERN_WARNING
4278 "md: md_import_device returned %ld\n",
4279 PTR_ERR(rdev));
4280 return PTR_ERR(rdev);
4281 }
4282
 /* Work out raid_disk only so it can be remembered in saved_raid_disk. */
4283 if (!mddev->persistent) {
4284 if (info->state & (1<<MD_DISK_SYNC) &&
4285 info->raid_disk < mddev->raid_disks)
4286 rdev->raid_disk = info->raid_disk;
4287 else
4288 rdev->raid_disk = -1;
4289 } else
4290 super_types[mddev->major_version].
4291 validate_super(mddev, rdev);
4292 rdev->saved_raid_disk = rdev->raid_disk;
4293
4294 clear_bit(In_sync, &rdev->flags);
4295 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4296 set_bit(WriteMostly, &rdev->flags);
4297
 /* The device is bound without a slot (raid_disk = -1). */
4298 rdev->raid_disk = -1;
4299 err = bind_rdev_to_array(rdev, mddev);
 /* Personalities without hot_remove_disk get the device added right away. */
4300 if (!err && !mddev->pers->hot_remove_disk) {
4301
4302
4303
4304
4305 super_types[mddev->major_version].
4306 validate_super(mddev, rdev);
4307 err = mddev->pers->hot_add_disk(mddev, rdev);
4308 if (err)
4309 unbind_rdev_from_array(rdev);
4310 }
4311 if (err)
4312 export_rdev(rdev);
4313 else
4314 sysfs_notify_dirent(rdev->sysfs_state);
4315
 /* Superblock update and recovery kick happen whether or not err is set. */
4316 md_update_sb(mddev, 1);
4317 if (mddev->degraded)
4318 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4319 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4320 md_wakeup_thread(mddev->thread);
4321 return err;
4322 }
4323
4324
4325
4326
 /* Case 3: array configured but not running; version-0 metadata only. */
4327 if (mddev->major_version != 0) {
4328 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
4329 mdname(mddev));
4330 return -EINVAL;
4331 }
4332
 /* Devices flagged MD_DISK_FAULTY in info->state are skipped and 0 is returned. */
4333 if (!(info->state & (1<<MD_DISK_FAULTY))) {
4334 int err;
4335 rdev = md_import_device(dev, -1, 0);
4336 if (IS_ERR(rdev)) {
4337 printk(KERN_WARNING
4338 "md: error, md_import_device() returned %ld\n",
4339 PTR_ERR(rdev));
4340 return PTR_ERR(rdev);
4341 }
4342 rdev->desc_nr = info->number;
4343 if (info->raid_disk < mddev->raid_disks)
4344 rdev->raid_disk = info->raid_disk;
4345 else
4346 rdev->raid_disk = -1;
4347
4348 if (rdev->raid_disk < mddev->raid_disks)
4349 if (info->state & (1<<MD_DISK_SYNC))
4350 set_bit(In_sync, &rdev->flags);
4351
4352 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4353 set_bit(WriteMostly, &rdev->flags);
4354
 /* Without a persistent superblock, sb_start is the device size in 512-byte units. */
4355 if (!mddev->persistent) {
4356 printk(KERN_INFO "md: nonpersistent superblock ...\n");
4357 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4358 } else
4359 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4360 rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
4361
4362 err = bind_rdev_to_array(rdev, mddev);
4363 if (err) {
4364 export_rdev(rdev);
4365 return err;
4366 }
4367 }
4368
4369 return 0;
4370}
4371
4372static int hot_remove_disk(mddev_t * mddev, dev_t dev)
4373{
4374 char b[BDEVNAME_SIZE];
4375 mdk_rdev_t *rdev;
4376
4377 rdev = find_rdev(mddev, dev);
4378 if (!rdev)
4379 return -ENXIO;
4380
4381 if (rdev->raid_disk >= 0)
4382 goto busy;
4383
4384 kick_rdev_from_array(rdev);
4385 md_update_sb(mddev, 1);
4386 md_new_event(mddev);
4387
4388 return 0;
4389busy:
4390 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
4391 bdevname(rdev->bdev,b), mdname(mddev));
4392 return -EBUSY;
4393}
4394
/*
 * HOT_ADD_DISK helper: add device @dev to the running array @mddev as a
 * device without a raid slot, then ask the md thread to run recovery.
 *
 * Returns 0 on success, -ENODEV if the array is not running, -EINVAL if the
 * array does not use version-0 metadata, the personality has no
 * hot_add_disk method, the import fails or the device is Faulty, -EBUSY if
 * the array is full, or the error from bind_rdev_to_array().
 */
4395static int hot_add_disk(mddev_t * mddev, dev_t dev)
4396{
4397 char b[BDEVNAME_SIZE];
4398 int err;
4399 mdk_rdev_t *rdev;
4400
4401 if (!mddev->pers)
4402 return -ENODEV;
4403
4404 if (mddev->major_version != 0) {
4405 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
4406 " version-0 superblocks.\n",
4407 mdname(mddev));
4408 return -EINVAL;
4409 }
4410 if (!mddev->pers->hot_add_disk) {
4411 printk(KERN_WARNING
4412 "%s: personality does not support diskops!\n",
4413 mdname(mddev));
4414 return -EINVAL;
4415 }
4416
 /* Note: the specific import error is logged but -EINVAL is what is returned. */
4417 rdev = md_import_device(dev, -1, 0);
4418 if (IS_ERR(rdev)) {
4419 printk(KERN_WARNING
4420 "md: error, md_import_device() returned %ld\n",
4421 PTR_ERR(rdev));
4422 return -EINVAL;
4423 }
4424
4425 if (mddev->persistent)
4426 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4427 else
4428 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4429
4430 rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
4431
4432 if (test_bit(Faulty, &rdev->flags)) {
4433 printk(KERN_WARNING
4434 "md: can not hot-add faulty %s disk to %s!\n",
4435 bdevname(rdev->bdev,b), mdname(mddev));
4436 err = -EINVAL;
4437 goto abort_export;
4438 }
4439 clear_bit(In_sync, &rdev->flags);
4440 rdev->desc_nr = -1;
4441 rdev->saved_raid_disk = -1;
4442 err = bind_rdev_to_array(rdev, mddev);
4443 if (err)
4444 goto abort_export;
4445
4446
4447
4448
4449
4450
 /* NOTE(review): desc_nr was set to -1 above, so bind_rdev_to_array() presumably assigns it - confirm. */
4451 if (rdev->desc_nr == mddev->max_disks) {
4452 printk(KERN_WARNING "%s: can not hot-add to full array!\n",
4453 mdname(mddev));
4454 err = -EBUSY;
4455 goto abort_unbind_export;
4456 }
4457
4458 rdev->raid_disk = -1;
4459
4460 md_update_sb(mddev, 1);
4461
4462
4463
4464
4465
 /* Flag that recovery is needed and wake the array's thread to act on it. */
4466 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4467 md_wakeup_thread(mddev->thread);
4468 md_new_event(mddev);
4469 return 0;
4470
 /* Error unwinding: unbind first if the device was bound, then export it. */
4471abort_unbind_export:
4472 unbind_rdev_from_array(rdev);
4473
4474abort_export:
4475 export_rdev(rdev);
4476 return err;
4477}
4478
4479static int set_bitmap_file(mddev_t *mddev, int fd)
4480{
4481 int err;
4482
4483 if (mddev->pers) {
4484 if (!mddev->pers->quiesce)
4485 return -EBUSY;
4486 if (mddev->recovery || mddev->sync_thread)
4487 return -EBUSY;
4488
4489 }
4490
4491
4492 if (fd >= 0) {
4493 if (mddev->bitmap)
4494 return -EEXIST;
4495 mddev->bitmap_file = fget(fd);
4496
4497 if (mddev->bitmap_file == NULL) {
4498 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
4499 mdname(mddev));
4500 return -EBADF;
4501 }
4502
4503 err = deny_bitmap_write_access(mddev->bitmap_file);
4504 if (err) {
4505 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
4506 mdname(mddev));
4507 fput(mddev->bitmap_file);
4508 mddev->bitmap_file = NULL;
4509 return err;
4510 }
4511 mddev->bitmap_offset = 0;
4512 } else if (mddev->bitmap == NULL)
4513 return -ENOENT;
4514 err = 0;
4515 if (mddev->pers) {
4516 mddev->pers->quiesce(mddev, 1);
4517 if (fd >= 0)
4518 err = bitmap_create(mddev);
4519 if (fd < 0 || err) {
4520 bitmap_destroy(mddev);
4521 fd = -1;
4522 }
4523 mddev->pers->quiesce(mddev, 0);
4524 }
4525 if (fd < 0) {
4526 if (mddev->bitmap_file) {
4527 restore_bitmap_write_access(mddev->bitmap_file);
4528 fput(mddev->bitmap_file);
4529 }
4530 mddev->bitmap_file = NULL;
4531 }
4532
4533 return err;
4534}
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4550{
4551
4552 if (info->raid_disks == 0) {
4553
4554 if (info->major_version < 0 ||
4555 info->major_version >= ARRAY_SIZE(super_types) ||
4556 super_types[info->major_version].name == NULL) {
4557
4558 printk(KERN_INFO
4559 "md: superblock version %d not known\n",
4560 info->major_version);
4561 return -EINVAL;
4562 }
4563 mddev->major_version = info->major_version;
4564 mddev->minor_version = info->minor_version;
4565 mddev->patch_version = info->patch_version;
4566 mddev->persistent = !info->not_persistent;
4567 return 0;
4568 }
4569 mddev->major_version = MD_MAJOR_VERSION;
4570 mddev->minor_version = MD_MINOR_VERSION;
4571 mddev->patch_version = MD_PATCHLEVEL_VERSION;
4572 mddev->ctime = get_seconds();
4573
4574 mddev->level = info->level;
4575 mddev->clevel[0] = 0;
4576 mddev->size = info->size;
4577 mddev->raid_disks = info->raid_disks;
4578
4579
4580
4581 if (info->state & (1<<MD_SB_CLEAN))
4582 mddev->recovery_cp = MaxSector;
4583 else
4584 mddev->recovery_cp = 0;
4585 mddev->persistent = ! info->not_persistent;
4586 mddev->external = 0;
4587
4588 mddev->layout = info->layout;
4589 mddev->chunk_size = info->chunk_size;
4590
4591 mddev->max_disks = MD_SB_DISKS;
4592
4593 if (mddev->persistent)
4594 mddev->flags = 0;
4595 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4596
4597 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
4598 mddev->bitmap_offset = 0;
4599
4600 mddev->reshape_position = MaxSector;
4601
4602
4603
4604
4605 get_random_bytes(mddev->uuid, 16);
4606
4607 mddev->new_level = mddev->level;
4608 mddev->new_chunk = mddev->chunk_size;
4609 mddev->new_layout = mddev->layout;
4610 mddev->delta_disks = 0;
4611
4612 return 0;
4613}
4614
4615static int update_size(mddev_t *mddev, sector_t num_sectors)
4616{
4617 mdk_rdev_t * rdev;
4618 int rv;
4619 struct list_head *tmp;
4620 int fit = (num_sectors == 0);
4621
4622 if (mddev->pers->resize == NULL)
4623 return -EINVAL;
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634 if (mddev->sync_thread)
4635 return -EBUSY;
4636 if (mddev->bitmap)
4637
4638
4639
4640 return -EBUSY;
4641 rdev_for_each(rdev, tmp, mddev) {
4642 sector_t avail;
4643 avail = rdev->size * 2;
4644
4645 if (fit && (num_sectors == 0 || num_sectors > avail))
4646 num_sectors = avail;
4647 if (avail < num_sectors)
4648 return -ENOSPC;
4649 }
4650 rv = mddev->pers->resize(mddev, num_sectors);
4651 if (!rv) {
4652 struct block_device *bdev;
4653
4654 bdev = bdget_disk(mddev->gendisk, 0);
4655 if (bdev) {
4656 mutex_lock(&bdev->bd_inode->i_mutex);
4657 i_size_write(bdev->bd_inode,
4658 (loff_t)mddev->array_sectors << 9);
4659 mutex_unlock(&bdev->bd_inode->i_mutex);
4660 bdput(bdev);
4661 }
4662 }
4663 return rv;
4664}
4665
4666static int update_raid_disks(mddev_t *mddev, int raid_disks)
4667{
4668 int rv;
4669
4670 if (mddev->pers->check_reshape == NULL)
4671 return -EINVAL;
4672 if (raid_disks <= 0 ||
4673 raid_disks >= mddev->max_disks)
4674 return -EINVAL;
4675 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
4676 return -EBUSY;
4677 mddev->delta_disks = raid_disks - mddev->raid_disks;
4678
4679 rv = mddev->pers->check_reshape(mddev);
4680 return rv;
4681}
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
/*
 * SET_ARRAY_INFO helper for a running array: apply exactly one change
 * described by @info.
 *
 * Changeable items are size, raid_disks, layout and the presence of the
 * internal bitmap (MD_SB_BITMAP_PRESENT in info->state).  All other
 * compared fields must match the current array or -EINVAL is returned.
 * No difference at all returns 0 without doing anything; more than one
 * difference returns -EINVAL.
 *
 * Returns 0 or the error of the specific update routine; bitmap changes can
 * additionally return -EINVAL, -EBUSY, -EEXIST or -ENOENT (see below).
 */
4692static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
4693{
4694 int rv = 0;
4695 int cnt = 0;
4696 int state = 0;
4697
4698
 /* Current state as userspace would see it: only the bitmap bit matters here. */
4699 if (mddev->bitmap && mddev->bitmap_offset)
4700 state |= (1 << MD_SB_BITMAP_PRESENT);
4701
4702 if (mddev->major_version != info->major_version ||
4703 mddev->minor_version != info->minor_version ||
4704
4705 mddev->ctime != info->ctime ||
4706 mddev->level != info->level ||
4707
4708 !mddev->persistent != info->not_persistent||
4709 mddev->chunk_size != info->chunk_size ||
4710
 /* state bits 9 and above must match; differences in the low 9 bits are ignored by this mask */
4711 ((state^info->state) & 0xfffffe00)
4712 )
4713 return -EINVAL;
4714
 /* Count the requested changes; a negative info->size means "no size change". */
4715 if (info->size >= 0 && mddev->size != info->size) cnt++;
4716 if (mddev->raid_disks != info->raid_disks) cnt++;
4717 if (mddev->layout != info->layout) cnt++;
4718 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
4719 if (cnt == 0) return 0;
4720 if (cnt > 1) return -EINVAL;
4721
 /* Layout change: delegated to the personality; returns without updating the superblock here. */
4722 if (mddev->layout != info->layout) {
4723
4724
4725
4726
4727 if (mddev->pers->reconfig == NULL)
4728 return -EINVAL;
4729 else
4730 return mddev->pers->reconfig(mddev, info->layout, -1);
4731 }
 /* info->size is doubled to get the sector count update_size() expects. */
4732 if (info->size >= 0 && mddev->size != info->size)
4733 rv = update_size(mddev, (sector_t)info->size * 2);
4734
4735 if (mddev->raid_disks != info->raid_disks)
4736 rv = update_raid_disks(mddev, info->raid_disks);
4737
4738 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
4739 if (mddev->pers->quiesce == NULL)
4740 return -EINVAL;
4741 if (mddev->recovery || mddev->sync_thread)
4742 return -EBUSY;
4743 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
4744
 /* Add an internal bitmap at the default offset, with the array quiesced. */
4745 if (mddev->bitmap)
4746 return -EEXIST;
4747 if (mddev->default_bitmap_offset == 0)
4748 return -EINVAL;
4749 mddev->bitmap_offset = mddev->default_bitmap_offset;
4750 mddev->pers->quiesce(mddev, 1);
4751 rv = bitmap_create(mddev);
4752 if (rv)
4753 bitmap_destroy(mddev);
4754 mddev->pers->quiesce(mddev, 0);
4755 } else {
4756
 /* Remove the bitmap; file-backed bitmaps are not handled by this path. */
4757 if (!mddev->bitmap)
4758 return -ENOENT;
4759 if (mddev->bitmap->file)
4760 return -EINVAL;
4761 mddev->pers->quiesce(mddev, 1);
4762 bitmap_destroy(mddev);
4763 mddev->pers->quiesce(mddev, 0);
4764 mddev->bitmap_offset = 0;
4765 }
4766 }
 /* The superblock is written even when rv holds an error from the update above. */
4767 md_update_sb(mddev, 1);
4768 return rv;
4769}
4770
4771static int set_disk_faulty(mddev_t *mddev, dev_t dev)
4772{
4773 mdk_rdev_t *rdev;
4774
4775 if (mddev->pers == NULL)
4776 return -ENODEV;
4777
4778 rdev = find_rdev(mddev, dev);
4779 if (!rdev)
4780 return -ENODEV;
4781
4782 md_error(mddev, rdev);
4783 return 0;
4784}
4785
4786
4787
4788
4789
4790
4791
4792static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
4793{
4794 mddev_t *mddev = bdev->bd_disk->private_data;
4795
4796 geo->heads = 2;
4797 geo->sectors = 4;
4798 geo->cylinders = get_capacity(mddev->gendisk) / 8;
4799 return 0;
4800}
4801
/*
 * ioctl entry point for md block devices.
 *
 * The caller must have CAP_SYS_ADMIN (-EACCES otherwise).  Commands are
 * dispatched in stages:
 *  1. driver-wide commands that need no mddev (RAID_VERSION,
 *     PRINT_RAID_DEBUG and, when built in, RAID_AUTORUN);
 *  2. with the mddev locked: SET_ARRAY_INFO;
 *  3. commands allowed on any configured array, including read-only ones
 *     (GET_*, RESTART_ARRAY_RW, STOP_ARRAY, STOP_ARRAY_RO);
 *  4. commands that modify the array; these are refused with -EROFS on an
 *     array with ro == 1, while ro == 2 is switched to read-write first.
 *
 * Returns 0 or a negative errno from the selected helper; unknown commands
 * give -EINVAL, an unconfigured array gives -ENODEV for most commands.
 */
4802static int md_ioctl(struct block_device *bdev, fmode_t mode,
4803 unsigned int cmd, unsigned long arg)
4804{
4805 int err = 0;
4806 void __user *argp = (void __user *)arg;
4807 mddev_t *mddev = NULL;
4808
4809 if (!capable(CAP_SYS_ADMIN))
4810 return -EACCES;
4811
4812
4813
4814
4815
 /* Stage 1: commands that do not operate on a particular array. */
4816 switch (cmd)
4817 {
4818 case RAID_VERSION:
4819 err = get_version(argp);
4820 goto done;
4821
4822 case PRINT_RAID_DEBUG:
4823 err = 0;
4824 md_print_devices();
4825 goto done;
4826
4827#ifndef MODULE
4828 case RAID_AUTORUN:
4829 err = 0;
4830 autostart_arrays(arg);
4831 goto done;
4832#endif
4833 default:;
4834 }
4835
4836
4837
4838
4839
 /* Everything below needs the array attached to this disk. */
4840 mddev = bdev->bd_disk->private_data;
4841
4842 if (!mddev) {
4843 BUG();
4844 goto abort;
4845 }
4846
4847 err = mddev_lock(mddev);
4848 if (err) {
4849 printk(KERN_INFO
4850 "md: ioctl lock interrupted, reason %d, cmd %d\n",
4851 err, cmd);
4852 goto abort;
4853 }
4854
 /* Stage 2: SET_ARRAY_INFO either updates a running array or configures a blank one. */
4855 switch (cmd)
4856 {
4857 case SET_ARRAY_INFO:
4858 {
4859 mdu_array_info_t info;
 /* A NULL argument is treated as an all-zero info structure. */
4860 if (!arg)
4861 memset(&info, 0, sizeof(info));
4862 else if (copy_from_user(&info, argp, sizeof(info))) {
4863 err = -EFAULT;
4864 goto abort_unlock;
4865 }
4866 if (mddev->pers) {
4867 err = update_array_info(mddev, &info);
4868 if (err) {
4869 printk(KERN_WARNING "md: couldn't update"
4870 " array info. %d\n", err);
4871 goto abort_unlock;
4872 }
4873 goto done_unlock;
4874 }
4875 if (!list_empty(&mddev->disks)) {
4876 printk(KERN_WARNING
4877 "md: array %s already has disks!\n",
4878 mdname(mddev));
4879 err = -EBUSY;
4880 goto abort_unlock;
4881 }
4882 if (mddev->raid_disks) {
4883 printk(KERN_WARNING
4884 "md: array %s already initialised!\n",
4885 mdname(mddev));
4886 err = -EBUSY;
4887 goto abort_unlock;
4888 }
4889 err = set_array_info(mddev, &info);
4890 if (err) {
4891 printk(KERN_WARNING "md: couldn't set"
4892 " array info. %d\n", err);
4893 goto abort_unlock;
4894 }
4895 }
4896 goto done_unlock;
4897
4898 default:;
4899 }
4900
4901
4902
4903
4904
4905
 /* An unconfigured array (no raid_disks, not external) only accepts the commands listed here. */
4906 if ((!mddev->raid_disks && !mddev->external)
4907 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
4908 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
4909 && cmd != GET_BITMAP_FILE) {
4910 err = -ENODEV;
4911 goto abort_unlock;
4912 }
4913
4914
4915
4916
 /* Stage 3: commands that are handled before the read-only check below. */
4917 switch (cmd)
4918 {
4919 case GET_ARRAY_INFO:
4920 err = get_array_info(mddev, argp);
4921 goto done_unlock;
4922
4923 case GET_BITMAP_FILE:
4924 err = get_bitmap_file(mddev, argp);
4925 goto done_unlock;
4926
4927 case GET_DISK_INFO:
4928 err = get_disk_info(mddev, argp);
4929 goto done_unlock;
4930
4931 case RESTART_ARRAY_RW:
4932 err = restart_array(mddev);
4933 goto done_unlock;
4934
4935 case STOP_ARRAY:
4936 err = do_md_stop(mddev, 0, 1);
4937 goto done_unlock;
4938
4939 case STOP_ARRAY_RO:
4940 err = do_md_stop(mddev, 1, 1);
4941 goto done_unlock;
4942
4943 }
4944
4945
4946
4947
4948
4949
4950
4951
 /* For md ioctls on a running array: ro == 2 is switched to read-write, any other non-zero ro is refused. */
4952 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
4953 if (mddev->ro == 2) {
4954 mddev->ro = 0;
4955 sysfs_notify_dirent(mddev->sysfs_state);
4956 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4957 md_wakeup_thread(mddev->thread);
4958 } else {
4959 err = -EROFS;
4960 goto abort_unlock;
4961 }
4962 }
4963
 /* Stage 4: commands that modify the array. */
4964 switch (cmd)
4965 {
4966 case ADD_NEW_DISK:
4967 {
4968 mdu_disk_info_t info;
4969 if (copy_from_user(&info, argp, sizeof(info)))
4970 err = -EFAULT;
4971 else
4972 err = add_new_disk(mddev, &info);
4973 goto done_unlock;
4974 }
4975
4976 case HOT_REMOVE_DISK:
4977 err = hot_remove_disk(mddev, new_decode_dev(arg));
4978 goto done_unlock;
4979
4980 case HOT_ADD_DISK:
4981 err = hot_add_disk(mddev, new_decode_dev(arg));
4982 goto done_unlock;
4983
4984 case SET_DISK_FAULTY:
4985 err = set_disk_faulty(mddev, new_decode_dev(arg));
4986 goto done_unlock;
4987
4988 case RUN_ARRAY:
4989 err = do_md_run(mddev);
4990 goto done_unlock;
4991
4992 case SET_BITMAP_FILE:
4993 err = set_bitmap_file(mddev, (int)arg);
4994 goto done_unlock;
4995
4996 default:
4997 err = -EINVAL;
4998 goto abort_unlock;
4999 }
5000
 /* Both labels share one exit path: drop the mddev lock and return err. */
5001done_unlock:
5002abort_unlock:
5003 mddev_unlock(mddev);
5004
5005 return err;
 /* Exit for the stage 1 commands (no lock held); an error there is reported via MD_BUG(). */
5006done:
5007 if (err)
5008 MD_BUG();
5009abort:
5010 return err;
5011}
5012
5013static int md_open(struct block_device *bdev, fmode_t mode)
5014{
5015
5016
5017
5018
5019 mddev_t *mddev = bdev->bd_disk->private_data;
5020 int err;
5021
5022 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
5023 goto out;
5024
5025 err = 0;
5026 mddev_get(mddev);
5027 atomic_inc(&mddev->openers);
5028 mddev_unlock(mddev);
5029
5030 check_disk_change(bdev);
5031 out:
5032 return err;
5033}
5034
5035static int md_release(struct gendisk *disk, fmode_t mode)
5036{
5037 mddev_t *mddev = disk->private_data;
5038
5039 BUG_ON(!mddev);
5040 atomic_dec(&mddev->openers);
5041 mddev_put(mddev);
5042
5043 return 0;
5044}
5045
5046static int md_media_changed(struct gendisk *disk)
5047{
5048 mddev_t *mddev = disk->private_data;
5049
5050 return mddev->changed;
5051}
5052
5053static int md_revalidate(struct gendisk *disk)
5054{
5055 mddev_t *mddev = disk->private_data;
5056
5057 mddev->changed = 0;
5058 return 0;
5059}
5060static struct block_device_operations md_fops =
5061{
5062 .owner = THIS_MODULE,
5063 .open = md_open,
5064 .release = md_release,
5065 .locked_ioctl = md_ioctl,
5066 .getgeo = md_getgeo,
5067 .media_changed = md_media_changed,
5068 .revalidate_disk= md_revalidate,
5069};
5070
5071static int md_thread(void * arg)
5072{
5073 mdk_thread_t *thread = arg;
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087 allow_signal(SIGKILL);
5088 while (!kthread_should_stop()) {
5089
5090
5091
5092
5093
5094
5095 if (signal_pending(current))
5096 flush_signals(current);
5097
5098 wait_event_interruptible_timeout
5099 (thread->wqueue,
5100 test_bit(THREAD_WAKEUP, &thread->flags)
5101 || kthread_should_stop(),
5102 thread->timeout);
5103
5104 clear_bit(THREAD_WAKEUP, &thread->flags);
5105
5106 thread->run(thread->mddev);
5107 }
5108
5109 return 0;
5110}
5111
5112void md_wakeup_thread(mdk_thread_t *thread)
5113{
5114 if (thread) {
5115 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
5116 set_bit(THREAD_WAKEUP, &thread->flags);
5117 wake_up(&thread->wqueue);
5118 }
5119}
5120
5121mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
5122 const char *name)
5123{
5124 mdk_thread_t *thread;
5125
5126 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
5127 if (!thread)
5128 return NULL;
5129
5130 init_waitqueue_head(&thread->wqueue);
5131
5132 thread->run = run;
5133 thread->mddev = mddev;
5134 thread->timeout = MAX_SCHEDULE_TIMEOUT;
5135 thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
5136 if (IS_ERR(thread->tsk)) {
5137 kfree(thread);
5138 return NULL;
5139 }
5140 return thread;
5141}
5142
5143void md_unregister_thread(mdk_thread_t *thread)
5144{
5145 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
5146
5147 kthread_stop(thread->tsk);
5148 kfree(thread);
5149}
5150
5151void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
5152{
5153 if (!mddev) {
5154 MD_BUG();
5155 return;
5156 }
5157
5158 if (!rdev || test_bit(Faulty, &rdev->flags))
5159 return;
5160
5161 if (mddev->external)
5162 set_bit(Blocked, &rdev->flags);
5163
5164
5165
5166
5167
5168
5169
5170 if (!mddev->pers)
5171 return;
5172 if (!mddev->pers->error_handler)
5173 return;
5174 mddev->pers->error_handler(mddev,rdev);
5175 if (mddev->degraded)
5176 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5177 set_bit(StateChanged, &rdev->flags);
5178 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5179 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5180 md_wakeup_thread(mddev->thread);
5181 md_new_event_inintr(mddev);
5182}
5183
5184
5185
5186static void status_unused(struct seq_file *seq)
5187{
5188 int i = 0;
5189 mdk_rdev_t *rdev;
5190 struct list_head *tmp;
5191
5192 seq_printf(seq, "unused devices: ");
5193
5194 rdev_for_each_list(rdev, tmp, pending_raid_disks) {
5195 char b[BDEVNAME_SIZE];
5196 i++;
5197 seq_printf(seq, "%s ",
5198 bdevname(rdev->bdev,b));
5199 }
5200 if (!i)
5201 seq_printf(seq, "<none>");
5202
5203 seq_printf(seq, "\n");
5204}
5205
5206
/*
 * Emit the progress line for a resync/recovery/reshape/check of @mddev:
 * a 20-cell progress bar, the operation name, the percentage with one
 * decimal, "(done/total)", an estimated time to finish and the current
 * speed.
 *
 * If the total is zero, MD_BUG() is raised and nothing is printed.
 */
5207static void status_resync(struct seq_file *seq, mddev_t * mddev)
5208{
5209 sector_t max_blocks, resync, res;
5210 unsigned long dt, db, rt;
5211 int scale;
5212 unsigned int per_milli;
5213
 /* Progress so far: curr_resync minus what is still in flight, halved. */
5214 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
5215
 /* Total: resync_max_sectors halved for a sync, otherwise mddev->size. */
5216 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
5217 max_blocks = mddev->resync_max_sectors >> 1;
5218 else
5219 max_blocks = mddev->size;
5220
5221
5222
5223
5224 if (!max_blocks) {
5225 MD_BUG();
5226 return;
5227 }
5228
5229
5230
5231
5232
 /* Shift both operands down by 'scale' bits so the divisor fits the (u32) cast given to sector_div(). */
5233 scale = 10;
5234 if (sizeof(sector_t) > sizeof(unsigned long)) {
5235 while ( max_blocks/2 > (1ULL<<(scale+32)))
5236 scale++;
5237 }
 /* res becomes progress in thousandths; the +1 keeps the divisor non-zero. */
5238 res = (resync>>scale)*1000;
5239 sector_div(res, (u32)((max_blocks>>scale)+1));
5240
5241 per_milli = res;
5242 {
 /* Progress bar: 20 cells of 5% each, x filled ('=') and y empty ('.'). */
5243 int i, x = per_milli/50, y = 20-x;
5244 seq_printf(seq, "[");
5245 for (i = 0; i < x; i++)
5246 seq_printf(seq, "=");
5247 seq_printf(seq, ">");
5248 for (i = 0; i < y; i++)
5249 seq_printf(seq, ".");
5250 seq_printf(seq, "] ");
5251 }
5252 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
5253 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
5254 "reshape" :
5255 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
5256 "check" :
5257 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
5258 "resync" : "recovery"))),
5259 per_milli/10, per_milli % 10,
5260 (unsigned long long) resync,
5261 (unsigned long long) max_blocks);
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
 /* dt: whole seconds since resync_mark, forced to at least 1 so it can be a divisor. */
5272 dt = ((jiffies - mddev->resync_mark) / HZ);
5273 if (!dt) dt++;
 /* db: amount completed since the mark was taken (same unit as curr_mark_cnt). */
5274 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
5275 - mddev->resync_mark_cnt;
 /* rt: estimated remaining seconds; the /100 ... /100 split and the +1 avoid overflow and division by zero. */
5276 rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
5277
 /* Printed as minutes plus tenths of a minute ((rt % 60) / 6). */
5278 seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
5279
5280 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
5281}
5282
5283static void *md_seq_start(struct seq_file *seq, loff_t *pos)
5284{
5285 struct list_head *tmp;
5286 loff_t l = *pos;
5287 mddev_t *mddev;
5288
5289 if (l >= 0x10000)
5290 return NULL;
5291 if (!l--)
5292
5293 return (void*)1;
5294
5295 spin_lock(&all_mddevs_lock);
5296 list_for_each(tmp,&all_mddevs)
5297 if (!l--) {
5298 mddev = list_entry(tmp, mddev_t, all_mddevs);
5299 mddev_get(mddev);
5300 spin_unlock(&all_mddevs_lock);
5301 return mddev;
5302 }
5303 spin_unlock(&all_mddevs_lock);
5304 if (!l--)
5305 return (void*)2;
5306 return NULL;
5307}
5308
5309static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
5310{
5311 struct list_head *tmp;
5312 mddev_t *next_mddev, *mddev = v;
5313
5314 ++*pos;
5315 if (v == (void*)2)
5316 return NULL;
5317
5318 spin_lock(&all_mddevs_lock);
5319 if (v == (void*)1)
5320 tmp = all_mddevs.next;
5321 else
5322 tmp = mddev->all_mddevs.next;
5323 if (tmp != &all_mddevs)
5324 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
5325 else {
5326 next_mddev = (void*)2;
5327 *pos = 0x10000;
5328 }
5329 spin_unlock(&all_mddevs_lock);
5330
5331 if (