1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/kthread.h>
38#include <linux/linkage.h>
39#include <linux/raid/md.h>
40#include <linux/raid/bitmap.h>
41#include <linux/sysctl.h>
42#include <linux/buffer_head.h>
43#include <linux/poll.h>
44#include <linux/mutex.h>
45#include <linux/ctype.h>
46#include <linux/freezer.h>
47
48#include <linux/init.h>
49
50#include <linux/file.h>
51
52#ifdef CONFIG_KMOD
53#include <linux/kmod.h>
54#endif
55
56#include <asm/unaligned.h>
57
/* MAJOR_NR is an alias for MD_MAJOR. */
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER


/*
 * For devices whose major is not MD_MAJOR, mddev_find() shifts the minor
 * number right by this amount to obtain md_minor.
 */
#define MdpMinorShift 6

/*
 * dprintk() only reaches printk() when DEBUG is non-zero; with DEBUG == 0
 * the && short-circuits and nothing is printed.
 */
#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))
66
67
/* autostart_arrays() is only declared when md is built in (not as a module). */
#ifndef MODULE
static void autostart_arrays (int part);
#endif

/*
 * List of personalities searched by find_pers().
 * pers_lock presumably serialises access to pers_list -- confirm at the
 * registration sites, which are outside this part of the file.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

/*
 * Report an internal inconsistency: print the file and line of the call
 * site, then call md_print_devices().  Any arguments are ignored.
 *
 * NOTE(review): this expands to a bare { } block, so "MD_BUG();" used as the
 * unbraced body of an if that has an else would not parse.
 */
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92static int sysctl_speed_limit_min = 1000;
93static int sysctl_speed_limit_max = 200000;
94static inline int speed_min(mddev_t *mddev)
95{
96 return mddev->sync_speed_min ?
97 mddev->sync_speed_min : sysctl_speed_limit_min;
98}
99
100static inline int speed_max(mddev_t *mddev)
101{
102 return mddev->sync_speed_max ?
103 mddev->sync_speed_max : sysctl_speed_limit_max;
104}
105
static struct ctl_table_header *raid_table_header;

/*
 * Leaf sysctl entries "speed_limit_min" and "speed_limit_max".  Each is an
 * int backed by the matching sysctl_speed_limit_* variable, handled by
 * proc_dointvec, readable by everyone and writable by the owner.
 * The entry with ctl_name 0 terminates the table.
 */
static ctl_table raid_table[] = {
 {
 .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
 .procname = "speed_limit_min",
 .data = &sysctl_speed_limit_min,
 .maxlen = sizeof(int),
 .mode = S_IRUGO|S_IWUSR,
 .proc_handler = &proc_dointvec,
 },
 {
 .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
 .procname = "speed_limit_max",
 .data = &sysctl_speed_limit_max,
 .maxlen = sizeof(int),
 .mode = S_IRUGO|S_IWUSR,
 .proc_handler = &proc_dointvec,
 },
 { .ctl_name = 0 }
};
127
/*
 * The "raid" sysctl directory: no data of its own (maxlen 0), readable and
 * searchable by everyone, with raid_table as its children.
 */
static ctl_table raid_dir_table[] = {
 {
 .ctl_name = DEV_RAID,
 .procname = "raid",
 .maxlen = 0,
 .mode = S_IRUGO|S_IXUGO,
 .child = raid_table,
 },
 { .ctl_name = 0 }
};
138
/*
 * Top of the md sysctl hierarchy: a "dev" directory (mode 0555) whose child
 * is raid_dir_table, giving dev/raid/speed_limit_{min,max}.
 */
static ctl_table raid_root_table[] = {
 {
 .ctl_name = CTL_DEV,
 .procname = "dev",
 .maxlen = 0,
 .mode = 0555,
 .child = raid_dir_table,
 },
 { .ctl_name = 0 }
};
149
/* Tentative definition; the initialised md_fops is not in this part of the file. */
static struct block_device_operations md_fops;

/*
 * Not referenced in this part of the file.
 * NOTE(review): presumably makes newly started arrays read-only -- confirm at its users.
 */
static int start_readonly;
153
154
155
156
157
158
159
160
161
162
163
/*
 * Global event bookkeeping: md_event_count is bumped for every reported
 * event and md_event_waiters is woken so sleepers can re-read the count.
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;

/*
 * Record a new event: increment the global counter, wake anyone sleeping on
 * md_event_waiters, and notify the array's "sync_action" sysfs attribute.
 */
void md_new_event(mddev_t *mddev)
{
 atomic_inc(&md_event_count);
 wake_up(&md_event_waiters);
 sysfs_notify(&mddev->kobj, NULL, "sync_action");
}
EXPORT_SYMBOL_GPL(md_new_event);
173
174
175
176
/*
 * Variant of md_new_event() that only bumps the counter and wakes waiters;
 * it does not call sysfs_notify() and never touches @mddev.
 * NOTE(review): going by the name this is the interrupt-context version,
 * presumably because sysfs_notify() is not safe there -- confirm.
 */
static void md_new_event_inintr(mddev_t *mddev)
{
 atomic_inc(&md_event_count);
 wake_up(&md_event_waiters);
}
182
183
184
185
186
/*
 * Every mddev created by mddev_find() is linked on all_mddevs; the list is
 * modified and walked under all_mddevs_lock.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);


/*
 * Iterate over every mddev on all_mddevs.  @tmp is a struct list_head *
 * cursor and @mddev is the current array.
 *
 * The loop body runs WITHOUT all_mddevs_lock held.  Before each iteration
 * the macro takes the lock, grabs a reference on the entry at @tmp (if it is
 * not the list head), drops the lock, and then releases the reference on the
 * previous @mddev with mddev_put().  The lock is re-taken only to advance
 * @tmp.
 *
 * Consequences visible from the expansion:
 *  - the body holds a reference on @mddev for the duration of the iteration;
 *  - leaving the loop early (break/goto/return) skips the mddev_put() for
 *    the current @mddev, so the caller must drop that reference itself;
 *  - after normal termination @mddev is computed from the list head and
 *    must not be dereferenced.
 */
#define for_each_mddev(mddev,tmp) \
 \
 for (({ spin_lock(&all_mddevs_lock); \
 tmp = all_mddevs.next; \
 mddev = NULL;}); \
 ({ if (tmp != &all_mddevs) \
 mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
 spin_unlock(&all_mddevs_lock); \
 if (mddev) mddev_put(mddev); \
 mddev = list_entry(tmp, mddev_t, all_mddevs); \
 tmp != &all_mddevs;}); \
 ({ spin_lock(&all_mddevs_lock); \
 tmp = tmp->next;}) \
 )
212
213
/*
 * make_request function that fails every bio with an I/O error and
 * returns 0.  mddev_find() installs it on each newly allocated queue.
 */
static int md_fail_request (struct request_queue *q, struct bio *bio)
{
 bio_io_error(bio);
 return 0;
}
219
/* Take an additional reference on @mddev (mddev->active) and return it. */
static inline mddev_t *mddev_get(mddev_t *mddev)
{
 atomic_inc(&mddev->active);
 return mddev;
}
225
226static void mddev_put(mddev_t *mddev)
227{
228 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
229 return;
230 if (!mddev->raid_disks && list_empty(&mddev->disks)) {
231 list_del(&mddev->all_mddevs);
232 spin_unlock(&all_mddevs_lock);
233 blk_cleanup_queue(mddev->queue);
234 kobject_put(&mddev->kobj);
235 } else
236 spin_unlock(&all_mddevs_lock);
237}
238
239static mddev_t * mddev_find(dev_t unit)
240{
241 mddev_t *mddev, *new = NULL;
242
243 retry:
244 spin_lock(&all_mddevs_lock);
245 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
246 if (mddev->unit == unit) {
247 mddev_get(mddev);
248 spin_unlock(&all_mddevs_lock);
249 kfree(new);
250 return mddev;
251 }
252
253 if (new) {
254 list_add(&new->all_mddevs, &all_mddevs);
255 spin_unlock(&all_mddevs_lock);
256 return new;
257 }
258 spin_unlock(&all_mddevs_lock);
259
260 new = kzalloc(sizeof(*new), GFP_KERNEL);
261 if (!new)
262 return NULL;
263
264 new->unit = unit;
265 if (MAJOR(unit) == MD_MAJOR)
266 new->md_minor = MINOR(unit);
267 else
268 new->md_minor = MINOR(unit) >> MdpMinorShift;
269
270 mutex_init(&new->reconfig_mutex);
271 INIT_LIST_HEAD(&new->disks);
272 INIT_LIST_HEAD(&new->all_mddevs);
273 init_timer(&new->safemode_timer);
274 atomic_set(&new->active, 1);
275 spin_lock_init(&new->write_lock);
276 init_waitqueue_head(&new->sb_wait);
277 new->reshape_position = MaxSector;
278 new->resync_max = MaxSector;
279
280 new->queue = blk_alloc_queue(GFP_KERNEL);
281 if (!new->queue) {
282 kfree(new);
283 return NULL;
284 }
285 set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
286
287 blk_queue_make_request(new->queue, md_fail_request);
288
289 goto retry;
290}
291
/*
 * Take the array's reconfig_mutex, sleeping interruptibly.  Returns the
 * result of mutex_lock_interruptible(), so callers must check it.
 */
static inline int mddev_lock(mddev_t * mddev)
{
 return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
296
/*
 * Try to take reconfig_mutex without sleeping; returns the result of
 * mutex_trylock().
 */
static inline int mddev_trylock(mddev_t * mddev)
{
 return mutex_trylock(&mddev->reconfig_mutex);
}
301
/*
 * Release reconfig_mutex and then wake the array's thread (mddev->thread).
 * NOTE(review): presumably so the thread can act on whatever changed while
 * the lock was held -- confirm against the thread's main loop.
 */
static inline void mddev_unlock(mddev_t * mddev)
{
 mutex_unlock(&mddev->reconfig_mutex);

 md_wakeup_thread(mddev->thread);
}
308
309static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
310{
311 mdk_rdev_t * rdev;
312 struct list_head *tmp;
313
314 rdev_for_each(rdev, tmp, mddev) {
315 if (rdev->desc_nr == nr)
316 return rdev;
317 }
318 return NULL;
319}
320
321static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
322{
323 struct list_head *tmp;
324 mdk_rdev_t *rdev;
325
326 rdev_for_each(rdev, tmp, mddev) {
327 if (rdev->bdev->bd_dev == dev)
328 return rdev;
329 }
330 return NULL;
331}
332
333static struct mdk_personality *find_pers(int level, char *clevel)
334{
335 struct mdk_personality *pers;
336 list_for_each_entry(pers, &pers_list, list) {
337 if (level != LEVEL_NONE && pers->level == level)
338 return pers;
339 if (strcmp(pers->name, clevel)==0)
340 return pers;
341 }
342 return NULL;
343}
344
345static inline sector_t calc_dev_sboffset(struct block_device *bdev)
346{
347 sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
348 return MD_NEW_SIZE_BLOCKS(size);
349}
350
351static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size)
352{
353 sector_t size;
354
355 size = rdev->sb_offset;
356
357 if (chunk_size)
358 size &= ~((sector_t)chunk_size/1024 - 1);
359 return size;
360}
361
362static int alloc_disk_sb(mdk_rdev_t * rdev)
363{
364 if (rdev->sb_page)
365 MD_BUG();
366
367 rdev->sb_page = alloc_page(GFP_KERNEL);
368 if (!rdev->sb_page) {
369 printk(KERN_ALERT "md: out of memory.\n");
370 return -EINVAL;
371 }
372
373 return 0;
374}
375
376static void free_disk_sb(mdk_rdev_t * rdev)
377{
378 if (rdev->sb_page) {
379 put_page(rdev->sb_page);
380 rdev->sb_loaded = 0;
381 rdev->sb_page = NULL;
382 rdev->sb_offset = 0;
383 rdev->size = 0;
384 }
385}
386
387
388static void super_written(struct bio *bio, int error)
389{
390 mdk_rdev_t *rdev = bio->bi_private;
391 mddev_t *mddev = rdev->mddev;
392
393 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
394 printk("md: super_written gets error=%d, uptodate=%d\n",
395 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
396 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
397 md_error(mddev, rdev);
398 }
399
400 if (atomic_dec_and_test(&mddev->pending_writes))
401 wake_up(&mddev->sb_wait);
402 bio_put(bio);
403}
404
405static void super_written_barrier(struct bio *bio, int error)
406{
407 struct bio *bio2 = bio->bi_private;
408 mdk_rdev_t *rdev = bio2->bi_private;
409 mddev_t *mddev = rdev->mddev;
410
411 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
412 error == -EOPNOTSUPP) {
413 unsigned long flags;
414
415 set_bit(BarriersNotsupp, &rdev->flags);
416 mddev->barriers_work = 0;
417 spin_lock_irqsave(&mddev->write_lock, flags);
418 bio2->bi_next = mddev->biolist;
419 mddev->biolist = bio2;
420 spin_unlock_irqrestore(&mddev->write_lock, flags);
421 wake_up(&mddev->sb_wait);
422 bio_put(bio);
423 } else {
424 bio_put(bio2);
425 bio->bi_private = rdev;
426 super_written(bio, error);
427 }
428}
429
/*
 * Start an asynchronous write of the first @size bytes of @page to @sector
 * of @rdev.
 *
 * mddev->pending_writes is incremented before the I/O is submitted; the
 * completion handler (super_written) decrements it, wakes sb_wait when it
 * reaches zero and calls md_error() on failure.  Use md_super_wait() to wait
 * for completion.
 *
 * Unless the device is already flagged BarriersNotsupp, the write is sent
 * as a barrier using a clone of the bio.  The original bio is kept so that
 * super_written_barrier() can queue it for a plain resubmission if the
 * barrier is rejected with -EOPNOTSUPP.
 */
void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 sector_t sector, int size, struct page *page)
{
 struct bio *bio = bio_alloc(GFP_NOIO, 1);
 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);

 bio->bi_bdev = rdev->bdev;
 bio->bi_sector = sector;
 bio_add_page(bio, page, size, 0);
 bio->bi_private = rdev;
 bio->bi_end_io = super_written;
 /* Stored before the barrier bit is added, so a retry of this bio is a plain write. */
 bio->bi_rw = rw;

 atomic_inc(&mddev->pending_writes);
 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
 struct bio *rbio;
 rw |= (1<<BIO_RW_BARRIER);
 rbio = bio_clone(bio, GFP_NOIO);
 /* The clone's completion handler finds the original bio through bi_private. */
 rbio->bi_private = bio;
 rbio->bi_end_io = super_written_barrier;
 submit_bio(rw, rbio);
 } else
 submit_bio(rw, bio);
}
463
/*
 * Sleep (uninterruptibly) until mddev->pending_writes drops to zero.
 *
 * While waiting, any bios that super_written_barrier() parked on
 * mddev->biolist are taken off the list one at a time under write_lock and
 * resubmitted with their saved bi_rw flags.
 */
void md_super_wait(mddev_t *mddev)
{
 DEFINE_WAIT(wq);
 for(;;) {
 /* Register on the wait queue before testing, so a wake-up in between is not lost. */
 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
 if (atomic_read(&mddev->pending_writes)==0)
 break;
 while (mddev->biolist) {
 struct bio *bio;
 /* write_lock is held only while unlinking; submission happens outside it. */
 spin_lock_irq(&mddev->write_lock);
 bio = mddev->biolist;
 mddev->biolist = bio->bi_next ;
 bio->bi_next = NULL;
 spin_unlock_irq(&mddev->write_lock);
 submit_bio(bio->bi_rw, bio);
 }
 schedule();
 }
 finish_wait(&mddev->sb_wait, &wq);
}
487
/*
 * bio completion handler used by sync_page_io(): bi_private is the
 * struct completion to signal.  @error is ignored here; the submitter
 * inspects BIO_UPTODATE instead.
 */
static void bi_complete(struct bio *bio, int error)
{
 complete((struct completion*)bio->bi_private);
}
492
493int sync_page_io(struct block_device *bdev, sector_t sector, int size,
494 struct page *page, int rw)
495{
496 struct bio *bio = bio_alloc(GFP_NOIO, 1);
497 struct completion event;
498 int ret;
499
500 rw |= (1 << BIO_RW_SYNC);
501
502 bio->bi_bdev = bdev;
503 bio->bi_sector = sector;
504 bio_add_page(bio, page, size, 0);
505 init_completion(&event);
506 bio->bi_private = &event;
507 bio->bi_end_io = bi_complete;
508 submit_bio(rw, bio);
509 wait_for_completion(&event);
510
511 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
512 bio_put(bio);
513 return ret;
514}
515EXPORT_SYMBOL_GPL(sync_page_io);
516
517static int read_disk_sb(mdk_rdev_t * rdev, int size)
518{
519 char b[BDEVNAME_SIZE];
520 if (!rdev->sb_page) {
521 MD_BUG();
522 return -EINVAL;
523 }
524 if (rdev->sb_loaded)
525 return 0;
526
527
528 if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ))
529 goto fail;
530 rdev->sb_loaded = 1;
531 return 0;
532
533fail:
534 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
535 bdevname(rdev->bdev,b));
536 return -EINVAL;
537}
538
539static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
540{
541 if ( (sb1->set_uuid0 == sb2->set_uuid0) &&
542 (sb1->set_uuid1 == sb2->set_uuid1) &&
543 (sb1->set_uuid2 == sb2->set_uuid2) &&
544 (sb1->set_uuid3 == sb2->set_uuid3))
545
546 return 1;
547
548 return 0;
549}
550
551
552static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
553{
554 int ret;
555 mdp_super_t *tmp1, *tmp2;
556
557 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
558 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
559
560 if (!tmp1 || !tmp2) {
561 ret = 0;
562 printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
563 goto abort;
564 }
565
566 *tmp1 = *sb1;
567 *tmp2 = *sb2;
568
569
570
571
572 tmp1->nr_disks = 0;
573 tmp2->nr_disks = 0;
574
575 if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
576 ret = 0;
577 else
578 ret = 1;
579
580abort:
581 kfree(tmp1);
582 kfree(tmp2);
583 return ret;
584}
585
586
/*
 * Fold a 32-bit checksum down by adding its upper and lower 16-bit halves,
 * twice.  The second pass absorbs the carry the first pass can produce.
 */
static u32 md_csum_fold(u32 csum)
{
	u32 folded = (csum >> 16) + (csum & 0xffff);

	folded = (folded >> 16) + (folded & 0xffff);
	return folded;
}
592
/*
 * Compute the checksum of a 0.90 superblock: the sum of all MD_SB_BYTES/4
 * 32-bit words, taken with the sb_csum field temporarily zeroed, with the
 * carries out of the low 32 bits added back in.
 *
 * Returns the checksum.  On non-Alpha builds the caller's sb_csum field is
 * restored unchanged before returning.
 */
static unsigned int calc_sb_csum(mdp_super_t * sb)
{
 u64 newcsum = 0;
 u32 *sb32 = (u32*)sb;
 int i;
 unsigned int disk_csum, csum;

 /* The checksum field itself must read as zero while summing. */
 disk_csum = sb->sb_csum;
 sb->sb_csum = 0;

 for (i = 0; i < MD_SB_BYTES/4 ; i++)
 newcsum += sb32[i];
 csum = (newcsum & 0xffffffff) + (newcsum>>32);


#ifdef CONFIG_ALPHA
 /*
  * On Alpha the saved checksum is put back in its 16-bit folded form
  * rather than verbatim.  super_90_load() folds both sides before
  * comparing, so a folded stored value still verifies.
  * NOTE(review): presumably kept for compatibility with superblocks
  * written by an older Alpha checksum routine -- confirm.
  */
 sb->sb_csum = md_csum_fold(disk_csum);
#else
 sb->sb_csum = disk_csum;
#endif
 return csum;
}
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
/*
 * Operations for one on-disk superblock format.  The handlers in this file
 * (super_90_* and super_1_*) behave as follows:
 *
 * load_super(rdev, refdev, minor_version)
 *	Read and sanity-check the superblock of @rdev.  With no @refdev the
 *	result is 1.  Otherwise @rdev is checked for consistency with @refdev
 *	and the result is 1 when @rdev has the higher event count, 0 when it
 *	does not.  A negative errno means the device cannot be used.
 *
 * validate_super(mddev, rdev)
 *	Populate @mddev from the superblock if it is still unconfigured
 *	(raid_disks == 0), otherwise check @rdev against it, and set the
 *	rdev's role and flags.  Returns 0 or -EINVAL.
 *
 * sync_super(mddev, rdev)
 *	Rebuild the superblock image in rdev->sb_page from the current state
 *	of @mddev.  Does not write it to disk.
 */
struct super_type {
 char *name;
 struct module *owner;
 int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version);
 int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
 void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
};
662
663
664
665
/*
 * load_super for the 0.90.0 format.
 *
 * Reads the superblock of @rdev, validates magic, version, raid_disks and
 * checksum, and fills in rdev->preferred_minor, data_offset, sb_size,
 * desc_nr and size.  @minor_version is not used by this format.
 *
 * Returns 1 when there is no @refdev or @rdev's event count is higher than
 * @refdev's, 0 when it is not higher, -EINVAL when the superblock is
 * unusable or inconsistent with @refdev, or the error from read_disk_sb().
 */
static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 mdp_super_t *sb;
 int ret;
 sector_t sb_offset;

 /*
  * The superblock position is derived from the device size by
  * calc_dev_sboffset() and must be recorded before read_disk_sb(),
  * which reads from rdev->sb_offset.
  */
 sb_offset = calc_dev_sboffset(rdev->bdev);
 rdev->sb_offset = sb_offset;

 ret = read_disk_sb(rdev, MD_SB_BYTES);
 if (ret) return ret;

 /* From here on every "goto abort" reports -EINVAL unless ret is reassigned. */
 ret = -EINVAL;

 bdevname(rdev->bdev, b);
 sb = (mdp_super_t*)page_address(rdev->sb_page);

 if (sb->md_magic != MD_SB_MAGIC) {
 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
 b);
 goto abort;
 }

 /* Only versions 0.90 and 0.91 are accepted. */
 if (sb->major_version != 0 ||
 sb->minor_version < 90 ||
 sb->minor_version > 91) {
 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
 sb->major_version, sb->minor_version,
 b);
 goto abort;
 }

 if (sb->raid_disks <= 0)
 goto abort;

 /* Both sides are folded to 16 bits, so full and folded stored checksums both verify. */
 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
 b);
 goto abort;
 }

 rdev->preferred_minor = sb->md_minor;
 rdev->data_offset = 0;
 rdev->sb_size = MD_SB_BYTES;

 /* A bitmap recorded in the superblock is only accepted for levels 1, 4, 5, 6 and 10. */
 if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
 if (sb->level != 1 && sb->level != 4
 && sb->level != 5 && sb->level != 6
 && sb->level != 10) {

 printk(KERN_WARNING
 "md: bitmaps not supported for this level.\n");
 goto abort;
 }
 }

 /* Multipath members get desc_nr -1 instead of the stored descriptor number. */
 if (sb->level == LEVEL_MULTIPATH)
 rdev->desc_nr = -1;
 else
 rdev->desc_nr = sb->this_disk.number;

 if (refdev == 0)
 ret = 1;
 else {
 __u64 ev1, ev2;
 mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
 if (!uuid_equal(refsb, sb)) {
 printk(KERN_WARNING "md: %s has different UUID to %s\n",
 b, bdevname(refdev->bdev,b2));
 goto abort;
 }
 if (!sb_equal(refsb, sb)) {
 printk(KERN_WARNING "md: %s has same UUID"
 " but different superblock to %s\n",
 b, bdevname(refdev->bdev, b2));
 goto abort;
 }
 /* Prefer whichever device carries the higher event count. */
 ev1 = md_event(sb);
 ev2 = md_event(refsb);
 if (ev1 > ev2)
 ret = 1;
 else
 ret = 0;
 }
 rdev->size = calc_dev_size(rdev, sb->chunk_size);

 if (rdev->size < sb->size && sb->level > 1)
 /* The device is smaller than the recorded per-device size; rejected for levels above 1. */
 ret = -EINVAL;

 abort:
 return ret;
}
766
767
768
769
/*
 * validate_super for the 0.90.0 format.
 *
 * Resets @rdev's role and the Faulty/In_sync/WriteMostly/BarriersNotsupp
 * flags, then does one of the following:
 *  - @mddev not yet configured (raid_disks == 0): copy the array geometry
 *    and state from this superblock into @mddev;
 *  - array configured but not running (pers == NULL): return -EINVAL when
 *    this device's event count plus one is below the array's;
 *  - array running: return 0 early, leaving the device without a role, when
 *    its event count is too old (compared with bitmap->events_cleared if
 *    there is a bitmap, else with mddev->events).
 * Finally the device's role and flags are taken from its descriptor.
 *
 * Returns 0 or -EINVAL.
 */
static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
 mdp_disk_t *desc;
 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
 __u64 ev1 = md_event(sb);

 rdev->raid_disk = -1;
 clear_bit(Faulty, &rdev->flags);
 clear_bit(In_sync, &rdev->flags);
 clear_bit(WriteMostly, &rdev->flags);
 clear_bit(BarriersNotsupp, &rdev->flags);

 if (mddev->raid_disks == 0) {
 mddev->major_version = 0;
 mddev->minor_version = sb->minor_version;
 mddev->patch_version = sb->patch_version;
 mddev->external = 0;
 mddev->chunk_size = sb->chunk_size;
 mddev->ctime = sb->ctime;
 mddev->utime = sb->utime;
 mddev->level = sb->level;
 mddev->clevel[0] = 0;
 mddev->layout = sb->layout;
 mddev->raid_disks = sb->raid_disks;
 mddev->size = sb->size;
 mddev->events = ev1;
 mddev->bitmap_offset = 0;
 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;

 /* Reshape fields are only read from minor version 91 and later. */
 if (mddev->minor_version >= 91) {
 mddev->reshape_position = sb->reshape_position;
 mddev->delta_disks = sb->delta_disks;
 mddev->new_level = sb->new_level;
 mddev->new_layout = sb->new_layout;
 mddev->new_chunk = sb->new_chunk;
 } else {
 mddev->reshape_position = MaxSector;
 mddev->delta_disks = 0;
 mddev->new_level = mddev->level;
 mddev->new_layout = mddev->layout;
 mddev->new_chunk = mddev->chunk_size;
 }

 /*
  * Clean array: nothing to resync.  Otherwise the stored checkpoint
  * is trusted only if it was taken at the current event count.
  */
 if (sb->state & (1<<MD_SB_CLEAN))
 mddev->recovery_cp = MaxSector;
 else {
 if (sb->events_hi == sb->cp_events_hi &&
 sb->events_lo == sb->cp_events_lo) {
 mddev->recovery_cp = sb->recovery_cp;
 } else
 mddev->recovery_cp = 0;
 }

 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

 mddev->max_disks = MD_SB_DISKS;

 /* Superblock-recorded bitmap: use the default offset unless a bitmap file is set. */
 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
 mddev->bitmap_file == NULL)
 mddev->bitmap_offset = mddev->default_bitmap_offset;

 } else if (mddev->pers == NULL) {
 /* Array not running yet: a device one event behind is still accepted. */
 ++ev1;
 if (ev1 < mddev->events)
 return -EINVAL;
 } else if (mddev->bitmap) {
 /*
  * Running array with a bitmap: a device older than events_cleared
  * is left without In_sync and without a role.
  */
 if (ev1 < mddev->bitmap->events_cleared)
 return 0;
 } else {
 if (ev1 < mddev->events)
 /* Out of date: leave the device without In_sync and without a role. */
 return 0;
 }

 if (mddev->level != LEVEL_MULTIPATH) {
 /* NOTE(review): desc_nr comes from the on-disk this_disk.number and is not range-checked here. */
 desc = sb->disks + rdev->desc_nr;

 if (desc->state & (1<<MD_DISK_FAULTY))
 set_bit(Faulty, &rdev->flags);
 else if (desc->state & (1<<MD_DISK_SYNC)
) {
 set_bit(In_sync, &rdev->flags);
 rdev->raid_disk = desc->raid_disk;
 }
 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
 set_bit(WriteMostly, &rdev->flags);
 } else
 /* Every multipath member is marked In_sync. */
 set_bit(In_sync, &rdev->flags);
 return 0;
}
867
868
869
870
/*
 * sync_super for the 0.90.0 format.
 *
 * Rebuilds the complete superblock image in rdev->sb_page from @mddev: the
 * page is zeroed, the array-wide fields are filled in, one descriptor is
 * written for every member device, and the checksum is computed last.
 *
 * Side effects beyond the page: mddev->minor_version is updated to the
 * version written (90 or 91), and desc_nr is reassigned on every member
 * device of the array.
 */
static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
 mdp_super_t *sb;
 struct list_head *tmp;
 mdk_rdev_t *rdev2;
 /* Devices that are not active in-sync members get descriptor numbers from raid_disks up. */
 int next_spare = mddev->raid_disks;

 int i;
 int active=0, working=0,failed=0,spare=0,nr_disks=0;

 rdev->sb_size = MD_SB_BYTES;

 sb = (mdp_super_t*)page_address(rdev->sb_page);

 memset(sb, 0, sizeof(*sb));

 sb->md_magic = MD_SB_MAGIC;
 sb->major_version = mddev->major_version;
 sb->patch_version = mddev->patch_version;
 sb->gvalid_words = 0;
 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
 memcpy(&sb->set_uuid3, mddev->uuid+12,4);

 sb->ctime = mddev->ctime;
 sb->level = mddev->level;
 sb->size = mddev->size;
 sb->raid_disks = mddev->raid_disks;
 sb->md_minor = mddev->md_minor;
 sb->not_persistent = 0;
 sb->utime = mddev->utime;
 sb->state = 0;
 /* The 64-bit event count is stored as two 32-bit halves. */
 sb->events_hi = (mddev->events>>32);
 sb->events_lo = (u32)mddev->events;

 /* Minor version 91 marks a superblock that carries reshape information. */
 if (mddev->reshape_position == MaxSector)
 sb->minor_version = 90;
 else {
 sb->minor_version = 91;
 sb->reshape_position = mddev->reshape_position;
 sb->new_level = mddev->new_level;
 sb->delta_disks = mddev->delta_disks;
 sb->new_layout = mddev->new_layout;
 sb->new_chunk = mddev->new_chunk;
 }
 mddev->minor_version = sb->minor_version;
 if (mddev->in_sync)
 {
 /* Record the checkpoint together with the event count it was taken at. */
 sb->recovery_cp = mddev->recovery_cp;
 sb->cp_events_hi = (mddev->events>>32);
 sb->cp_events_lo = (u32)mddev->events;
 if (mddev->recovery_cp == MaxSector)
 sb->state = (1<< MD_SB_CLEAN);
 } else
 sb->recovery_cp = 0;

 sb->layout = mddev->layout;
 sb->chunk_size = mddev->chunk_size;

 if (mddev->bitmap && mddev->bitmap_file == NULL)
 sb->state |= (1<<MD_SB_BITMAP_PRESENT);

 /* Pre-mark slot 0 as removed; a device that lands in slot 0 overwrites this. */
 sb->disks[0].state = (1<<MD_DISK_REMOVED);
 rdev_for_each(rdev2, tmp, mddev) {
 mdp_disk_t *d;
 int desc_nr;
 /* Active, in-sync, non-faulty members keep their raid slot as descriptor number. */
 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
 && !test_bit(Faulty, &rdev2->flags))
 desc_nr = rdev2->raid_disk;
 else
 desc_nr = next_spare++;
 rdev2->desc_nr = desc_nr;
 d = &sb->disks[rdev2->desc_nr];
 nr_disks++;
 d->number = rdev2->desc_nr;
 d->major = MAJOR(rdev2->bdev->bd_dev);
 d->minor = MINOR(rdev2->bdev->bd_dev);
 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
 && !test_bit(Faulty, &rdev2->flags))
 d->raid_disk = rdev2->raid_disk;
 else
 /* Not an active member: the raid_disk field just mirrors the descriptor number. */
 d->raid_disk = rdev2->desc_nr;
 if (test_bit(Faulty, &rdev2->flags))
 d->state = (1<<MD_DISK_FAULTY);
 else if (test_bit(In_sync, &rdev2->flags)) {
 d->state = (1<<MD_DISK_ACTIVE);
 d->state |= (1<<MD_DISK_SYNC);
 active++;
 working++;
 } else {
 d->state = 0;
 spare++;
 working++;
 }
 if (test_bit(WriteMostly, &rdev2->flags))
 d->state |= (1<<MD_DISK_WRITEMOSTLY);
 }

 /*
  * Raid slots that no device filled (descriptor still all zero) are
  * recorded as removed and faulty, and counted as failed.
  */
 for (i=0 ; i < mddev->raid_disks ; i++) {
 mdp_disk_t *d = &sb->disks[i];
 if (d->state == 0 && d->number == 0) {
 d->number = i;
 d->raid_disk = i;
 d->state = (1<<MD_DISK_REMOVED);
 d->state |= (1<<MD_DISK_FAULTY);
 failed++;
 }
 }
 sb->nr_disks = nr_disks;
 sb->active_disks = active;
 sb->working_disks = working;
 sb->failed_disks = failed;
 sb->spare_disks = spare;

 /* this_disk is a copy of this device's own descriptor; the checksum must be computed last. */
 sb->this_disk = sb->disks[rdev->desc_nr];
 sb->sb_csum = calc_sb_csum(sb);
}
1000
1001
1002
1003
1004
1005static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1006{
1007 __le32 disk_csum;
1008 u32 csum;
1009 unsigned long long newcsum;
1010 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1011 __le32 *isuper = (__le32*)sb;
1012 int i;
1013
1014 disk_csum = sb->sb_csum;
1015 sb->sb_csum = 0;
1016 newcsum = 0;
1017 for (i=0; size>=4; size -= 4 )
1018 newcsum += le32_to_cpu(*isuper++);
1019
1020 if (size == 2)
1021 newcsum += le16_to_cpu(*(__le16*) isuper);
1022
1023 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1024 sb->sb_csum = disk_csum;
1025 return cpu_to_le32(csum);
1026}
1027
/*
 * load_super for the version-1 format.
 *
 * @minor_version selects where the superblock lives:
 *   0 - near the end of the device: 8K back from the end, rounded down to a
 *       4K boundary;
 *   1 - at the very start of the device;
 *   2 - at sb_offset 4.  read_disk_sb() reads from sector sb_offset << 1,
 *       so this is sector 8.
 *
 * Reads and validates the superblock and fills in rdev->preferred_minor,
 * data_offset, corrected_errors, sb_size, desc_nr and size.
 *
 * Returns 1 when there is no @refdev or @rdev's event count is higher than
 * @refdev's, 0 when it is not higher, -EINVAL when the superblock is
 * unusable or inconsistent with @refdev, or the error from read_disk_sb().
 */
static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
 struct mdp_superblock_1 *sb;
 int ret;
 sector_t sb_offset;
 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 int bmask;

 /* sb_offset is kept in units of two sectors: rdev->sb_offset << 1 is the sector number. */
 switch(minor_version) {
 case 0:
 sb_offset = rdev->bdev->bd_inode->i_size >> 9;
 sb_offset -= 8*2;
 sb_offset &= ~(sector_t)(4*2-1);
 /* Convert from sectors to the two-sector units used by sb_offset. */
 sb_offset /= 2;
 break;
 case 1:
 sb_offset = 0;
 break;
 case 2:
 sb_offset = 4;
 break;
 default:
 return -EINVAL;
 }
 rdev->sb_offset = sb_offset;

 /* Always read a full 4096 bytes; the actual superblock size is worked out below. */
 ret = read_disk_sb(rdev, 4096);
 if (ret) return ret;


 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

 /*
  * Basic sanity: magic, major version, max_dev small enough for the
  * role table to fit in the 4096 bytes read, the superblock's own idea
  * of its position, and no unknown feature bits.
  */
 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
 sb->major_version != cpu_to_le32(1) ||
 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
 le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) ||
 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
 return -EINVAL;

 if (calc_sb_1_csum(sb) != sb->sb_csum) {
 printk("md: invalid superblock checksum on %s\n",
 bdevname(rdev->bdev,b));
 return -EINVAL;
 }
 if (le64_to_cpu(sb->data_size) < 10) {
 printk("md: data_size too small on %s\n",
 bdevname(rdev->bdev,b));
 return -EINVAL;
 }
 /* A bitmap recorded in the superblock is only accepted for levels 1, 4, 5, 6 and 10. */
 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
 if (sb->level != cpu_to_le32(1) &&
 sb->level != cpu_to_le32(4) &&
 sb->level != cpu_to_le32(5) &&
 sb->level != cpu_to_le32(6) &&
 sb->level != cpu_to_le32(10)) {
 printk(KERN_WARNING
 "md: bitmaps not supported for this level.\n");
 return -EINVAL;
 }
 }

 rdev->preferred_minor = 0xffff;
 rdev->data_offset = le64_to_cpu(sb->data_offset);
 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

 /* Superblock size: fixed 256 bytes plus the role table, rounded up to the hardware sector size. */
 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
 bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
 if (rdev->sb_size & bmask)
 rdev->sb_size = (rdev->sb_size | bmask) + 1;

 /* With the superblock at the front of the device, the data must start after it. */
 if (minor_version
 && rdev->data_offset < sb_offset + (rdev->sb_size/512))
 return -EINVAL;

 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
 rdev->desc_nr = -1;
 else
 rdev->desc_nr = le32_to_cpu(sb->dev_number);

 if (refdev == 0)
 ret = 1;
 else {
 __u64 ev1, ev2;
 struct mdp_superblock_1 *refsb =
 (struct mdp_superblock_1*)page_address(refdev->sb_page);

 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
 sb->level != refsb->level ||
 sb->layout != refsb->layout ||
 sb->chunksize != refsb->chunksize) {
 printk(KERN_WARNING "md: %s has strangely different"
 " superblock to %s\n",
 bdevname(rdev->bdev,b),
 bdevname(refdev->bdev,b2));
 return -EINVAL;
 }
 ev1 = le64_to_cpu(sb->events);
 ev2 = le64_to_cpu(refsb->events);

 if (ev1 > ev2)
 ret = 1;
 else
 ret = 0;
 }
 /*
  * First work out how much room the device offers: everything after
  * data_offset when the superblock is at the front, everything before
  * the superblock when it is at the end.
  */
 if (minor_version)
 rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
 else
 rdev->size = rdev->sb_offset;
 /* The recorded data_size must fit; if it does, it replaces the computed size. */
 if (rdev->size < le64_to_cpu(sb->data_size)/2)
 return -EINVAL;
 rdev->size = le64_to_cpu(sb->data_size)/2;
 if (le32_to_cpu(sb->chunksize))
 rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);

 /* The per-device size recorded for the array must not exceed what this device holds. */
 if (le64_to_cpu(sb->size) > rdev->size*2)
 return -EINVAL;
 return ret;
}
1158
1159static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1160{
1161 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1162 __u64 ev1 = le64_to_cpu(sb->events);
1163
1164 rdev->raid_disk = -1;
1165 clear_bit(Faulty, &rdev->flags);
1166 clear_bit(In_sync, &rdev->flags);
1167 clear_bit(WriteMostly, &rdev->flags);
1168 clear_bit(BarriersNotsupp, &rdev->flags);
1169
1170 if (mddev->raid_disks == 0) {
1171 mddev->major_version = 1;
1172 mddev->patch_version = 0;
1173 mddev->external = 0;
1174 mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
1175 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1176 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1177 mddev->level = le32_to_cpu(sb->level);
1178 mddev->clevel[0] = 0;
1179 mddev->layout = le32_to_cpu(sb->layout);
1180 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1181 mddev->size = le64_to_cpu(sb->size)/2;
1182 mddev->events = ev1;
1183 mddev->bitmap_offset = 0;
1184 mddev->default_bitmap_offset = 1024 >> 9;
1185
1186 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1187 memcpy(mddev->uuid, sb->set_uuid, 16);
1188
1189 mddev->max_disks = (4096-256)/2;
1190
1191 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1192 mddev->bitmap_file == NULL )
1193 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
1194
1195 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1196 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1197 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1198 mddev->new_level = le32_to_cpu(sb->new_level);
1199 mddev->new_layout = le32_to_cpu(sb->new_layout);
1200 mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
1201 } else {
1202 mddev->reshape_position = MaxSector;
1203 mddev->delta_disks = 0;
1204 mddev->new_level = mddev->level;
1205 mddev->new_layout = mddev->layout;
1206 mddev->new_chunk = mddev->chunk_size;
1207 }
1208
1209 } else if (mddev->pers == NULL) {
1210
1211 ++ev1;
1212 if (ev1 < mddev->events)
1213 return -EINVAL;
1214 } else if (mddev->bitmap) {
1215
1216
1217
1218 if (ev1 < mddev->bitmap->events_cleared)
1219 return 0;
1220 } else {
1221 if (ev1 < mddev->events)
1222
1223 return 0;
1224 }
1225 if (mddev->level != LEVEL_MULTIPATH) {
1226 int role;
1227 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1228 switch(role) {
1229 case 0xffff:
1230 break;
1231 case 0xfffe:
1232 set_bit(Faulty, &rdev->flags);
1233 break;
1234 default:
1235 if ((le32_to_cpu(sb->feature_map) &
1236 MD_FEATURE_RECOVERY_OFFSET))
1237 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1238 else
1239 set_bit(In_sync, &rdev->flags);
1240 rdev->raid_disk = role;
1241 break;
1242 }
1243 if (sb->devflags & WriteMostly1)
1244 set_bit(WriteMostly, &rdev->flags);
1245 } else
1246 set_bit(In_sync, &rdev->flags);
1247
1248 return 0;
1249}
1250
/*
 * sync_super for the version-1 format.
 *
 * Updates the superblock image in rdev->sb_page in place from @mddev.
 * Unlike the 0.90 handler the page is not zeroed; only the fields assigned
 * below are rewritten.  The role table is rebuilt from the array's member
 * devices and the checksum is computed last.
 */
static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
 struct mdp_superblock_1 *sb;
 struct list_head *tmp;
 mdk_rdev_t *rdev2;
 int max_dev, i;


 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

 /* Feature bits are rebuilt from scratch below; padding is cleared. */
 sb->feature_map = 0;
 sb->pad0 = 0;
 sb->recovery_offset = cpu_to_le64(0);
 memset(sb->pad1, 0, sizeof(sb->pad1));
 memset(sb->pad2, 0, sizeof(sb->pad2));
 memset(sb->pad3, 0, sizeof(sb->pad3));

 sb->utime = cpu_to_le64((__u64)mddev->utime);
 sb->events = cpu_to_le64(mddev->events);
 /* The resync checkpoint is only recorded while the array is in_sync; otherwise 0 is stored. */
 if (mddev->in_sync)
 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
 else
 sb->resync_offset = cpu_to_le64(0);

 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
 /* mddev->size is doubled for the on-disk field (super_1_validate() halves it on load). */
 sb->size = cpu_to_le64(mddev->size<<1);

 if (mddev->bitmap && mddev->bitmap_file == NULL) {
 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
 }

 /* A device that holds a slot but is not yet in sync records how far its recovery got. */
 if (rdev->raid_disk >= 0 &&
 !test_bit(In_sync, &rdev->flags) &&
 rdev->recovery_offset > 0) {
 sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
 sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
 }

 if (mddev->reshape_position != MaxSector) {
 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
 sb->new_layout = cpu_to_le32(mddev->new_layout);
 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
 sb->new_level = cpu_to_le32(mddev->new_level);
 sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
 }

 /* The role table must cover the highest descriptor number in use. */
 max_dev = 0;
 rdev_for_each(rdev2, tmp, mddev)
 if (rdev2->desc_nr+1 > max_dev)
 max_dev = rdev2->desc_nr+1;

 /* sb->max_dev only ever grows here. */
 if (max_dev > le32_to_cpu(sb->max_dev))
 sb->max_dev = cpu_to_le32(max_dev);
 /* Start with every slot marked faulty (0xfffe), then fill in the devices that exist. */
 /* NOTE(review): max_dev is not checked against the (4096-256)/2 limit that super_1_load() enforces. */
 for (i=0; i<max_dev;i++)
 sb->dev_roles[i] = cpu_to_le16(0xfffe);

 rdev_for_each(rdev2, tmp, mddev) {
 i = rdev2->desc_nr;
 if (test_bit(Faulty, &rdev2->flags))
 sb->dev_roles[i] = cpu_to_le16(0xfffe);
 else if (test_bit(In_sync, &rdev2->flags))
 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
 /* Partly recovered device: it keeps its slot. */
 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 else
 /* Spare. */
 sb->dev_roles[i] = cpu_to_le16(0xffff);
 }

 sb->sb_csum = calc_sb_1_csum(sb);
}
1325
1326
/*
 * Table of supported superblock formats.  Entry 0 is the 0.90 format and
 * entry 1 the version-1 format.
 * NOTE(review): the index presumably corresponds to the metadata major
 * version -- confirm at the lookup sites outside this part of the file.
 */
static struct super_type super_types[] = {
 [0] = {
 .name = "0.90.0",
 .owner = THIS_MODULE,
 .load_super = super_90_load,
 .validate_super = super_90_validate,
 .sync_super = super_90_sync,
 },
 [1] = {
 .name = "md-1",
 .owner = THIS_MODULE,
 .load_super = super_1_load,
 .validate_super = super_1_validate,
 .sync_super = super_1_sync,
 },
};
1343
1344static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1345{
1346 struct list_head *tmp, *tmp2;
1347 mdk_rdev_t *rdev, *rdev2;
1348
1349 rdev_for_each(rdev, tmp, mddev1)
1350 rdev_for_each(rdev2, tmp2, mddev2)
1351 if (rdev->bdev->bd_contains ==
1352 rdev2->bdev->bd_contains)
1353 return 1;
1354
1355 return 0;
1356}
1357
/*
 * File-scope list head.
 * NOTE(review): presumably holds imported devices that have not yet been
 * assembled into an array -- confirm against its users elsewhere in the file.
 */
static LIST_HEAD(pending_raid_disks);
1359
/*
 * Attach @rdev to @mddev.
 *
 * Chooses (or validates) the device's descriptor number, registers its
 * kobject as "dev-<name>" beneath the array's kobject, creates the "block"
 * sysfs link, adds it to mddev->disks and claims the block device on behalf
 * of the array's gendisk.
 *
 * Returns 0 on success, -EINVAL if @rdev is already bound to an array,
 * -ENOSPC if it is too small for a running array with level > 0, -EBUSY if
 * its desc_nr is already in use, or the error returned by kobject_add() /
 * sysfs_create_link().
 */
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	char *s;
	int err;

	if (rdev->mddev) {
		MD_BUG();
		return -EINVAL;
	}

	/*
	 * The device is smaller than the array's current component size (or
	 * no size has been recorded yet).
	 */
	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
		if (mddev->pers) {
			/*
			 * A running array cannot shrink here: refuse the
			 * device when level > 0; for other levels it is
			 * accepted without touching mddev->size.
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->size = rdev->size;
	}

	/*
	 * A negative desc_nr means "pick one": take the first number not
	 * already used in this array, starting the search at raid_disks when
	 * the array is running and at 0 otherwise.  An explicit desc_nr must
	 * not collide with an existing member.
	 */
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers) choice = mddev->raid_disks;
		while (find_rdev_nr(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (find_rdev_nr(mddev, rdev->desc_nr))
			return -EBUSY;
	}
	/* Replace every '/' in the device name so it forms one path component. */
	bdevname(rdev->bdev,b);
	while ( (s=strchr(b, '/')) != NULL)
		*s = '!';

	rdev->mddev = mddev;
	printk(KERN_INFO "md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	/* Link to the partition's kobject if there is one, else the whole disk's. */
	if (rdev->bdev->bd_part)
		ko = &rdev->bdev->bd_part->dev.kobj;
	else
		ko = &rdev->bdev->bd_disk->dev.kobj;
	if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
		kobject_del(&rdev->kobj);
		goto fail;
	}
	list_add(&rdev->same_set, &mddev->disks);
	bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
	return 0;

 fail:
	/*
	 * NOTE(review): rdev->mddev is still set to @mddev on this path even
	 * though the device was never added to mddev->disks.
	 */
	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
	       b, mdname(mddev));
	return err;
}
1425
1426static void md_delayed_delete(struct work_struct *ws)
1427{
1428 mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
1429 kobject_del(&rdev->kobj);
1430 kobject_put(&rdev->kobj);
1431}
1432
1433static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1434{
1435 char b[BDEVNAME_SIZE];
1436 if (!rdev->mddev) {
1437 MD_BUG();
1438 return;
1439 }
1440 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1441 list_del_init(&rdev->same_set);
1442 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1443 rdev->mddev = NULL;
1444 sysfs_remove_link(&rdev->kobj, "block");
1445
1446
1447
1448
1449 INIT_WORK(&rdev->del_work, md_delayed_delete);
1450 kobject_get(&rdev->kobj);
1451 schedule_work(&rdev->del_work);
1452}
1453
1454
1455
1456
1457
1458
1459static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1460{
1461 int err = 0;
1462 struct block_device *bdev;
1463 char b[BDEVNAME_SIZE];
1464
1465 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1466 if (IS_ERR(bdev)) {
1467 printk(KERN_ERR "md: could not open %s.\n",
1468 __bdevname(dev, b));
1469 return PTR_ERR(bdev);
1470 }
1471 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1472 if (err) {
1473 printk(KERN_ERR "md: could not bd_claim %s.\n",
1474 bdevname(bdev, b));
1475 blkdev_put(bdev);
1476 return err;
1477 }
1478 if (!shared)
1479 set_bit(AllReserved, &rdev->flags);
1480 rdev->bdev = bdev;
1481 return err;
1482}
1483
1484static void unlock_rdev(mdk_rdev_t *rdev)
1485{
1486 struct block_device *bdev = rdev->bdev;
1487 rdev->bdev = NULL;
1488 if (!bdev)
1489 MD_BUG();
1490 bd_release(bdev);
1491 blkdev_put(bdev);
1492}
1493
1494void md_autodetect_dev(dev_t dev);
1495
1496static void export_rdev(mdk_rdev_t * rdev)
1497{
1498 char b[BDEVNAME_SIZE];
1499 printk(KERN_INFO "md: export_rdev(%s)\n",
1500 bdevname(rdev->bdev,b));
1501 if (rdev->mddev)
1502 MD_BUG();
1503 free_disk_sb(rdev);
1504 list_del_init(&rdev->same_set);
1505#ifndef MODULE
1506 if (test_bit(AutoDetected, &rdev->flags))
1507 md_autodetect_dev(rdev->bdev->bd_dev);
1508#endif
1509 unlock_rdev(rdev);
1510 kobject_put(&rdev->kobj);
1511}
1512
1513static void kick_rdev_from_array(mdk_rdev_t * rdev)
1514{
1515 unbind_rdev_from_array(rdev);
1516 export_rdev(rdev);
1517}
1518
1519static void export_array(mddev_t *mddev)
1520{
1521 struct list_head *tmp;
1522 mdk_rdev_t *rdev;
1523
1524 rdev_for_each(rdev, tmp, mddev) {
1525 if (!rdev->mddev) {
1526 MD_BUG();
1527 continue;
1528 }
1529 kick_rdev_from_array(rdev);
1530 }
1531 if (!list_empty(&mddev->disks))
1532 MD_BUG();
1533 mddev->raid_disks = 0;
1534 mddev->major_version = 0;
1535}
1536
1537static void print_desc(mdp_disk_t *desc)
1538{
1539 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
1540 desc->major,desc->minor,desc->raid_disk,desc->state);
1541}
1542
1543static void print_sb(mdp_super_t *sb)
1544{
1545 int i;
1546
1547 printk(KERN_INFO
1548 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
1549 sb->major_version, sb->minor_version, sb->patch_version,
1550 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
1551 sb->ctime);
1552 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
1553 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
1554 sb->md_minor, sb->layout, sb->chunk_size);
1555 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
1556 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
1557 sb->utime, sb->state, sb->active_disks, sb->working_disks,
1558 sb->failed_disks, sb->spare_disks,
1559 sb->sb_csum, (unsigned long)sb->events_lo);
1560
1561 printk(KERN_INFO);
1562 for (i = 0; i < MD_SB_DISKS; i++) {
1563 mdp_disk_t *desc;
1564
1565 desc = sb->disks + i;
1566 if (desc->number || desc->major || desc->minor ||
1567 desc->raid_disk || (desc->state && (desc->state != 4))) {
1568 printk(" D %2d: ", i);
1569 print_desc(desc);
1570 }
1571 }
1572 printk(KERN_INFO "md: THIS: ");
1573 print_desc(&sb->this_disk);
1574
1575}
1576
1577static void print_rdev(mdk_rdev_t *rdev)
1578{
1579 char b[BDEVNAME_SIZE];
1580 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
1581 bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
1582 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1583 rdev->desc_nr);
1584 if (rdev->sb_loaded) {
1585 printk(KERN_INFO "md: rdev superblock:\n");
1586 print_sb((mdp_super_t*)page_address(rdev->sb_page));
1587 } else
1588 printk(KERN_INFO "md: no rdev superblock!\n");
1589}
1590
/*
 * Dump the state of every md array to the kernel log.  This is what
 * MD_BUG() calls after printing the file/line of the failed check.
 *
 * For each array: print either the bitmap superblock (when the array has a
 * bitmap) or the array name, then the list of member device names on one
 * line, then the full per-device details via print_rdev().
 */
static void md_print_devices(void)
{
	struct list_head *tmp, *tmp2;
	mdk_rdev_t *rdev;
	mddev_t *mddev;
	char b[BDEVNAME_SIZE];

	printk("\n");
	printk("md: **********************************\n");
	printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
	printk("md: **********************************\n");
	for_each_mddev(mddev, tmp) {

		/* Note: the array name is only printed when there is no bitmap. */
		if (mddev->bitmap)
			bitmap_print_sb(mddev->bitmap);
		else
			printk("%s: ", mdname(mddev));
		/* First pass: member names, all on the current line. */
		rdev_for_each(rdev, tmp2, mddev)
			printk("<%s>", bdevname(rdev->bdev,b));
		printk("\n");

		/* Second pass: detailed dump of each member. */
		rdev_for_each(rdev, tmp2, mddev)
			print_rdev(rdev);
	}
	printk("md: **********************************\n");
	printk("\n");
}
1618
1619
1620static void sync_sbs(mddev_t * mddev, int nospares)
1621{
1622
1623
1624
1625
1626
1627
1628 mdk_rdev_t *rdev;
1629 struct list_head *tmp;
1630
1631 rdev_for_each(rdev, tmp, mddev) {
1632 if (rdev->sb_events == mddev->events ||
1633 (nospares &&
1634 rdev->raid_disk < 0 &&
1635 (rdev->sb_events&1)==0 &&
1636 rdev->sb_events+1 == mddev->events)) {
1637
1638 rdev->sb_loaded = 2;
1639 } else {
1640 super_types[mddev->major_version].
1641 sync_super(mddev, rdev);
1642 rdev->sb_loaded = 1;
1643 }
1644 }
1645}
1646
/*
 * Advance the array's event counter and write updated superblocks to the
 * member devices.
 *
 * @force_change: non-zero forces a full update; it is also forced when
 *                MD_CHANGE_DEVS was pending.
 *
 * MD_CHANGE_PENDING is set for the duration of the update and cleared once
 * the writes have completed (or immediately for non-persistent,
 * non-external arrays).  If in_sync changed or MD_CHANGE_DEVS was set again
 * while the writes were in flight, the whole update is repeated.
 */
static void md_update_sb(mddev_t * mddev, int force_change)
{
	struct list_head *tmp;
	mdk_rdev_t *rdev;
	int sync_req;
	int nospares = 0;

repeat:
	spin_lock_irq(&mddev->write_lock);

	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only the clean/dirty state changed: devices without a
		 * raid_disk role may be skipped (see sync_sbs()), unless one
		 * of the conditions below turns that off again.
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* Degraded arrays always update every device. */
		nospares = 0;

	/* Remember in_sync so a change during the write can be detected. */
	sync_req = mddev->in_sync;
	mddev->utime = get_seconds();

	/*
	 * Event-count parity as maintained below: odd while the array is
	 * dirty (not in_sync, or recovery_cp != MaxSector), even when clean.
	 * For a pure dirty->clean transition with nospares set, step the
	 * counter back by one instead of forward, except when it is 1.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && (mddev->events & 1)
	    && mddev->events != 1)
		mddev->events--;
	else {
		/* Otherwise move forward, then fix up the parity if needed. */
		mddev->events ++;
		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) {
			/* Dirty: the count must be odd. */
			if ((mddev->events&1)==0) {
				mddev->events++;
				nospares = 0;
			}
		} else {
			/* Clean: the count must be even. */
			if ((mddev->events&1)) {
				mddev->events++;
				nospares = 0;
			}
		}
	}

	if (!mddev->events) {
		/*
		 * The counter reached zero (wrapped): report it and step
		 * back by one so that it is non-zero.
		 */
		MD_BUG();
		mddev->events --;
	}

	/*
	 * Arrays without persistent superblocks have nothing to write.  For
	 * external metadata MD_CHANGE_PENDING is left set here.
	 */
	if (!mddev->persistent) {
		if (!mddev->external)
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);

		spin_unlock_irq(&mddev->write_lock);
		wake_up(&mddev->sb_wait);
		return;
	}
	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	dprintk(KERN_INFO
		"md: updating %s RAID superblock on device (in sync %d)\n",
		mdname(mddev),mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, tmp, mddev) {
		char b[BDEVNAME_SIZE];
		dprintk(KERN_INFO "md: ");
		/* sync_sbs() marks devices that need a write with sb_loaded == 1. */
		if (rdev->sb_loaded != 1)
			continue;
		if (test_bit(Faulty, &rdev->flags))
			dprintk("(skipping faulty ");

		dprintk("%s ", bdevname(rdev->bdev,b));
		if (!test_bit(Faulty, &rdev->flags)) {
			/* sb_offset is shifted left by one for md_super_write(). */
			md_super_write(mddev,rdev,
				       rdev->sb_offset<<1, rdev->sb_size,
				       rdev->sb_page);
			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
				bdevname(rdev->bdev,b),
				(unsigned long long)rdev->sb_offset);
			rdev->sb_events = mddev->events;

		} else
			dprintk(")\n");
		if (mddev->level == LEVEL_MULTIPATH)
			/* For multipath, stop after the first candidate device. */
			break;
	}
	md_super_wait(mddev);
	/* MD_CHANGE_PENDING stays set until the checks below have passed. */

	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* State changed while writing: do the whole update again. */
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);

}
1777
1778
1779
1780
/*
 * Compare a user-supplied command word with a keyword.
 *
 * Returns 1 when @cmd equals @str exactly, or equals @str followed by a
 * single trailing newline; returns 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* Advance both strings over their common prefix. */
	for (; *cmd != '\0' && *str != '\0' && *cmd == *str; cmd++, str++)
		;

	/* Tolerate one newline at the point where the strings diverge. */
	if (*cmd == '\n')
		cmd++;

	/* Both strings must now be fully consumed. */
	return (*cmd == '\0' && *str == '\0') ? 1 : 0;
}
1797
/*
 * A sysfs attribute of a member device (rdev).  rdev_attr_show() and
 * rdev_attr_store() recover this structure from the embedded 'attr' and
 * dispatch to 'show' / 'store'; either handler may be NULL, in which case
 * the corresponding operation fails with -EIO.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mdk_rdev_t *, char *);
	ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
};
1803
1804static ssize_t
1805state_show(mdk_rdev_t *rdev, char *page)
1806{
1807 char *sep = "";
1808 size_t len = 0;
1809
1810 if (test_bit(Faulty, &rdev->flags)) {
1811 len+= sprintf(page+len, "%sfaulty",sep);
1812 sep = ",";
1813 }
1814 if (test_bit(In_sync, &rdev->flags)) {
1815 len += sprintf(page+len, "%sin_sync",sep);
1816 sep = ",";
1817 }
1818 if (test_bit(WriteMostly, &rdev->flags)) {
1819 len += sprintf(page+len, "%swrite_mostly",sep);
1820 sep = ",";
1821 }
1822 if (!test_bit(Faulty, &rdev->flags) &&
1823 !test_bit(In_sync, &rdev->flags)) {
1824 len += sprintf(page+len, "%sspare", sep);
1825 sep = ",";
1826 }
1827 return len+sprintf(page+len, "\n");
1828}
1829
1830static ssize_t
1831state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1832{
1833
1834
1835
1836
1837
1838
1839 int err = -EINVAL;
1840 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
1841 md_error(rdev->mddev, rdev);
1842 err = 0;
1843 } else if (cmd_match(buf, "remove")) {
1844 if (rdev->raid_disk >= 0)
1845 err = -EBUSY;
1846 else {
1847 mddev_t *mddev = rdev->mddev;
1848 kick_rdev_from_array(rdev);
1849 if (mddev->pers)
1850 md_update_sb(mddev, 1);
1851 md_new_event(mddev);
1852 err = 0;
1853 }
1854 } else if (cmd_match(buf, "writemostly")) {
1855 set_bit(WriteMostly, &rdev->flags);
1856 err = 0;
1857 } else if (cmd_match(buf, "-writemostly")) {
1858 clear_bit(WriteMostly, &rdev->flags);
1859 err = 0;
1860 }
1861 return err ? err : len;
1862}
1863static struct rdev_sysfs_entry rdev_state =
1864__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
1865
1866static ssize_t
1867errors_show(mdk_rdev_t *rdev, char *page)
1868{
1869 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
1870}
1871
1872static ssize_t
1873errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1874{
1875 char *e;
1876 unsigned long n = simple_strtoul(buf, &e, 10);
1877 if (*buf && (*e == 0 || *e == '\n')) {
1878 atomic_set(&rdev->corrected_errors, n);
1879 return len;
1880 }
1881 return -EINVAL;
1882}
1883static struct rdev_sysfs_entry rdev_errors =
1884__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
1885
1886static ssize_t
1887slot_show(mdk_rdev_t *rdev, char *page)
1888{
1889 if (rdev->raid_disk < 0)
1890 return sprintf(page, "none\n");
1891 else
1892 return sprintf(page, "%d\n", rdev->raid_disk);
1893}
1894
1895static ssize_t
1896slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1897{
1898 char *e;
1899 int err;
1900 char nm[20];
1901 int slot = simple_strtoul(buf, &e, 10);
1902 if (strncmp(buf, "none", 4)==0)
1903 slot = -1;
1904 else if (e==buf || (*e && *e!= '\n'))
1905 return -EINVAL;
1906 if (rdev->mddev->pers) {
1907
1908
1909
1910
1911
1912
1913
1914 if (slot != -1)
1915 return -EBUSY;
1916 if (rdev->raid_disk == -1)
1917 return -EEXIST;
1918
1919 if (rdev->mddev->pers->hot_add_disk == NULL)
1920 return -EINVAL;
1921 err = rdev->mddev->pers->
1922 hot_remove_disk(rdev->mddev, rdev->raid_disk);
1923 if (err)
1924 return err;
1925 sprintf(nm, "rd%d", rdev->raid_disk);
1926 sysfs_remove_link(&rdev->mddev->kobj, nm);
1927 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
1928 md_wakeup_thread(rdev->mddev->thread);
1929 } else {
1930 if (slot >= rdev->mddev->raid_disks)
1931 return -ENOSPC;
1932 rdev->raid_disk = slot;
1933
1934 clear_bit(Faulty, &rdev->flags);
1935 clear_bit(WriteMostly, &rdev->flags);
1936 set_bit(In_sync, &rdev->flags);
1937 }
1938 return len;
1939}
1940
1941
1942static struct rdev_sysfs_entry rdev_slot =
1943__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
1944
1945static ssize_t
1946offset_show(mdk_rdev_t *rdev, char *page)
1947{
1948 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
1949}
1950
1951static ssize_t
1952offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1953{
1954 char *e;
1955 unsigned long long offset = simple_strtoull(buf, &e, 10);
1956 if (e==buf || (*e && *e != '\n'))
1957 return -EINVAL;
1958 if (rdev->mddev->pers)
1959 return -EBUSY;
1960 if (rdev->size && rdev->mddev->external)
1961
1962
1963 return -EBUSY;
1964 rdev->data_offset = offset;
1965 return len;
1966}
1967
1968static struct rdev_sysfs_entry rdev_offset =
1969__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
1970
1971static ssize_t
1972rdev_size_show(mdk_rdev_t *rdev, char *page)
1973{
1974 return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
1975}
1976
1977static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
1978{
1979
1980 if (s1+l1 <= s2)
1981 return 0;
1982 if (s2+l2 <= s1)
1983 return 0;
1984 return 1;
1985}
1986
/*
 * sysfs 'size' write handler: sets the device's size from the decimal
 * number in @buf.
 *
 * When the size grows on an array with external metadata, every rdev of
 * every array is examined; the change is rejected with -EBUSY (and the old
 * size restored) if any rdev has AllReserved set, or if another rdev on the
 * same block device would overlap the new data range.
 *
 * On success the array's size is lowered to the new size if it was larger
 * or unset.  Returns @len on success, -EINVAL for unparsable input, -EBUSY
 * if the array is running or an overlap was found.
 */
static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	char *e;
	unsigned long long size = simple_strtoull(buf, &e, 10);
	unsigned long long oldsize = rdev->size;
	mddev_t *my_mddev = rdev->mddev;

	if (e==buf || (*e && *e != '\n'))
		return -EINVAL;
	if (my_mddev->pers)
		return -EBUSY;
	/* Set the new size first so the overlap test below sees it. */
	rdev->size = size;
	if (size > oldsize && rdev->mddev->external) {
		/*
		 * Growing with external metadata: check for overlap against
		 * all other devices.  Our own array lock is dropped while the
		 * other arrays are locked one at a time, and re-taken
		 * afterwards.
		 */
		mddev_t *mddev;
		int overlap = 0;
		struct list_head *tmp, *tmp2;

		mddev_unlock(my_mddev);
		for_each_mddev(mddev, tmp) {
			mdk_rdev_t *rdev2;

			/* NOTE(review): the return value of mddev_lock() is not checked. */
			mddev_lock(mddev);
			rdev_for_each(rdev2, tmp2, mddev)
				if (test_bit(AllReserved, &rdev2->flags) ||
				    (rdev->bdev == rdev2->bdev &&
				     rdev != rdev2 &&
				     overlaps(rdev->data_offset, rdev->size,
					    rdev2->data_offset, rdev2->size))) {
					overlap = 1;
					break;
				}
			mddev_unlock(mddev);
			if (overlap) {
				/*
				 * Leaving the iteration early; presumably this
				 * drops the reference for_each_mddev holds on
				 * the current array -- confirm against the macro.
				 */
				mddev_put(mddev);
				break;
			}
		}
		mddev_lock(my_mddev);
		if (overlap) {
			/*
			 * Undo the size change.  The new size was visible to
			 * others while our lock was dropped.
			 */
			rdev->size = oldsize;
			return -EBUSY;
		}
	}
	/* The array's component size can only shrink (or be set) here. */
	if (size < my_mddev->size || my_mddev->size == 0)
		my_mddev->size = size;
	return len;
}

static struct rdev_sysfs_entry rdev_size =
__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2049
/*
 * NULL-terminated list of the sysfs attributes every rdev kobject gets
 * (state, errors, slot, offset, size); installed through rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_size.attr,
	NULL,
};
2058static ssize_t
2059rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2060{
2061 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2062 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2063 mddev_t *mddev = rdev->mddev;
2064 ssize_t rv;
2065
2066 if (!entry->show)
2067 return -EIO;
2068
2069 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2070 if (!rv) {
2071 if (rdev->mddev == NULL)
2072 rv = -EBUSY;
2073 else
2074 rv = entry->show(rdev, page);
2075 mddev_unlock(mddev);
2076 }
2077 return rv;
2078}
2079
2080static ssize_t
2081rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2082 const char *page, size_t length)
2083{
2084 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2085 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2086 ssize_t rv;
2087 mddev_t *mddev = rdev->mddev;
2088
2089 if (!entry->store)
2090 return -EIO;
2091 if (!capable(CAP_SYS_ADMIN))
2092 return -EACCES;
2093 rv = mddev ? mddev_lock(mddev): -EBUSY;
2094 if (!rv) {
2095 if (rdev->mddev == NULL)
2096 rv = -EBUSY;
2097 else
2098 rv = entry->store(rdev, page, length);
2099 mddev_unlock(rdev->mddev);
2100 }
2101 return rv;
2102}
2103
2104static void rdev_free(struct kobject *ko)
2105{
2106 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2107 kfree(rdev);
2108}
/* sysfs read/write entry points shared by all rdev attributes. */
static struct sysfs_ops rdev_sysfs_ops = {
	.show = rdev_attr_show,
	.store = rdev_attr_store,
};
/*
 * kobject type for member devices: ties together the release callback, the
 * sysfs dispatchers and the default attribute list.  md_import_device()
 * initialises each rdev's kobject with this type.
 */
static struct kobj_type rdev_ktype = {
	.release = rdev_free,
	.sysfs_ops = &rdev_sysfs_ops,
	.default_attrs = rdev_default_attrs,
};
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
/*
 * Allocate and initialise an rdev for block device @newdev.
 *
 * @super_format selects how the superblock is handled:
 *   >= 0  index into super_types[]; the superblock is loaded with that
 *         format's load_super() using @super_minor
 *   -2    no superblock is loaded and the device is claimed in shared mode
 *         (see lock_rdev())
 *   other negative values: no superblock is loaded, exclusive claim
 *
 * Returns the new rdev (not yet bound to any array) or an ERR_PTR():
 * -ENOMEM on allocation failure, -EINVAL for a zero-sized device or an
 * invalid superblock, or the error from alloc_disk_sb(), lock_rdev() or
 * load_super().
 */
static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
{
	char b[BDEVNAME_SIZE];
	int err;
	mdk_rdev_t *rdev;
	sector_t size;

	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
	if (!rdev) {
		printk(KERN_ERR "md: could not alloc mem for new device!\n");
		return ERR_PTR(-ENOMEM);
	}

	if ((err = alloc_disk_sb(rdev)))
		goto abort_free;

	err = lock_rdev(rdev, newdev, super_format == -2);
	if (err)
		goto abort_free;

	kobject_init(&rdev->kobj, &rdev_ktype);

	/* Start with no descriptor number and no role in any array. */
	rdev->desc_nr = -1;
	rdev->saved_raid_disk = -1;
	rdev->raid_disk = -1;
	rdev->flags = 0;
	rdev->data_offset = 0;
	rdev->sb_events = 0;
	atomic_set(&rdev->nr_pending, 0);
	atomic_set(&rdev->read_errors, 0);
	atomic_set(&rdev->corrected_errors, 0);

	/* Device size in units of 1 << BLOCK_SIZE_BITS bytes. */
	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
	if (!size) {
		/* Despite the message text, the import is aborted here. */
		printk(KERN_WARNING
			"md: %s has zero or unknown size, marking faulty!\n",
			bdevname(rdev->bdev,b));
		err = -EINVAL;
		goto abort_free;
	}

	if (super_format >= 0) {
		err = super_types[super_format].
			load_super(rdev, NULL, super_minor);
		if (err == -EINVAL) {
			printk(KERN_WARNING
				"md: %s does not have a valid v%d.%d "
			       "superblock, not importing!\n",
				bdevname(rdev->bdev,b),
			       super_format, super_minor);
			goto abort_free;
		}
		if (err < 0) {
			printk(KERN_WARNING
				"md: could not read %s's sb, not importing!\n",
				bdevname(rdev->bdev,b));
			goto abort_free;
		}
	}
	INIT_LIST_HEAD(&rdev->same_set);

	return rdev;

abort_free:
	/*
	 * Release whatever was acquired.  The block device is only unlocked
	 * when a superblock page exists, which holds for every path that
	 * reaches here after a successful lock_rdev().
	 * NOTE(review): the rdev is kfree()d directly even if kobject_init()
	 * has already run -- confirm this is intended.
	 */
	if (rdev->sb_page) {
		if (rdev->bdev)
			unlock_rdev(rdev);
		free_disk_sb(rdev);
	}
	kfree(rdev);
	return ERR_PTR(err);
}
2201
2202
2203
2204
2205
2206
/*
 * Examine the superblocks of all member devices of @mddev and configure the
 * array from them.
 *
 * Pass 1 loads each superblock, comparing against the best candidate so
 * far: load_super() returning 1 makes the device the new "freshest", 0
 * keeps the current one, anything else gets the device kicked from the
 * array.  The freshest superblock is then validated to set up the array,
 * and pass 2 validates every other device against it, kicking those that
 * fail.
 */
static void analyze_sbs(mddev_t * mddev)
{
	int i;
	struct list_head *tmp;
	mdk_rdev_t *rdev, *freshest;
	char b[BDEVNAME_SIZE];

	freshest = NULL;
	rdev_for_each(rdev, tmp, mddev)
		switch (super_types[mddev->major_version].
			load_super(rdev, freshest, mddev->minor_version)) {
		case 1:
			freshest = rdev;
			break;
		case 0:
			break;
		default:
			printk( KERN_ERR \
				"md: fatal superblock inconsistency in %s"
				" -- removing from array\n",
				bdevname(rdev->bdev,b));
			kick_rdev_from_array(rdev);
		}


	/*
	 * NOTE(review): freshest is still NULL here if no device was
	 * accepted above; confirm that validate_super() or the callers
	 * guard against that.
	 */
	super_types[mddev->major_version].
		validate_super(mddev, freshest);

	i = 0;
	rdev_for_each(rdev, tmp, mddev) {
		if (rdev != freshest)
			if (super_types[mddev->major_version].
			    validate_super(mddev, rdev)) {
				printk(KERN_WARNING "md: kicking non-fresh %s"
					" from array!\n",
					bdevname(rdev->bdev,b));
				kick_rdev_from_array(rdev);
				continue;
			}
		if (mddev->level == LEVEL_MULTIPATH) {
			/* Multipath: number the devices sequentially, all in sync. */
			rdev->desc_nr = i++;
			rdev->raid_disk = rdev->desc_nr;
			set_bit(In_sync, &rdev->flags);
		} else if (rdev->raid_disk >= mddev->raid_disks) {
			/* Role is beyond the array's slot count: demote to no role. */
			rdev->raid_disk = -1;
			clear_bit(In_sync, &rdev->flags);
		}
	}



	/* recovery_cp != MaxSector on a level >= 1 array means a resync is due. */
	if (mddev->recovery_cp != MaxSector &&
	    mddev->level >= 1)
		printk(KERN_ERR "md: %s: raid array is not clean"
		       " -- starting background reconstruction\n",
		       mdname(mddev));

}
2265
2266static ssize_t
2267safe_delay_show(mddev_t *mddev, char *page)
2268{
2269 int msec = (mddev->safemode_delay*1000)/HZ;
2270 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2271}
2272static ssize_t
2273safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2274{
2275 int scale=1;
2276 int dot=0;
2277 int i;
2278 unsigned long msec;
2279 char buf[30];
2280 char *e;
2281
2282 if (len >= sizeof(buf))
2283 return -EINVAL;
2284 strlcpy(buf, cbuf, len);
2285 buf[len] = 0;
2286 for (i=0; i<len; i++) {
2287 if (dot) {
2288 if (isdigit(buf[i])) {
2289 buf[i-1] = buf[i];
2290 scale *= 10;
2291 }
2292 buf[i] = 0;
2293 } else if (buf[i] == '.') {
2294 dot=1;
2295 buf[i] = 0;
2296 }
2297 }
2298 msec = simple_strtoul(buf, &e, 10);
2299 if (e == buf || (*e && *e != '\n'))
2300 return -EINVAL;
2301 msec = (msec * 1000) / scale;
2302 if (msec == 0)
2303 mddev->safemode_delay = 0;
2304 else {
2305 mddev->safemode_delay = (msec*HZ)/1000;
2306 if (mddev->safemode_delay == 0)
2307 mddev->safemode_delay = 1;
2308 }
2309 return len;
2310}
2311static struct md_sysfs_entry md_safe_delay =
2312__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
2313
2314static ssize_t
2315level_show(mddev_t *mddev, char *page)
2316{
2317 struct mdk_personality *p = mddev->pers;
2318 if (p)
2319 return sprintf(page, "%s\n", p->name);
2320 else if (mddev->clevel[0])
2321 return sprintf(page, "%s\n", mddev->clevel);
2322 else if (mddev->level != LEVEL_NONE)
2323 return sprintf(page, "%d\n", mddev->level);
2324 else
2325 return 0;
2326}
2327
2328static ssize_t
2329level_store(mddev_t *mddev, const char *buf, size_t len)
2330{
2331 ssize_t rv = len;
2332 if (mddev->pers)
2333 return -EBUSY;
2334 if (len == 0)
2335 return 0;
2336 if (len >= sizeof(mddev->clevel))
2337 return -ENOSPC;
2338 strncpy(mddev->clevel, buf, len);
2339 if (mddev->clevel[len-1] == '\n')
2340 len--;
2341 mddev->clevel[len] = 0;
2342 mddev->level = LEVEL_NONE;
2343 return rv;
2344}
2345
2346static struct md_sysfs_entry md_level =
2347__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
2348
2349
2350static ssize_t
2351layout_show(mddev_t *mddev, char *page)
2352{
2353
2354 if (mddev->reshape_position != MaxSector &&
2355 mddev->layout != mddev->new_layout)
2356 return sprintf(page, "%d (%d)\n",
2357 mddev->new_layout, mddev->layout);
2358 return sprintf(page, "%d\n", mddev->layout);
2359}
2360
2361static ssize_t
2362layout_store(mddev_t *mddev, const char *buf, size_t len)
2363{
2364 char *e;
2365 unsigned long n = simple_strtoul(buf, &e, 10);
2366
2367 if (!*buf || (*e && *e != '\n'))
2368 return -EINVAL;
2369
2370 if (mddev->pers)
2371 return -EBUSY;
2372 if (mddev->reshape_position != MaxSector)
2373 mddev->new_layout = n;
2374 else
2375 mddev->layout = n;
2376 return len;
2377}
2378static struct md_sysfs_entry md_layout =
2379__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
2380
2381
2382static ssize_t
2383raid_disks_show(mddev_t *mddev, char *page)
2384{
2385 if (mddev->raid_disks == 0)
2386 return 0;
2387 if (mddev->reshape_position != MaxSector &&
2388 mddev->delta_disks != 0)
2389 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
2390 mddev->raid_disks - mddev->delta_disks);
2391 return sprintf(page, "%d\n", mddev->raid_disks);
2392}
2393
2394static int update_raid_disks(mddev_t *mddev, int raid_disks);
2395
2396static ssize_t
2397raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
2398{
2399 char *e;
2400 int rv = 0;
2401 unsigned long n = simple_strtoul(buf, &e, 10);
2402
2403 if (!*buf || (*e && *e != '\n'))
2404 return -EINVAL;
2405
2406 if (mddev->pers)
2407 rv = update_raid_disks(mddev, n);
2408 else if (mddev->reshape_position != MaxSector) {
2409 int olddisks = mddev->raid_disks - mddev->delta_disks;
2410 mddev->delta_disks = n - olddisks;
2411 mddev->raid_disks = n;
2412 } else
2413 mddev->raid_disks = n;
2414 return rv ? rv : len;
2415}
2416static struct md_sysfs_entry md_raid_disks =
2417__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
2418
2419static ssize_t
2420chunk_size_show(mddev_t *mddev, char *page)
2421{
2422 if (mddev->reshape_position != MaxSector &&
2423 mddev->chunk_size != mddev->new_chunk)
2424 return sprintf(page, "%d (%d)\n", mddev->new_chunk,
2425 mddev->chunk_size);
2426 return sprintf(page, "%d\n", mddev->chunk_size);
2427}
2428
2429static ssize_t
2430chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2431{
2432
2433 char *e;
2434 unsigned long n = simple_strtoul(buf, &e, 10);
2435
2436 if (!*buf || (*e && *e != '\n'))
2437 return -EINVAL;
2438
2439 if (mddev->pers)
2440 return -EBUSY;
2441 else if (mddev->reshape_position != MaxSector)
2442 mddev->new_chunk = n;
2443 else
2444 mddev->chunk_size = n;
2445 return len;
2446}
2447static struct md_sysfs_entry md_chunk_size =
2448__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2449
2450static ssize_t
2451resync_start_show(mddev_t *mddev, char *page)
2452{
2453 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2454}
2455
2456static ssize_t
2457resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2458{
2459
2460 char *e;
2461 unsigned long long n = simple_strtoull(buf, &e, 10);
2462
2463 if (mddev->pers)
2464 return -EBUSY;
2465 if (!*buf || (*e && *e != '\n'))
2466 return -EINVAL;
2467
2468 mddev->recovery_cp = n;
2469 return len;
2470}
2471static struct md_sysfs_entry md_resync_start =
2472__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
/*
 * States reported and accepted by the 'array_state' sysfs attribute.  The
 * enumerators are in the same order as the strings in array_states[], so an
 * enum value indexes its name directly.  bad_word has the index of the
 * terminating NULL entry, which is what match_word() returns when nothing
 * matches.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
/* Names for enum array_state, NULL-terminated; keep in sync with the enum. */
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
2515
/*
 * Look @word up in the NULL-terminated string array @list using
 * cmd_match().  Returns the index of the first matching entry, or the index
 * of the terminating NULL if there is no match.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
2524
2525static ssize_t
2526array_state_show(mddev_t *mddev, char *page)
2527{
2528 enum array_state st = inactive;
2529
2530 if (mddev->pers)
2531 switch(mddev->ro) {
2532 case 1:
2533 st = readonly;
2534 break;
2535 case 2:
2536 st = read_auto;
2537 break;
2538 case 0:
2539 if (mddev->in_sync)
2540 st = clean;
2541 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
2542 st = write_pending;
2543 else if (mddev->safemode)
2544 st = active_idle;
2545 else
2546 st = active;
2547 }
2548 else {
2549 if (list_empty(&mddev->disks) &&
2550 mddev->raid_disks == 0 &&
2551 mddev->size == 0)
2552 st = clear;
2553 else
2554 st = inactive;
2555 }
2556 return sprintf(page, "%s\n", array_states[st]);
2557}
2558
2559static int do_md_stop(mddev_t * mddev, int ro);
2560static int do_md_run(mddev_t * mddev);
2561static int restart_array(mddev_t *mddev);
2562
/*
 * sysfs 'array_state' write handler: moves the array to the state named in
 * @buf.
 *
 * Returns @len on success.  Unknown words and the states that cannot be
 * requested ("suspended", "write-pending", "active-idle") yield -EINVAL;
 * other errors come from do_md_stop()/do_md_run() or are -EBUSY as noted
 * below.
 */
static ssize_t
array_state_store(mddev_t *mddev, const char *buf, size_t len)
{
	int err = -EINVAL;
	enum array_state st = match_word(buf, array_states);
	switch(st) {
	case bad_word:
		break;
	case clear:
		/* Stop the array (mode 0); refused while anyone else holds it active. */
		if (atomic_read(&mddev->active) > 1)
			return -EBUSY;
		err = do_md_stop(mddev, 0);
		break;
	case inactive:
		/* Stop a running array with mode 2; already-stopped is a no-op. */
		if (mddev->pers) {
			if (atomic_read(&mddev->active) > 1)
				return -EBUSY;
			err = do_md_stop(mddev, 2);
		} else
			err = 0;
		break;
	case suspended:
		break;
	case readonly:
		/* Running: switch via do_md_stop(mode 1); stopped: start with ro = 1. */
		if (mddev->pers)
			err = do_md_stop(mddev, 1);
		else {
			mddev->ro = 1;
			err = do_md_run(mddev);
		}
		break;
	case read_auto:
		/* Like readonly, but the array ends up with ro = 2. */
		if (mddev->pers) {
			err = do_md_stop(mddev, 1);
			if (err == 0)
				mddev->ro = 2;
		} else {
			mddev->ro = 2;
			err = do_md_run(mddev);
		}
		break;
	case clean:
		if (mddev->pers) {
			/*
			 * Running: make writable, then mark in_sync if no
			 * writes are pending; -EBUSY otherwise.
			 */
			restart_array(mddev);
			spin_lock_irq(&mddev->write_lock);
			if (atomic_read(&mddev->writes_pending) == 0) {
				if (mddev->in_sync == 0) {
					mddev->in_sync = 1;
					if (mddev->persistent)
						set_bit(MD_CHANGE_CLEAN,
							&mddev->flags);
				}
				err = 0;
			} else
				err = -EBUSY;
			spin_unlock_irq(&mddev->write_lock);
		} else {
			/* Stopped: start read-write with no resync pending. */
			mddev->ro = 0;
			mddev->recovery_cp = MaxSector;
			err = do_md_run(mddev);
		}
		break;
	case active:
		if (mddev->pers) {
			/*
			 * Running: make writable; with external metadata this
			 * also clears MD_CHANGE_CLEAN.  Waiters on sb_wait
			 * are woken.
			 */
			restart_array(mddev);
			if (mddev->external)
				clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
			wake_up(&mddev->sb_wait);
			err = 0;
		} else {
			mddev->ro = 0;
			err = do_md_run(mddev);
		}
		break;
	case write_pending:
	case active_idle:
		/* These states are reported only; they cannot be requested. */
		break;
	}
	if (err)
		return err;
	else
		return len;
}
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
2652
2653static ssize_t
2654null_show(mddev_t *mddev, char *page)
2655{
2656 return -EINVAL;
2657}
2658
/*
 * sysfs 'new_dev' write handler: adds the block device named by
 * "<major>:<minor>" in @buf to the array.
 *
 * How the device is imported depends on the array's metadata mode:
 *   persistent - with the array's superblock version; if the array already
 *                has members, the new superblock is additionally loaded
 *                against the first member's
 *   external   - md_import_device(dev, -2, -1)
 *   otherwise  - md_import_device(dev, -1, -1)
 * The imported device is then bound to the array.
 *
 * Returns @len on success, -EINVAL for malformed input, -EOVERFLOW if the
 * numbers do not survive MKDEV(), or the error from importing, loading or
 * binding the device.
 */
static ssize_t
new_dev_store(mddev_t *mddev, const char *buf, size_t len)
{
	char *e;
	int major = simple_strtoul(buf, &e, 10);
	int minor;
	dev_t dev;
	mdk_rdev_t *rdev;
	int err;

	/* Require "<major>:" followed by at least one more character. */
	if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
		return -EINVAL;
	minor = simple_strtoul(e+1, &e, 10);
	if (*e && *e != '\n')
		return -EINVAL;
	/* Reject numbers that do not round-trip through dev_t. */
	dev = MKDEV(major, minor);
	if (major != MAJOR(dev) ||
	    minor != MINOR(dev))
		return -EOVERFLOW;


	if (mddev->persistent) {
		rdev = md_import_device(dev, mddev->major_version,
					mddev->minor_version);
		if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
			/* Check the new superblock against an existing member's. */
			mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
						       mdk_rdev_t, same_set);
			err = super_types[mddev->major_version]
				.load_super(rdev, rdev0, mddev->minor_version);
			if (err < 0)
				goto out;
		}
	} else if (mddev->external)
		rdev = md_import_device(dev, -2, -1);
	else
		rdev = md_import_device(dev, -1, -1);

	if (IS_ERR(rdev))
		return PTR_ERR(rdev);
	err = bind_rdev_to_array(rdev, mddev);
 out:
	/* On any failure after a successful import, release the device again. */
	if (err)
		export_rdev(rdev);
	return err ? err : len;
}

static struct md_sysfs_entry md_new_device =
__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
2714
2715static ssize_t
2716bitmap_store(mddev_t *mddev, const char *buf, size_t len)
2717{
2718 char *end;
2719 unsigned long chunk, end_chunk;
2720
2721 if (!mddev->bitmap)
2722 goto out;
2723
2724 while (*buf) {
2725 chunk = end_chunk = simple_strtoul(buf, &end, 0);
2726 if (buf == end) break;
2727 if (*end == '-') {
2728 buf = end + 1;
2729 end_chunk = simple_strtoul(buf, &end, 0);
2730 if (buf == end) break;
2731 }
2732 if (*end && !isspace(*end)) break;
2733 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
2734 buf = end;
2735 while (isspace(*buf)) buf++;
2736 }
2737 bitmap_unplug(mddev->bitmap);
2738out:
2739 return len;
2740}
2741
2742static struct md_sysfs_entry md_bitmap =
2743__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
2744
2745static ssize_t
2746size_show(mddev_t *mddev, char *page)
2747{
2748 return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
2749}
2750
2751static int update_size(mddev_t *mddev, unsigned long size);
2752
2753static ssize_t
2754size_store(mddev_t *mddev, const char *buf, size_t len)
2755{
2756
2757
2758
2759
2760 char *e;
2761 int err = 0;
2762 unsigned long long size = simple_strtoull(buf, &e, 10);
2763 if (!*buf || *buf == '\n' ||
2764 (*e && *e != '\n'))
2765 return -EINVAL;
2766
2767 if (mddev->pers) {
2768 err = update_size(mddev, size);
2769 md_update_sb(mddev, 1);
2770 } else {
2771 if (mddev->size == 0 ||
2772 mddev->size > size)
2773 mddev->size = size;
2774 else
2775 err = -ENOSPC;
2776 }
2777 return err ? err : len;
2778}
2779
2780static struct md_sysfs_entry md_size =
2781__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
2782
2783
2784
2785
2786
2787
2788
2789
2790static ssize_t
2791metadata_show(mddev_t *mddev, char *page)
2792{
2793 if (mddev->persistent)
2794 return sprintf(page, "%d.%d\n",
2795 mddev->major_version, mddev->minor_version);
2796 else if (mddev->external)
2797 return sprintf(page, "external:%s\n", mddev->metadata_type);
2798 else
2799 return sprintf(page, "none\n");
2800}
2801
2802static ssize_t
2803metadata_store(mddev_t *mddev, const char *buf, size_t len)
2804{
2805 int major, minor;
2806 char *e;
2807 if (!list_empty(&mddev->disks))
2808 return -EBUSY;
2809
2810 if (cmd_match(buf, "none")) {
2811 mddev->persistent = 0;
2812 mddev->external = 0;
2813 mddev->major_version = 0;
2814 mddev->minor_version = 90;
2815 return len;
2816 }
2817 if (strncmp(buf, "external:", 9) == 0) {
2818 size_t namelen = len-9;
2819 if (namelen >= sizeof(mddev->metadata_type))
2820 namelen = sizeof(mddev->metadata_type)-1;
2821 strncpy(mddev->metadata_type, buf+9, namelen);
2822 mddev->metadata_type[namelen] = 0;
2823 if (namelen && mddev->metadata_type[namelen-1] == '\n')
2824 mddev->metadata_type[--namelen] = 0;
2825 mddev->persistent = 0;
2826 mddev->external = 1;
2827 mddev->major_version = 0;
2828 mddev->minor_version = 90;
2829 return len;
2830 }
2831 major = simple_strtoul(buf, &e, 10);
2832 if (e==buf || *e != '.')
2833 return -EINVAL;
2834 buf = e+1;
2835 minor = simple_strtoul(buf, &e, 10);
2836 if (e==buf || (*e && *e != '\n') )
2837 return -EINVAL;
2838 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
2839 return -ENOENT;
2840 mddev->major_version = major;
2841 mddev->minor_version = minor;
2842 mddev->persistent = 1;
2843 mddev->external = 0;
2844 return len;
2845}
2846
2847static struct md_sysfs_entry md_metadata =
2848__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2849
2850static ssize_t
2851action_show(mddev_t *mddev, char *page)
2852{
2853 char *type = "idle";
2854 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
2855 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
2856 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2857 type = "reshape";
2858 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2859 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
2860 type = "resync";
2861 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
2862 type = "check";
2863 else
2864 type = "repair";
2865 } else
2866 type = "recover";
2867 }
2868 return sprintf(page, "%s\n", type);
2869}
2870
2871static ssize_t
2872action_store(mddev_t *mddev, const char *page, size_t len)
2873{
2874 if (!mddev->pers || !mddev->pers->sync_request)
2875 return -EINVAL;
2876
2877 if (cmd_match(page, "idle")) {
2878 if (mddev->sync_thread) {
2879 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2880 md_unregister_thread(mddev->sync_thread);
2881 mddev->sync_thread = NULL;
2882 mddev->recovery = 0;
2883 }
2884 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
2885 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
2886 return -EBUSY;
2887 else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
2888 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2889 else if (cmd_match(page, "reshape")) {
2890 int err;
2891 if (mddev->pers->start_reshape == NULL)
2892 return -EINVAL;
2893 err = mddev->pers->start_reshape(mddev);
2894 if (err)
2895 return err;
2896 } else {
2897 if (cmd_match(page, "check"))
2898 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
2899 else if (!cmd_match(page, "repair"))
2900 return -EINVAL;
2901 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
2902 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
2903 }
2904 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2905 md_wakeup_thread(mddev->thread);
2906 return len;
2907}
2908
2909static ssize_t
2910mismatch_cnt_show(mddev_t *mddev, char *page)
2911{
2912 return sprintf(page, "%llu\n",
2913 (unsigned long long) mddev->resync_mismatches);
2914}
2915
2916static struct md_sysfs_entry md_scan_mode =
2917__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
2918
2919
2920static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
2921
2922static ssize_t
2923sync_min_show(mddev_t *mddev, char *page)
2924{
2925 return sprintf(page, "%d (%s)\n", speed_min(mddev),
2926 mddev->sync_speed_min ? "local": "system");
2927}
2928
2929static ssize_t
2930sync_min_store(mddev_t *mddev, const char *buf, size_t len)
2931{
2932 int min;
2933 char *e;
2934 if (strncmp(buf, "system", 6)==0) {
2935 mddev->sync_speed_min = 0;
2936 return len;
2937 }
2938 min = simple_strtoul(buf, &e, 10);
2939 if (buf == e || (*e && *e != '\n') || min <= 0)
2940 return -EINVAL;
2941 mddev->sync_speed_min = min;
2942 return len;
2943}
2944
2945static struct md_sysfs_entry md_sync_min =
2946__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
2947
2948static ssize_t
2949sync_max_show(mddev_t *mddev, char *page)
2950{
2951 return sprintf(page, "%d (%s)\n", speed_max(mddev),
2952 mddev->sync_speed_max ? "local": "system");
2953}
2954
2955static ssize_t
2956sync_max_store(mddev_t *mddev, const char *buf, size_t len)
2957{
2958 int max;
2959 char *e;
2960 if (strncmp(buf, "system", 6)==0) {
2961 mddev->sync_speed_max = 0;
2962 return len;
2963 }
2964 max = simple_strtoul(buf, &e, 10);
2965 if (buf == e || (*e && *e != '\n') || max <= 0)
2966 return -EINVAL;
2967 mddev->sync_speed_max = max;
2968 return len;
2969}
2970
2971static struct md_sysfs_entry md_sync_max =
2972__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
2973
2974static ssize_t
2975degraded_show(mddev_t *mddev, char *page)
2976{
2977 return sprintf(page, "%d\n", mddev->degraded);
2978}
2979static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
2980
2981static ssize_t
2982sync_speed_show(mddev_t *mddev, char *page)
2983{
2984 unsigned long resync, dt, db;
2985 resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active));
2986 dt = ((jiffies - mddev->resync_mark) / HZ);
2987 if (!dt) dt++;
2988 db = resync - (mddev->resync_mark_cnt);
2989 return sprintf(page, "%ld\n", db/dt/2);
2990}
2991
2992static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
2993
2994static ssize_t
2995sync_completed_show(mddev_t *mddev, char *page)
2996{
2997 unsigned long max_blocks, resync;
2998
2999 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3000 max_blocks = mddev->resync_max_sectors;
3001 else
3002 max_blocks = mddev->size << 1;
3003
3004 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
3005 return sprintf(page, "%lu / %lu\n", resync, max_blocks);
3006}
3007
3008static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3009
3010static ssize_t
3011max_sync_show(mddev_t *mddev, char *page)
3012{
3013 if (mddev->resync_max == MaxSector)
3014 return sprintf(page, "max\n");
3015 else
3016 return sprintf(page, "%llu\n",
3017 (unsigned long long)mddev->resync_max);
3018}
3019static ssize_t
3020max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3021{
3022 if (strncmp(buf, "max", 3) == 0)
3023 mddev->resync_max = MaxSector;
3024 else {
3025 char *ep;
3026 unsigned long long max = simple_strtoull(buf, &ep, 10);
3027 if (ep == buf || (*ep != 0 && *ep != '\n'))
3028 return -EINVAL;
3029 if (max < mddev->resync_max &&
3030 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3031 return -EBUSY;
3032
3033
3034 if (mddev->chunk_size) {
3035 if (max & (sector_t)((mddev->chunk_size>>9)-1))
3036 return -EINVAL;
3037 }
3038 mddev->resync_max = max;
3039 }
3040 wake_up(&mddev->recovery_wait);
3041 return len;
3042}
3043
3044static struct md_sysfs_entry md_max_sync =
3045__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
3046
3047static ssize_t
3048suspend_lo_show(mddev_t *mddev, char *page)
3049{
3050 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
3051}
3052
3053static ssize_t
3054suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3055{
3056 char *e;
3057 unsigned long long new = simple_strtoull(buf, &e, 10);
3058
3059 if (mddev->pers->quiesce == NULL)
3060 return -EINVAL;
3061 if (buf == e || (*e && *e != '\n'))
3062 return -EINVAL;
3063 if (new >= mddev->suspend_hi ||
3064 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
3065 mddev->suspend_lo = new;
3066 mddev->pers->quiesce(mddev, 2);
3067 return len;
3068 } else
3069 return -EINVAL;
3070}
3071static struct md_sysfs_entry md_suspend_lo =
3072__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
3073
3074
3075static ssize_t
3076suspend_hi_show(mddev_t *mddev, char *page)
3077{
3078 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
3079}
3080
3081static ssize_t
3082suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3083{
3084 char *e;
3085 unsigned long long new = simple_strtoull(buf, &e, 10);
3086
3087 if (mddev->pers->quiesce == NULL)
3088 return -EINVAL;
3089 if (buf == e || (*e && *e != '\n'))
3090 return -EINVAL;
3091 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
3092 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
3093 mddev->suspend_hi = new;
3094 mddev->pers->quiesce(mddev, 1);
3095 mddev->pers->quiesce(mddev, 0);
3096 return len;
3097 } else
3098 return -EINVAL;
3099}
3100static struct md_sysfs_entry md_suspend_hi =
3101__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
3102
3103static ssize_t
3104reshape_position_show(mddev_t *mddev, char *page)
3105{
3106 if (mddev->reshape_position != MaxSector)
3107 return sprintf(page, "%llu\n",
3108 (unsigned long long)mddev->reshape_position);
3109 strcpy(page, "none\n");
3110 return 5;
3111}
3112
3113static ssize_t
3114reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
3115{
3116 char *e;
3117 unsigned long long new = simple_strtoull(buf, &e, 10);
3118 if (mddev->pers)
3119 return -EBUSY;
3120 if (buf == e || (*e && *e != '\n'))
3121 return -EINVAL;
3122 mddev->reshape_position = new;
3123 mddev->delta_disks = 0;
3124 mddev->new_level = mddev->level;
3125 mddev->new_layout = mddev->layout;
3126 mddev->new_chunk = mddev->chunk_size;
3127 return len;
3128}
3129
3130static struct md_sysfs_entry md_reshape_position =
3131__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3132 reshape_position_store);
3133
3134
/*
 * Attributes installed as the default attributes of md_ktype, i.e. for
 * every md kobject.
 */
3135static struct attribute *md_default_attrs[] = {
3136 &md_level.attr,
3137 &md_layout.attr,
3138 &md_raid_disks.attr,
3139 &md_chunk_size.attr,
3140 &md_size.attr,
3141 &md_resync_start.attr,
3142 &md_metadata.attr,
3143 &md_new_device.attr,
3144 &md_safe_delay.attr,
3145 &md_array_state.attr,
3146 &md_reshape_position.attr,
3147 NULL,
3148};
3149
/*
 * Attributes of md_redundancy_group.  do_md_run() creates the group
 * when the personality has a sync_request method, and do_md_stop()
 * removes it again under the same condition.
 */
3150static struct attribute *md_redundancy_attrs[] = {
3151 &md_scan_mode.attr,
3152 &md_mismatches.attr,
3153 &md_sync_min.attr,
3154 &md_sync_max.attr,
3155 &md_sync_speed.attr,
3156 &md_sync_completed.attr,
3157 &md_max_sync.attr,
3158 &md_suspend_lo.attr,
3159 &md_suspend_hi.attr,
3160 &md_bitmap.attr,
3161 &md_degraded.attr,
3162 NULL,
3163};
3164static struct attribute_group md_redundancy_group = {
3165 .name = NULL,
3166 .attrs = md_redundancy_attrs,
3167};
3168
3169
3170static ssize_t
3171md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3172{
3173 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3174 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3175 ssize_t rv;
3176
3177 if (!entry->show)
3178 return -EIO;
3179 rv = mddev_lock(mddev);
3180 if (!rv) {
3181 rv = entry->show(mddev, page);
3182 mddev_unlock(mddev);
3183 }
3184 return rv;
3185}
3186
3187static ssize_t
3188md_attr_store(struct kobject *kobj, struct attribute *attr,
3189 const char *page, size_t length)
3190{
3191 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3192 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3193 ssize_t rv;
3194
3195 if (!entry->store)
3196 return -EIO;
3197 if (!capable(CAP_SYS_ADMIN))
3198 return -EACCES;
3199 rv = mddev_lock(mddev);
3200 if (!rv) {
3201 rv = entry->store(mddev, page, length);
3202 mddev_unlock(mddev);
3203 }
3204 return rv;
3205}
3206
3207static void md_free(struct kobject *ko)
3208{
3209 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3210 kfree(mddev);
3211}
3212
3213static struct sysfs_ops md_sysfs_ops = {
3214 .show = md_attr_show,
3215 .store = md_attr_store,
3216};
3217static struct kobj_type md_ktype = {
3218 .release = md_free,
3219 .sysfs_ops = &md_sysfs_ops,
3220 .default_attrs = md_default_attrs,
3221};
3222
3223int mdp_major = 0;
3224
3225static struct kobject *md_probe(dev_t dev, int *part, void *data)
3226{
3227 static DEFINE_MUTEX(disks_mutex);
3228 mddev_t *mddev = mddev_find(dev);
3229 struct gendisk *disk;
3230 int partitioned = (MAJOR(dev) != MD_MAJOR);
3231 int shift = partitioned ? MdpMinorShift : 0;
3232 int unit = MINOR(dev) >> shift;
3233 int error;
3234
3235 if (!mddev)
3236 return NULL;
3237
3238 mutex_lock(&disks_mutex);
3239 if (mddev->gendisk) {
3240 mutex_unlock(&disks_mutex);
3241 mddev_put(mddev);
3242 return NULL;
3243 }
3244 disk = alloc_disk(1 << shift);
3245 if (!disk) {
3246 mutex_unlock(&disks_mutex);
3247 mddev_put(mddev);
3248 return NULL;
3249 }
3250 disk->major = MAJOR(dev);
3251 disk->first_minor = unit << shift;
3252 if (partitioned)
3253 sprintf(disk->disk_name, "md_d%d", unit);
3254 else
3255 sprintf(disk->disk_name, "md%d", unit);
3256 disk->fops = &md_fops;
3257 disk->private_data = mddev;
3258 disk->queue = mddev->queue;
3259 add_disk(disk);
3260 mddev->gendisk = disk;
3261 mutex_unlock(&disks_mutex);
3262 error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
3263 "%s", "md");
3264 if (error)
3265 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
3266 disk->disk_name);
3267 else
3268 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3269 return NULL;
3270}
3271
3272static void md_safemode_timeout(unsigned long data)
3273{
3274 mddev_t *mddev = (mddev_t *) data;
3275
3276 mddev->safemode = 1;
3277 md_wakeup_thread(mddev->thread);
3278}
3279
3280static int start_dirty_degraded;
3281
3282static int do_md_run(mddev_t * mddev)
3283{
3284 int err;
3285 int chunk_size;
3286 struct list_head *tmp;
3287 mdk_rdev_t *rdev;
3288 struct gendisk *disk;
3289 struct mdk_personality *pers;
3290 char b[BDEVNAME_SIZE];
3291
3292 if (list_empty(&mddev->disks))
3293
3294 return -EINVAL;
3295
3296 if (mddev->pers)
3297 return -EBUSY;
3298
3299
3300
3301
3302 if (!mddev->raid_disks) {
3303 if (!mddev->persistent)
3304 return -EINVAL;
3305 analyze_sbs(mddev);
3306 }
3307
3308 chunk_size = mddev->chunk_size;
3309
3310 if (chunk_size) {
3311 if (chunk_size > MAX_CHUNK_SIZE) {
3312 printk(KERN_ERR "too big chunk_size: %d > %d\n",
3313 chunk_size, MAX_CHUNK_SIZE);
3314 return -EINVAL;
3315 }
3316
3317
3318
3319 if ( (1 << ffz(~chunk_size)) != chunk_size) {
3320 printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
3321 return -EINVAL;
3322 }
3323 if (chunk_size < PAGE_SIZE) {
3324 printk(KERN_ERR "too small chunk_size: %d < %ld\n",
3325 chunk_size, PAGE_SIZE);
3326 return -EINVAL;
3327 }
3328
3329
3330 rdev_for_each(rdev, tmp, mddev) {
3331 if (test_bit(Faulty, &rdev->flags))
3332 continue;
3333 if (rdev->size < chunk_size / 1024) {
3334 printk(KERN_WARNING
3335 "md: Dev %s smaller than chunk_size:"
3336 " %lluk < %dk\n",
3337 bdevname(rdev->bdev,b),
3338 (unsigned long long)rdev->size,
3339 chunk_size / 1024);
3340 return -EINVAL;
3341 }
3342 }
3343 }
3344
3345#ifdef CONFIG_KMOD
3346 if (mddev->level != LEVEL_NONE)
3347 request_module("md-level-%d", mddev->level);
3348 else if (mddev->clevel[0])
3349 request_module("md-%s", mddev->clevel);
3350#endif
3351
3352
3353
3354
3355
3356
3357 rdev_for_each(rdev, tmp, mddev) {
3358 if (test_bit(Faulty, &rdev->flags))
3359 continue;
3360 sync_blockdev(rdev->bdev);
3361 invalidate_bdev(rdev->bdev);
3362
3363
3364
3365
3366
3367 if (rdev->data_offset < rdev->sb_offset) {
3368 if (mddev->size &&
3369 rdev->data_offset + mddev->size*2
3370 > rdev->sb_offset*2) {
3371 printk("md: %s: data overlaps metadata\n",
3372 mdname(mddev));
3373 return -EINVAL;
3374 }
3375 } else {
3376 if (rdev->sb_offset*2 + rdev->sb_size/512
3377 > rdev->data_offset) {
3378 printk("md: %s: metadata overlaps data\n",
3379 mdname(mddev));
3380 return -EINVAL;
3381 }
3382 }
3383 }
3384
3385 md_probe(mddev->unit, NULL, NULL);
3386 disk = mddev->gendisk;
3387 if (!disk)
3388 return -ENOMEM;
3389
3390 spin_lock(&pers_lock);
3391 pers = find_pers(mddev->level, mddev->clevel);
3392 if (!pers || !try_module_get(pers->owner)) {
3393 spin_unlock(&pers_lock);
3394 if (mddev->level != LEVEL_NONE)
3395 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
3396 mddev->level);
3397 else
3398 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
3399 mddev->clevel);
3400 return -EINVAL;
3401 }
3402 mddev->pers = pers;
3403 spin_unlock(&pers_lock);
3404 mddev->level = pers->level;
3405 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3406
3407 if (mddev->reshape_position != MaxSector &&
3408 pers->start_reshape == NULL) {
3409
3410 mddev->pers = NULL;
3411 module_put(pers->owner);
3412 return -EINVAL;
3413 }
3414
3415 if (pers->sync_request) {
3416
3417
3418
3419 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
3420 mdk_rdev_t *rdev2;
3421 struct list_head *tmp2;
3422 int warned = 0;
3423 rdev_for_each(rdev, tmp, mddev) {
3424 rdev_for_each(rdev2, tmp2, mddev) {
3425 if (rdev < rdev2 &&
3426 rdev->bdev->bd_contains ==
3427 rdev2->bdev->bd_contains) {
3428 printk(KERN_WARNING
3429 "%s: WARNING: %s appears to be"
3430 " on the same physical disk as"
3431 " %s.\n",
3432 mdname(mddev),
3433 bdevname(rdev->bdev,b),
3434 bdevname(rdev2->bdev,b2));
3435 warned = 1;
3436 }
3437 }
3438 }
3439 if (warned)
3440 printk(KERN_WARNING
3441 "True protection against single-disk"
3442 " failure might be compromised.\n");
3443 }
3444
3445 mddev->recovery = 0;
3446 mddev->resync_max_sectors = mddev->size << 1;
3447 mddev->barriers_work = 1;
3448 mddev->ok_start_degraded = start_dirty_degraded;
3449
3450 if (start_readonly)
3451 mddev->ro = 2;
3452
3453 err = mddev->pers->run(mddev);
3454 if (!err && mddev->pers->sync_request) {
3455 err = bitmap_create(mddev);
3456 if (err) {
3457 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
3458 mdname(mddev), err);
3459 mddev->pers->stop(mddev);
3460 }
3461 }
3462 if (err) {
3463 printk(KERN_ERR "md: pers->run() failed ...\n");
3464 module_put(mddev->pers->owner);
3465 mddev->pers = NULL;
3466 bitmap_destroy(mddev);
3467 return err;
3468 }
3469 if (mddev->pers->sync_request) {
3470 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3471 printk(KERN_WARNING
3472 "md: cannot register extra attributes for %s\n",
3473 mdname(mddev));
3474 } else if (mddev->ro == 2)
3475 mddev->ro = 0;
3476
3477 atomic_set(&mddev->writes_pending,0);
3478 mddev->safemode = 0;
3479 mddev->safemode_timer.function = md_safemode_timeout;
3480 mddev->safemode_timer.data = (unsigned long) mddev;
3481 mddev->safemode_delay = (200 * HZ)/1000 +1;
3482 mddev->in_sync = 1;
3483
3484 rdev_for_each(rdev, tmp, mddev)
3485 if (rdev->raid_disk >= 0) {
3486 char nm[20];
3487 sprintf(nm, "rd%d", rdev->raid_disk);
3488 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
3489 printk("md: cannot register %s for %s\n",
3490 nm, mdname(mddev));
3491 }
3492
3493 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3494
3495 if (mddev->flags)
3496 md_update_sb(mddev, 0);
3497
3498 set_capacity(disk, mddev->array_size<<1);
3499
3500
3501
3502
3503
3504
3505
3506
3507 mddev->queue->queuedata = mddev;
3508 mddev->queue->make_request_fn = mddev->pers->make_request;
3509
3510
3511
3512
3513
3514 if (mddev->degraded && !mddev->sync_thread) {
3515 struct list_head *rtmp;
3516 int spares = 0;
3517 rdev_for_each(rdev, rtmp, mddev)
3518 if (rdev->raid_disk >= 0 &&
3519 !test_bit(In_sync, &rdev->flags) &&
3520 !test_bit(Faulty, &rdev->flags))
3521
3522 spares++;
3523 if (spares && mddev->pers->sync_request) {
3524 mddev->recovery = 0;
3525 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3526 mddev->sync_thread = md_register_thread(md_do_sync,
3527 mddev,
3528 "%s_resync");
3529 if (!mddev->sync_thread) {
3530 printk(KERN_ERR "%s: could not start resync"
3531 " thread...\n",
3532 mdname(mddev));
3533
3534 mddev->recovery = 0;
3535 }
3536 }
3537 }
3538 md_wakeup_thread(mddev->thread);
3539 md_wakeup_thread(mddev->sync_thread);
3540
3541 mddev->changed = 1;
3542 md_new_event(mddev);
3543 kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
3544 return 0;
3545}
3546
3547static int restart_array(mddev_t *mddev)
3548{
3549 struct gendisk *disk = mddev->gendisk;
3550 int err;
3551
3552
3553
3554
3555 err = -ENXIO;
3556 if (list_empty(&mddev->disks))
3557 goto out;
3558
3559 if (mddev->pers) {
3560 err = -EBUSY;
3561 if (!mddev->ro)
3562 goto out;
3563
3564 mddev->safemode = 0;
3565 mddev->ro = 0;
3566 set_disk_ro(disk, 0);
3567
3568 printk(KERN_INFO "md: %s switched to read-write mode.\n",
3569 mdname(mddev));
3570
3571
3572
3573 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3574 md_wakeup_thread(mddev->thread);
3575 md_wakeup_thread(mddev->sync_thread);
3576 err = 0;
3577 } else
3578 err = -EINVAL;
3579
3580out:
3581 return err;
3582}
3583
3584
3585
3586static int deny_bitmap_write_access(struct file * file)
3587{
3588 struct inode *inode = file->f_mapping->host;
3589
3590 spin_lock(&inode->i_lock);
3591 if (atomic_read(&inode->i_writecount) > 1) {
3592 spin_unlock(&inode->i_lock);
3593 return -ETXTBSY;
3594 }
3595 atomic_set(&inode->i_writecount, -1);
3596 spin_unlock(&inode->i_lock);
3597
3598 return 0;
3599}
3600
3601static void restore_bitmap_write_access(struct file *file)
3602{
3603 struct inode *inode = file->f_mapping->host;
3604
3605 spin_lock(&inode->i_lock);
3606 atomic_set(&inode->i_writecount, 1);
3607 spin_unlock(&inode->i_lock);
3608}
3609
3610
3611
3612
3613
3614
3615static int do_md_stop(mddev_t * mddev, int mode)
3616{
3617 int err = 0;
3618 struct gendisk *disk = mddev->gendisk;
3619
3620 if (mddev->pers) {
3621 if (atomic_read(&mddev->active)>2) {
3622 printk("md: %s still in use.\n",mdname(mddev));
3623 return -EBUSY;
3624 }
3625
3626 if (mddev->sync_thread) {
3627 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3628 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3629 md_unregister_thread(mddev->sync_thread);
3630 mddev->sync_thread = NULL;
3631 }
3632
3633 del_timer_sync(&mddev->safemode_timer);
3634
3635 invalidate_partition(disk, 0);
3636
3637 switch(mode) {
3638 case 1:
3639 err = -ENXIO;
3640 if (mddev->ro==1)
3641 goto out;
3642 mddev->ro = 1;
3643 break;
3644 case 0:
3645 case 2:
3646 bitmap_flush(mddev);
3647 md_super_wait(mddev);
3648 if (mddev->ro)
3649 set_disk_ro(disk, 0);
3650 blk_queue_make_request(mddev->queue, md_fail_request);
3651 mddev->pers->stop(mddev);
3652 mddev->queue->merge_bvec_fn = NULL;
3653 mddev->queue->unplug_fn = NULL;
3654 mddev->queue->backing_dev_info.congested_fn = NULL;
3655 if (mddev->pers->sync_request)
3656 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
3657
3658 module_put(mddev->pers->owner);
3659 mddev->pers = NULL;
3660
3661 set_capacity(disk, 0);
3662 mddev->changed = 1;
3663
3664 if (mddev->ro)
3665 mddev->ro = 0;
3666 }
3667 if (!mddev->in_sync || mddev->flags) {
3668
3669 mddev->in_sync = 1;
3670 md_update_sb(mddev, 1);
3671 }
3672 if (mode == 1)
3673 set_disk_ro(disk, 1);
3674 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3675 }
3676
3677
3678
3679
3680 if (mode == 0) {
3681 mdk_rdev_t *rdev;
3682 struct list_head *tmp;
3683
3684 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
3685
3686 bitmap_destroy(mddev);
3687 if (mddev->bitmap_file) {
3688 restore_bitmap_write_access(mddev->bitmap_file);
3689 fput(mddev->bitmap_file);
3690 mddev->bitmap_file = NULL;
3691 }
3692 mddev->bitmap_offset = 0;
3693
3694 rdev_for_each(rdev, tmp, mddev)
3695 if (rdev->raid_disk >= 0) {
3696 char nm[20];
3697 sprintf(nm, "rd%d", rdev->raid_disk);
3698 sysfs_remove_link(&mddev->kobj, nm);
3699 }
3700
3701
3702 flush_scheduled_work();
3703
3704 export_array(mddev);
3705
3706 mddev->array_size = 0;
3707 mddev->size = 0;
3708 mddev->raid_disks = 0;
3709 mddev->recovery_cp = 0;
3710 mddev->resync_max = MaxSector;
3711 mddev->reshape_position = MaxSector;
3712 mddev->external = 0;
3713 mddev->persistent = 0;
3714
3715 } else if (mddev->pers)
3716 printk(KERN_INFO "md: %s switched to read-only mode.\n",
3717 mdname(mddev));
3718 err = 0;
3719 md_new_event(mddev);
3720out:
3721 return err;
3722}
3723
3724#ifndef MODULE
3725static void autorun_array(mddev_t *mddev)
3726{
3727 mdk_rdev_t *rdev;
3728 struct list_head *tmp;
3729 int err;
3730
3731 if (list_empty(&mddev->disks))
3732 return;
3733
3734 printk(KERN_INFO "md: running: ");
3735
3736 rdev_for_each(rdev, tmp, mddev) {
3737 char b[BDEVNAME_SIZE];
3738 printk("<%s>", bdevname(rdev->bdev,b));
3739 }
3740 printk("\n");
3741
3742 err = do_md_run (mddev);
3743 if (err) {
3744 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
3745 do_md_stop (mddev, 0);
3746 }
3747}
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761static void autorun_devices(int part)
3762{
3763 struct list_head *tmp;
3764 mdk_rdev_t *rdev0, *rdev;
3765 mddev_t *mddev;
3766 char b[BDEVNAME_SIZE];
3767
3768 printk(KERN_INFO "md: autorun ...\n");
3769 while (!list_empty(&pending_raid_disks)) {
3770 int unit;
3771 dev_t dev;
3772 LIST_HEAD(candidates);
3773 rdev0 = list_entry(pending_raid_disks.next,
3774 mdk_rdev_t, same_set);
3775
3776 printk(KERN_INFO "md: considering %s ...\n",
3777 bdevname(rdev0->bdev,b));
3778 INIT_LIST_HEAD(&candidates);
3779 rdev_for_each_list(rdev, tmp, pending_raid_disks)
3780 if (super_90_load(rdev, rdev0, 0) >= 0) {
3781 printk(KERN_INFO "md: adding %s ...\n",
3782 bdevname(rdev->bdev,b));
3783 list_move(&rdev->same_set, &candidates);
3784 }
3785
3786
3787
3788
3789
3790 if (part) {
3791 dev = MKDEV(mdp_major,
3792 rdev0->preferred_minor << MdpMinorShift);
3793 unit = MINOR(dev) >> MdpMinorShift;
3794 } else {
3795 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
3796 unit = MINOR(dev);
3797 }
3798 if (rdev0->preferred_minor != unit) {
3799 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
3800 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
3801 break;
3802 }
3803
3804 md_probe(dev, NULL, NULL);
3805 mddev = mddev_find(dev);
3806 if (!mddev) {
3807 printk(KERN_ERR
3808 "md: cannot allocate memory for md drive.\n");
3809 break;
3810 }
3811 if (mddev_lock(mddev))
3812 printk(KERN_WARNING "md: %s locked, cannot run\n",
3813 mdname(mddev));
3814 else if (mddev->raid_disks || mddev->major_version
3815 || !list_empty(&mddev->disks)) {
3816 printk(KERN_WARNING
3817 "md: %s already running, cannot run %s\n",
3818 mdname(mddev), bdevname(rdev0->bdev,b));
3819 mddev_unlock(mddev);
3820 } else {
3821 printk(KERN_INFO "md: created %s\n", mdname(mddev));
3822 mddev->persistent = 1;
3823 rdev_for_each_list(rdev, tmp, candidates) {
3824 list_del_init(&rdev->same_set);
3825 if (bind_rdev_to_array(rdev, mddev))
3826 export_rdev(rdev);
3827 }
3828 autorun_array(mddev);
3829 mddev_unlock(mddev);
3830 }
3831
3832
3833
3834 rdev_for_each_list(rdev, tmp, candidates)
3835 export_rdev(rdev);
3836 mddev_put(mddev);
3837 }
3838 printk(KERN_INFO "md: ... autorun DONE.\n");
3839}
3840#endif
3841
3842static int get_version(void __user * arg)
3843{
3844 mdu_version_t ver;
3845
3846 ver.major = MD_MAJOR_VERSION;
3847 ver.minor = MD_MINOR_VERSION;
3848 ver.patchlevel = MD_PATCHLEVEL_VERSION;
3849
3850 if (copy_to_user(arg, &ver, sizeof(ver)))
3851 return -EFAULT;
3852
3853 return 0;
3854}
3855
3856static int get_array_info(mddev_t * mddev, void __user * arg)
3857{
3858 mdu_array_info_t info;
3859 int nr,working,active,failed,spare;
3860 mdk_rdev_t *rdev;
3861 struct list_head *tmp;
3862
3863 nr=working=active=failed=spare=0;
3864 rdev_for_each(rdev, tmp, mddev) {
3865 nr++;
3866 if (test_bit(Faulty, &rdev->flags))
3867 failed++;
3868 else {
3869 working++;
3870 if (test_bit(In_sync, &rdev->flags))
3871 active++;
3872 else
3873 spare++;
3874 }
3875 }
3876
3877 info.major_version = mddev->major_version;
3878 info.minor_version = mddev->minor_version;
3879 info.patch_version = MD_PATCHLEVEL_VERSION;
3880 info.ctime = mddev->ctime;
3881 info.level = mddev->level;
3882 info.size = mddev->size;
3883 if (info.size != mddev->size)
3884 info.size = -1;
3885 info.nr_disks = nr;
3886 info.raid_disks = mddev->raid_disks;
3887 info.md_minor = mddev->md_minor;
3888 info.not_persistent= !mddev->persistent;
3889
3890 info.utime = mddev->utime;
3891 info.state = 0;
3892 if (mddev->in_sync)
3893 info.state = (1<<MD_SB_CLEAN);
3894 if (mddev->bitmap && mddev->bitmap_offset)
3895 info.state = (1<<MD_SB_BITMAP_PRESENT);
3896 info.active_disks = active;
3897 info.working_disks = working;
3898 info.failed_disks = failed;
3899 info.spare_disks = spare;
3900
3901 info.layout = mddev->layout;
3902 info.chunk_size = mddev->chunk_size;
3903
3904 if (copy_to_user(arg, &info, sizeof(info)))
3905 return -EFAULT;
3906
3907 return 0;
3908}
3909
3910static int get_bitmap_file(mddev_t * mddev, void __user * arg)
3911{
3912 mdu_bitmap_file_t *file = NULL;
3913 char *ptr, *buf = NULL;
3914 int err = -ENOMEM;
3915
3916 md_allow_write(mddev);
3917
3918 file = kmalloc(sizeof(*file), GFP_KERNEL);
3919 if (!file)
3920 goto out;
3921
3922
3923 if (!mddev->bitmap || !mddev->bitmap->file) {
3924 file->pathname[0] = '\0';
3925 goto copy_out;
3926 }
3927
3928 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
3929 if (!buf)
3930 goto out;
3931
3932 ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
3933 if (!ptr)
3934 goto out;
3935
3936 strcpy(file->pathname, ptr);
3937
3938copy_out:
3939 err = 0;
3940 if (copy_to_user(arg, file, sizeof(*file)))
3941 err = -EFAULT;
3942out:
3943 kfree(buf);
3944 kfree(file);
3945 return err;
3946}
3947
3948static int get_disk_info(mddev_t * mddev, void __user * arg)
3949{
3950 mdu_disk_info_t info;
3951 unsigned int nr;
3952 mdk_rdev_t *rdev;
3953
3954 if (copy_from_user(&info, arg, sizeof(info)))
3955 return -EFAULT;
3956
3957 nr = info.number;
3958
3959 rdev = find_rdev_nr(mddev, nr);
3960 if (rdev) {
3961 info.major = MAJOR(rdev->bdev->bd_dev);
3962 info.minor = MINOR(rdev->bdev->bd_dev);
3963 info.raid_disk = rdev->raid_disk;
3964 info.state = 0;
3965 if (test_bit(Faulty, &rdev->flags))
3966 info.state |= (1<<MD_DISK_FAULTY);
3967 else if (test_bit(In_sync, &rdev->flags)) {
3968 info.state |= (1<<MD_DISK_ACTIVE);
3969 info.state |= (1<<MD_DISK_SYNC);
3970 }
3971 if (test_bit(WriteMostly, &rdev->flags))
3972 info.state |= (1<<MD_DISK_WRITEMOSTLY);
3973 } else {
3974 info.major = info.minor = 0;
3975 info.raid_disk = -1;
3976 info.state = (1<<MD_DISK_REMOVED);
3977 }
3978
3979 if (copy_to_user(arg, &info, sizeof(info)))
3980 return -EFAULT;
3981
3982 return 0;
3983}
3984
3985static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
3986{
3987 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
3988 mdk_rdev_t *rdev;
3989 dev_t dev = MKDEV(info->major,info->minor);
3990
3991 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
3992 return -EOVERFLOW;
3993
3994 if (!mddev->raid_disks) {
3995 int err;
3996
3997 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
3998 if (IS_ERR(rdev)) {
3999 printk(KERN_WARNING
4000 "md: md_import_device returned %ld\n",
4001 PTR_ERR(rdev));
4002 return PTR_ERR(rdev);
4003 }
4004 if (!list_empty(&mddev->disks)) {
4005 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
4006 mdk_rdev_t, same_set);
4007 int err = super_types[mddev->major_version]
4008 .load_super(rdev, rdev0, mddev->minor_version);
4009 if (err < 0) {
4010 printk(KERN_WARNING
4011 "md: %s has different UUID to %s\n",
4012 bdevname(rdev->bdev,b),
4013 bdevname(rdev0->bdev,b2));
4014 export_rdev(rdev);
4015 return -EINVAL;
4016 }
4017 }
4018 err = bind_rdev_to_array(rdev, mddev);
4019 if (err)
4020 export_rdev(rdev);
4021 return err;
4022 }
4023
4024
4025
4026
4027
4028
4029 if (mddev->pers) {
4030 int err;
4031 if (!mddev->pers->hot_add_disk) {
4032 printk(KERN_WARNING
4033 "%s: personality does not support diskops!\n",
4034 mdname(mddev));
4035 return -EINVAL;
4036 }
4037 if (mddev->persistent)
4038 rdev = md_import_device(dev, mddev->major_version,
4039 mddev->minor_version);
4040 else
4041 rdev = md_import_device(dev, -1, -1);
4042 if (IS_ERR(rdev)) {
4043 printk(KERN_WARNING
4044 "md: md_import_device returned %ld\n",
4045 PTR_ERR(rdev));
4046 return PTR_ERR(rdev);
4047 }
4048
4049 if (!mddev->persistent) {
4050 if (info->state & (1<<MD_DISK_SYNC) &&
4051 info->raid_disk < mddev->raid_disks)
4052 rdev->raid_disk = info->raid_disk;
4053 else
4054 rdev->raid_disk = -1;
4055 } else
4056 super_types[mddev->major_version].
4057 validate_super(mddev, rdev);
4058 rdev->saved_raid_disk = rdev->raid_disk;
4059
4060 clear_bit(In_sync, &rdev->flags);
4061 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4062 set_bit(WriteMostly, &rdev->flags);
4063
4064 rdev->raid_disk = -1;
4065 err = bind_rdev_to_array(rdev, mddev);
4066 if (!err && !mddev->pers->hot_remove_disk) {
4067
4068
4069
4070
4071 super_types[mddev->major_version].
4072 validate_super(mddev, rdev);
4073 err = mddev->pers->hot_add_disk(mddev, rdev);
4074 if (err)
4075 unbind_rdev_from_array(rdev);
4076 }
4077 if (err)
4078 export_rdev(rdev);
4079
4080 md_update_sb(mddev, 1);
4081 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4082 md_wakeup_thread(mddev->thread);
4083 return err;
4084 }
4085
4086
4087
4088
4089 if (mddev->major_version != 0) {
4090 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
4091 mdname(mddev));
4092 return -EINVAL;
4093 }
4094
4095 if (!(info->state & (1<<MD_DISK_FAULTY))) {
4096 int err;
4097 rdev = md_import_device (dev, -1, 0);
4098 if (IS_ERR(rdev)) {
4099 printk(KERN_WARNING
4100 "md: error, md_import_device() returned %ld\n",
4101 PTR_ERR(rdev));
4102 return PTR_ERR(rdev);
4103 }
4104 rdev->desc_nr = info->number;
4105 if (info->raid_disk < mddev->raid_disks)
4106 rdev->raid_disk = info->raid_disk;
4107 else
4108 rdev->raid_disk = -1;
4109
4110 if (rdev->raid_disk < mddev->raid_disks)
4111 if (info->state & (1<<MD_DISK_SYNC))
4112 set_bit(In_sync, &rdev->flags);
4113
4114 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4115 set_bit(WriteMostly, &rdev->flags);
4116
4117 if (!mddev->persistent) {
4118 printk(KERN_INFO "md: nonpersistent superblock ...\n");
4119 rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
4120 } else
4121 rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
4122 rdev->size = calc_dev_size(rdev, mddev->chunk_size);
4123
4124 err = bind_rdev_to_array(rdev, mddev);
4125 if (err) {
4126 export_rdev(rdev);
4127 return err;
4128 }
4129 }
4130
4131 return 0;
4132}
4133
4134static int hot_remove_disk(mddev_t * mddev, dev_t dev)
4135{
4136 char b[BDEVNAME_SIZE];
4137 mdk_rdev_t *rdev;
4138
4139 if (!mddev->pers)
4140 return -ENODEV;
4141
4142 rdev = find_rdev(mddev, dev);
4143 if (!rdev)
4144 return -ENXIO;
4145
4146 if (rdev->raid_disk >= 0)
4147 goto busy;
4148
4149 kick_rdev_from_array(rdev);
4150 md_update_sb(mddev, 1);
4151 md_new_event(mddev);
4152
4153 return 0;
4154busy:
4155 printk(KERN_WARNING "md: cannot remove active disk %s from %s ... \n",
4156 bdevname(rdev->bdev,b), mdname(mddev));
4157 return -EBUSY;
4158}
4159
4160static int hot_add_disk(mddev_t * mddev, dev_t dev)
4161{
4162 char b[BDEVNAME_SIZE];
4163 int err;
4164 unsigned int size;
4165 mdk_rdev_t *rdev;
4166
4167 if (!mddev->pers)
4168 return -ENODEV;
4169
4170 if (mddev->major_version != 0) {
4171 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
4172 " version-0 superblocks.\n",
4173 mdname(mddev));
4174 return -EINVAL;
4175 }
4176 if (!mddev->pers->hot_add_disk) {
4177 printk(KERN_WARNING
4178 "%s: personality does not support diskops!\n",
4179 mdname(mddev));
4180 return -EINVAL;
4181 }
4182
4183 rdev = md_import_device (dev, -1, 0);
4184 if (IS_ERR(rdev)) {
4185 printk(KERN_WARNING
4186 "md: error, md_import_device() returned %ld\n",
4187 PTR_ERR(rdev));
4188 return -EINVAL;
4189 }
4190
4191 if (mddev->persistent)
4192 rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
4193 else
4194 rdev->sb_offset =
4195 rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
4196
4197 size = calc_dev_size(rdev, mddev->chunk_size);
4198 rdev->size = size;
4199
4200 if (test_bit(Faulty, &rdev->flags)) {
4201 printk(KERN_WARNING
4202 "md: can not hot-add faulty %s disk to %s!\n",
4203 bdevname(rdev->bdev,b), mdname(mddev));
4204 err = -EINVAL;
4205 goto abort_export;
4206 }
4207 clear_bit(In_sync, &rdev->flags);
4208 rdev->desc_nr = -1;
4209 rdev->saved_raid_disk = -1;
4210 err = bind_rdev_to_array(rdev, mddev);
4211 if (err)
4212 goto abort_export;
4213
4214
4215
4216
4217
4218
4219 if (rdev->desc_nr == mddev->max_disks) {
4220 printk(KERN_WARNING "%s: can not hot-add to full array!\n",
4221 mdname(mddev));
4222 err = -EBUSY;
4223 goto abort_unbind_export;
4224 }
4225
4226 rdev->raid_disk = -1;
4227
4228 md_update_sb(mddev, 1);
4229
4230
4231
4232
4233
4234 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4235 md_wakeup_thread(mddev->thread);
4236 md_new_event(mddev);
4237 return 0;
4238
4239abort_unbind_export:
4240 unbind_rdev_from_array(rdev);
4241
4242abort_export:
4243 export_rdev(rdev);
4244 return err;
4245}
4246
4247static int set_bitmap_file(mddev_t *mddev, int fd)
4248{
4249 int err;
4250
4251 if (mddev->pers) {
4252 if (!mddev->pers->quiesce)
4253 return -EBUSY;
4254 if (mddev->recovery || mddev->sync_thread)
4255 return -EBUSY;
4256
4257 }
4258
4259
4260 if (fd >= 0) {
4261 if (mddev->bitmap)
4262 return -EEXIST;
4263 mddev->bitmap_file = fget(fd);
4264
4265 if (mddev->bitmap_file == NULL) {
4266 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
4267 mdname(mddev));
4268 return -EBADF;
4269 }
4270
4271 err = deny_bitmap_write_access(mddev->bitmap_file);
4272 if (err) {
4273 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
4274 mdname(mddev));
4275 fput(mddev->bitmap_file);
4276 mddev->bitmap_file = NULL;
4277 return err;
4278 }
4279 mddev->bitmap_offset = 0;
4280 } else if (mddev->bitmap == NULL)
4281 return -ENOENT;
4282 err = 0;
4283 if (mddev->pers) {
4284 mddev->pers->quiesce(mddev, 1);
4285 if (fd >= 0)
4286 err = bitmap_create(mddev);
4287 if (fd < 0 || err) {
4288 bitmap_destroy(mddev);
4289 fd = -1;
4290 }
4291 mddev->pers->quiesce(mddev, 0);
4292 }
4293 if (fd < 0) {
4294 if (mddev->bitmap_file) {
4295 restore_bitmap_write_access(mddev->bitmap_file);
4296 fput(mddev->bitmap_file);
4297 }
4298 mddev->bitmap_file = NULL;
4299 }
4300
4301 return err;
4302}
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4318{
4319
4320 if (info->raid_disks == 0) {
4321
4322 if (info->major_version < 0 ||
4323 info->major_version >= ARRAY_SIZE(super_types) ||
4324 super_types[info->major_version].name == NULL) {
4325
4326 printk(KERN_INFO
4327 "md: superblock version %d not known\n",
4328 info->major_version);
4329 return -EINVAL;
4330 }
4331 mddev->major_version = info->major_version;
4332 mddev->minor_version = info->minor_version;
4333 mddev->patch_version = info->patch_version;
4334 mddev->persistent = !info->not_persistent;
4335 return 0;
4336 }
4337 mddev->major_version = MD_MAJOR_VERSION;
4338 mddev->minor_version = MD_MINOR_VERSION;
4339 mddev->patch_version = MD_PATCHLEVEL_VERSION;
4340 mddev->ctime = get_seconds();
4341
4342 mddev->level = info->level;
4343 mddev->clevel[0] = 0;
4344 mddev->size = info->size;
4345 mddev->raid_disks = info->raid_disks;
4346
4347
4348
4349 if (info->state & (1<<MD_SB_CLEAN))
4350 mddev->recovery_cp = MaxSector;
4351 else
4352 mddev->recovery_cp = 0;
4353 mddev->persistent = ! info->not_persistent;
4354 mddev->external = 0;
4355
4356 mddev->layout = info->layout;
4357 mddev->chunk_size = info->chunk_size;
4358
4359 mddev->max_disks = MD_SB_DISKS;
4360
4361 if (mddev->persistent)
4362 mddev->flags = 0;
4363 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4364
4365 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
4366 mddev->bitmap_offset = 0;
4367
4368 mddev->reshape_position = MaxSector;
4369
4370
4371
4372
4373 get_random_bytes(mddev->uuid, 16);
4374
4375 mddev->new_level = mddev->level;
4376 mddev->new_chunk = mddev->chunk_size;
4377 mddev->new_layout = mddev->layout;
4378 mddev->delta_disks = 0;
4379
4380 return 0;
4381}
4382
4383static int update_size(mddev_t *mddev, unsigned long size)
4384{
4385 mdk_rdev_t * rdev;
4386 int rv;
4387 struct list_head *tmp;
4388 int fit = (size == 0);
4389
4390 if (mddev->pers->resize == NULL)
4391 return -EINVAL;
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402 if (mddev->sync_thread)
4403 return -EBUSY;
4404 rdev_for_each(rdev, tmp, mddev) {
4405 sector_t avail;
4406 avail = rdev->size * 2;
4407
4408 if (fit && (size == 0 || size > avail/2))
4409 size = avail/2;
4410 if (avail < ((sector_t)size << 1))
4411 return -ENOSPC;
4412 }
4413 rv = mddev->pers->resize(mddev, (sector_t)size *2);
4414 if (!rv) {
4415 struct block_device *bdev;
4416
4417 bdev = bdget_disk(mddev->gendisk, 0);
4418 if (bdev) {
4419 mutex_lock(&bdev->bd_inode->i_mutex);
4420 i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10);
4421 mutex_unlock(&bdev->bd_inode->i_mutex);
4422 bdput(bdev);
4423 }
4424 }
4425 return rv;
4426}
4427
4428static int update_raid_disks(mddev_t *mddev, int raid_disks)
4429{
4430 int rv;
4431
4432 if (mddev->pers->check_reshape == NULL)
4433 return -EINVAL;
4434 if (raid_disks <= 0 ||
4435 raid_disks >= mddev->max_disks)
4436 return -EINVAL;
4437 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
4438 return -EBUSY;
4439 mddev->delta_disks = raid_disks - mddev->raid_disks;
4440
4441 rv = mddev->pers->check_reshape(mddev);
4442 return rv;
4443}
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
4455{
4456 int rv = 0;
4457 int cnt = 0;
4458 int state = 0;
4459
4460
4461 if (mddev->bitmap && mddev->bitmap_offset)
4462 state |= (1 << MD_SB_BITMAP_PRESENT);
4463
4464 if (mddev->major_version != info->major_version ||
4465 mddev->minor_version != info->minor_version ||
4466
4467 mddev->ctime != info->ctime ||
4468 mddev->level != info->level ||
4469
4470 !mddev->persistent != info->not_persistent||
4471 mddev->chunk_size != info->chunk_size ||
4472
4473 ((state^info->state) & 0xfffffe00)
4474 )
4475 return -EINVAL;
4476
4477 if (info->size >= 0 && mddev->size != info->size) cnt++;
4478 if (mddev->raid_disks != info->raid_disks) cnt++;
4479 if (mddev->layout != info->layout) cnt++;
4480 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
4481 if (cnt == 0) return 0;
4482 if (cnt > 1) return -EINVAL;
4483
4484 if (mddev->layout != info->layout) {
4485
4486
4487
4488
4489 if (mddev->pers->reconfig == NULL)
4490 return -EINVAL;
4491 else
4492 return mddev->pers->reconfig(mddev, info->layout, -1);
4493 }
4494 if (info->size >= 0 && mddev->size != info->size)
4495 rv = update_size(mddev, info->size);
4496
4497 if (mddev->raid_disks != info->raid_disks)
4498 rv = update_raid_disks(mddev, info->raid_disks);
4499
4500 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
4501 if (mddev->pers->quiesce == NULL)
4502 return -EINVAL;
4503 if (mddev->recovery || mddev->sync_thread)
4504 return -EBUSY;
4505 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
4506
4507 if (mddev->bitmap)
4508 return -EEXIST;
4509 if (mddev->default_bitmap_offset == 0)
4510 return -EINVAL;
4511 mddev->bitmap_offset = mddev->default_bitmap_offset;
4512 mddev->pers->quiesce(mddev, 1);
4513 rv = bitmap_create(mddev);
4514 if (rv)
4515 bitmap_destroy(mddev);
4516 mddev->pers->quiesce(mddev, 0);
4517 } else {
4518
4519 if (!mddev->bitmap)
4520 return -ENOENT;
4521 if (mddev->bitmap->file)
4522 return -EINVAL;
4523 mddev->pers->quiesce(mddev, 1);
4524 bitmap_destroy(mddev);
4525 mddev->pers->quiesce(mddev, 0);
4526 mddev->bitmap_offset = 0;
4527 }
4528 }
4529 md_update_sb(mddev, 1);
4530 return rv;
4531}
4532
4533static int set_disk_faulty(mddev_t *mddev, dev_t dev)
4534{
4535 mdk_rdev_t *rdev;
4536
4537 if (mddev->pers == NULL)
4538 return -ENODEV;
4539
4540 rdev = find_rdev(mddev, dev);
4541 if (!rdev)
4542 return -ENODEV;
4543
4544 md_error(mddev, rdev);
4545 return 0;
4546}
4547
4548static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
4549{
4550 mddev_t *mddev = bdev->bd_disk->private_data;
4551
4552 geo->heads = 2;
4553 geo->sectors = 4;
4554 geo->cylinders = get_capacity(mddev->gendisk) / 8;
4555 return 0;
4556}
4557
/*
 * md_ioctl() - ioctl entry point for md block devices.
 * @inode: inode of the md device node
 * @file:  open file (not used here)
 * @cmd:   ioctl command
 * @arg:   command argument; a user pointer, device number or fd depending
 *         on @cmd
 *
 * Requires CAP_SYS_ADMIN.  Commands are dispatched in stages:
 *  - commands that do not need an array (handled before the mddev lookup),
 *  - SET_ARRAY_INFO,
 *  - query / stop / restart commands,
 *  - commands that modify the array, refused with -EROFS when read-only.
 *
 * Returns 0 on success or a negative errno.
 */
static int md_ioctl(struct inode *inode, struct file *file,
			unsigned int cmd, unsigned long arg)
{
	int err = 0;
	void __user *argp = (void __user *)arg;
	mddev_t *mddev = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	/*
	 * Commands that address the driver as a whole; handled without
	 * looking up or locking any array.
	 */
	switch (cmd)
	{
		case RAID_VERSION:
			err = get_version(argp);
			goto done;

		case PRINT_RAID_DEBUG:
			err = 0;
			md_print_devices();
			goto done;

#ifndef MODULE
		case RAID_AUTORUN:
			err = 0;
			autostart_arrays(arg);
			goto done;
#endif
		default:;
	}

	/*
	 * Everything below operates on the array attached to this device.
	 */
	mddev = inode->i_bdev->bd_disk->private_data;

	if (!mddev) {
		BUG();
		goto abort;
	}

	err = mddev_lock(mddev);
	if (err) {
		printk(KERN_INFO
			"md: ioctl lock interrupted, reason %d, cmd %d\n",
			err, cmd);
		goto abort;
	}

	switch (cmd)
	{
		case SET_ARRAY_INFO:
			{
				mdu_array_info_t info;
				/* a NULL argument is treated as an all-zero info block */
				if (!arg)
					memset(&info, 0, sizeof(info));
				else if (copy_from_user(&info, argp, sizeof(info))) {
					err = -EFAULT;
					goto abort_unlock;
				}
				/* running array: treat the request as an update */
				if (mddev->pers) {
					err = update_array_info(mddev, &info);
					if (err) {
						printk(KERN_WARNING "md: couldn't update"
						       " array info. %d\n", err);
						goto abort_unlock;
					}
					goto done_unlock;
				}
				if (!list_empty(&mddev->disks)) {
					printk(KERN_WARNING
					       "md: array %s already has disks!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				if (mddev->raid_disks) {
					printk(KERN_WARNING
					       "md: array %s already initialised!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				err = set_array_info(mddev, &info);
				if (err) {
					printk(KERN_WARNING "md: couldn't set"
					       " array info. %d\n", err);
					goto abort_unlock;
				}
			}
			goto done_unlock;

		default:;
	}

	/*
	 * While the array is not initialised (no raid_disks and not
	 * externally managed) only ADD_NEW_DISK, STOP_ARRAY, RUN_ARRAY and
	 * SET_/GET_BITMAP_FILE are accepted.
	 */
	if ((!mddev->raid_disks && !mddev->external)
	    && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
	    && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
	    && cmd != GET_BITMAP_FILE) {
		err = -ENODEV;
		goto abort_unlock;
	}

	/*
	 * Commands handled before the read-only check below.
	 */
	switch (cmd)
	{
		case GET_ARRAY_INFO:
			err = get_array_info(mddev, argp);
			goto done_unlock;

		case GET_BITMAP_FILE:
			err = get_bitmap_file(mddev, argp);
			goto done_unlock;

		case GET_DISK_INFO:
			err = get_disk_info(mddev, argp);
			goto done_unlock;

		case RESTART_ARRAY_RW:
			err = restart_array(mddev);
			goto done_unlock;

		case STOP_ARRAY:
			err = do_md_stop (mddev, 0);
			goto done_unlock;

		case STOP_ARRAY_RO:
			err = do_md_stop (mddev, 1);
			goto done_unlock;

	}

	/*
	 * The remaining md ioctls change the array.  On a running array
	 * with ro == 2 the array is switched to read-write and the md
	 * thread woken; with any other non-zero ro value the request is
	 * refused with -EROFS.  Commands whose _IOC_TYPE is not MD_MAJOR
	 * skip this check and fall into the final switch.
	 */
	if (_IOC_TYPE(cmd) == MD_MAJOR &&
	    mddev->ro && mddev->pers) {
		if (mddev->ro == 2) {
			mddev->ro = 0;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);

		} else {
			err = -EROFS;
			goto abort_unlock;
		}
	}

	switch (cmd)
	{
		case ADD_NEW_DISK:
		{
			mdu_disk_info_t info;
			if (copy_from_user(&info, argp, sizeof(info)))
				err = -EFAULT;
			else
				err = add_new_disk(mddev, &info);
			goto done_unlock;
		}

		case HOT_REMOVE_DISK:
			err = hot_remove_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case HOT_ADD_DISK:
			err = hot_add_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case SET_DISK_FAULTY:
			err = set_disk_faulty(mddev, new_decode_dev(arg));
			goto done_unlock;

		case RUN_ARRAY:
			err = do_md_run (mddev);
			goto done_unlock;

		case SET_BITMAP_FILE:
			err = set_bitmap_file(mddev, (int)arg);
			goto done_unlock;

		default:
			err = -EINVAL;
			goto abort_unlock;
	}

	/* both labels release the array lock and return err */
done_unlock:
abort_unlock:
	mddev_unlock(mddev);

	return err;
	/* exit for the driver-level commands, which never took the lock */
done:
	if (err)
		MD_BUG();
abort:
	return err;
}
4775
4776static int md_open(struct inode *inode, struct file *file)
4777{
4778
4779
4780
4781
4782 mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
4783 int err;
4784
4785 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
4786 goto out;
4787
4788 err = 0;
4789 mddev_get(mddev);
4790 mddev_unlock(mddev);
4791
4792 check_disk_change(inode->i_bdev);
4793 out:
4794 return err;
4795}
4796
4797static int md_release(struct inode *inode, struct file * file)
4798{
4799 mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
4800
4801 BUG_ON(!mddev);
4802 mddev_put(mddev);
4803
4804 return 0;
4805}
4806
4807static int md_media_changed(struct gendisk *disk)
4808{
4809 mddev_t *mddev = disk->private_data;
4810
4811 return mddev->changed;
4812}
4813
4814static int md_revalidate(struct gendisk *disk)
4815{
4816 mddev_t *mddev = disk->private_data;
4817
4818 mddev->changed = 0;
4819 return 0;
4820}
/*
 * Block device operations for md devices: open/release, ioctl, geometry
 * reporting and media-change handling.
 */
static struct block_device_operations md_fops =
{
	.owner		= THIS_MODULE,
	.open		= md_open,
	.release	= md_release,
	.ioctl		= md_ioctl,
	.getgeo		= md_getgeo,
	.media_changed	= md_media_changed,
	.revalidate_disk= md_revalidate,
};
4831
4832static int md_thread(void * arg)
4833{
4834 mdk_thread_t *thread = arg;
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848 allow_signal(SIGKILL);
4849 while (!kthread_should_stop()) {
4850
4851
4852
4853
4854
4855
4856 if (signal_pending(current))
4857 flush_signals(current);
4858
4859 wait_event_interruptible_timeout
4860 (thread->wqueue,
4861 test_bit(THREAD_WAKEUP, &thread->flags)
4862 || kthread_should_stop(),
4863 thread->timeout);
4864
4865 clear_bit(THREAD_WAKEUP, &thread->flags);
4866
4867 thread->run(thread->mddev);
4868 }
4869
4870 return 0;
4871}
4872
4873void md_wakeup_thread(mdk_thread_t *thread)
4874{
4875 if (thread) {
4876 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
4877 set_bit(THREAD_WAKEUP, &thread->flags);
4878 wake_up(&thread->wqueue);
4879 }
4880}
4881
4882mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
4883 const char *name)
4884{
4885 mdk_thread_t *thread;
4886
4887 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
4888 if (!thread)
4889 return NULL;
4890
4891 init_waitqueue_head(&thread->wqueue);
4892
4893 thread->run = run;
4894 thread->mddev = mddev;
4895 thread->timeout = MAX_SCHEDULE_TIMEOUT;
4896 thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
4897 if (IS_ERR(thread->tsk)) {
4898 kfree(thread);
4899 return NULL;
4900 }
4901 return thread;
4902}
4903
4904void md_unregister_thread(mdk_thread_t *thread)
4905{
4906 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
4907
4908 kthread_stop(thread->tsk);
4909 kfree(thread);
4910}
4911
4912void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
4913{
4914 if (!mddev) {
4915 MD_BUG();
4916 return;
4917 }
4918
4919 if (!rdev || test_bit(Faulty, &rdev->flags))
4920 return;
4921
4922
4923
4924
4925
4926
4927
4928 if (!mddev->pers)
4929 return;
4930 if (!mddev->pers->error_handler)
4931 return;
4932 mddev->pers->error_handler(mddev,rdev);
4933 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4934 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4935 md_wakeup_thread(mddev->thread);
4936 md_new_event_inintr(mddev);
4937}
4938
4939
4940
4941static void status_unused(struct seq_file *seq)
4942{
4943 int i = 0;
4944 mdk_rdev_t *rdev;
4945 struct list_head *tmp;
4946
4947 seq_printf(seq, "unused devices: ");
4948
4949 rdev_for_each_list(rdev, tmp, pending_raid_disks) {
4950 char b[BDEVNAME_SIZE];
4951 i++;
4952 seq_printf(seq, "%s ",
4953 bdevname(rdev->bdev,b));
4954 }
4955 if (!i)
4956 seq_printf(seq, "<none>");
4957
4958 seq_printf(seq, "\n");
4959}
4960
4961
4962static void status_resync(struct seq_file *seq, mddev_t * mddev)
4963{
4964 sector_t max_blocks, resync, res;
4965 unsigned long dt, db, rt;
4966 int scale;
4967 unsigned int per_milli;
4968
4969 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
4970
4971 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
4972 max_blocks = mddev->resync_max_sectors >> 1;
4973 else
4974 max_blocks = mddev->size;
4975
4976
4977
4978
4979 if (!max_blocks) {
4980 MD_BUG();
4981 return;
4982 }
4983
4984
4985
4986
4987
4988 scale = 10;
4989 if (sizeof(sector_t) > sizeof(unsigned long)) {
4990 while ( max_blocks/2 > (1ULL<<(scale+32)))
4991 scale++;
4992 }
4993 res = (resync>>scale)*1000;
4994 sector_div(res, (u32)((max_blocks>>scale)+1));
4995
4996 per_milli = res;
4997 {
4998 int i, x = per_milli/50, y = 20-x;
4999 seq_printf(seq, "[");
5000 for (i = 0; i < x; i++)
5001 seq_printf(seq, "=");
5002 seq_printf(seq, ">");
5003 for (i = 0; i < y; i++)
5004 seq_printf(seq, ".");
5005 seq_printf(seq, "] ");
5006 }
5007 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
5008 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
5009 "reshape" :
5010 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
5011 "check" :
5012 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
5013 "resync" : "recovery"))),
5014 per_milli/10, per_milli % 10,
5015 (unsigned long long) resync,
5016 (unsigned long long) max_blocks);
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027 dt = ((jiffies - mddev->resync_mark) / HZ);
5028 if (!dt) dt++;
5029 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
5030 - mddev->resync_mark_cnt;
5031 rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
5032
5033 seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
5034
5035 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
5036}
5037
5038static void *md_seq_start(struct seq_file *seq, loff_t *pos)
5039{
5040 struct list_head *tmp;
5041 loff_t l = *pos;
5042 mddev_t *mddev;
5043
5044 if (l >= 0x10000)
5045 return NULL;
5046 if (!l--)
5047
5048 return (void*)1;
5049
5050 spin_lock(&all_mddevs_lock);
5051 list_for_each(tmp,&all_mddevs)
5052 if (!l--) {
5053 mddev = list_entry(tmp, mddev_t, all_mddevs);
5054 mddev_get(mddev);
5055 spin_unlock(&all_mddevs_lock);
5056 return mddev;
5057 }
5058 spin_unlock(&all_mddevs_lock);
5059 if (!l--)
5060 return (void*)2;
5061 return NULL;
5062}
5063
5064static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
5065{
5066 struct list_head *tmp;
5067 mddev_t *next_mddev, *mddev = v;
5068
5069 ++*pos;
5070 if (v == (void*)2)
5071 return NULL;
5072
5073 spin_lock(&all_mddevs_lock);
5074 if (v == (void*)1)
5075 tmp = all_mddevs.next;
5076 else
5077 tmp = mddev->all_mddevs.next;
5078 if (tmp != &all_mddevs)
5079 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
5080 else {
5081 next_mddev = (void*)2;
5082 *pos = 0x10000;
5083 }
5084 spin_unlock(&all_mddevs_lock);
5085
5086 if (v != (void*)1)
5087 mddev_put(mddev);
5088 return next_mddev;
5089
5090}
5091
5092static void md_seq_stop(struct seq_file *seq, void *v)
5093{
5094 mddev_t *mddev = v;
5095
5096 if (mddev && v != (void*)1 && v != (void*)2)
5097 mddev_put(mddev);
5098}
5099
/*
 * Per-open state of /proc/mdstat, stored in seq_file->private.
 */
struct mdstat_info {
	/*
	 * Value of md_event_count recorded at open time and again when the
	 * header is shown; mdstat_poll() compares it with the live counter.
	 */
	int event;
};
5103
/*
 * md_seq_show() - seq_file show method for /proc/mdstat.
 * @seq: output sequence file
 * @v:   (void*)1 for the header, (void*)2 for the trailer, otherwise the
 *       array to describe
 *
 * Returns 0, or -EINTR if taking the array lock fails.
 */
static int md_seq_show(struct seq_file *seq, void *v)
{
	mddev_t *mddev = v;
	sector_t size;
	struct list_head *tmp2;
	mdk_rdev_t *rdev;
	struct mdstat_info *mi = seq->private;
	struct bitmap *bitmap;

	/* header: list the registered personalities */
	if (v == (void*)1) {
		struct mdk_personality *pers;
		seq_printf(seq, "Personalities : ");
		spin_lock(&pers_lock);
		list_for_each_entry(pers, &pers_list, list)
			seq_printf(seq, "[%s] ", pers->name);

		spin_unlock(&pers_lock);
		seq_printf(seq, "\n");
		/* record the event count this snapshot corresponds to */
		mi->event = atomic_read(&md_event_count);
		return 0;
	}
	/* trailer: list the unused devices */
	if (v == (void*)2) {
		status_unused(seq);
		return 0;
	}

	if (mddev_lock(mddev) < 0)
		return -EINTR;

	/* arrays with no personality, geometry or devices print nothing */
	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
		seq_printf(seq, "%s : %sactive", mdname(mddev),
						mddev->pers ? "" : "in");
		if (mddev->pers) {
			if (mddev->ro==1)
				seq_printf(seq, " (read-only)");
			if (mddev->ro==2)
				seq_printf(seq, " (auto-read-only)");
			seq_printf(seq, " %s", mddev->pers->name);
		}

		/*
		 * List the member devices with (W)rite-mostly, (F)aulty and
		 * (S)pare markers; sum the sizes of the non-faulty ones.
		 */
		size = 0;
		rdev_for_each(rdev, tmp2, mddev) {
			char b[BDEVNAME_SIZE];
			seq_printf(seq, " %s[%d]",
				bdevname(rdev->bdev,b), rdev->desc_nr);
			if (test_bit(WriteMostly, &rdev->flags))
				seq_printf(seq, "(W)");
			if (test_bit(Faulty, &rdev->flags)) {
				seq_printf(seq, "(F)");
				continue;
			} else if (rdev->raid_disk < 0)
				seq_printf(seq, "(S)"); /* spare */
			size += rdev->size;
		}

		/* running arrays report array_size, others the summed size */
		if (!list_empty(&mddev->disks)) {
			if (mddev->pers)
				seq_printf(seq, "\n      %llu blocks",
					(unsigned long long)mddev->array_size);
			else
				seq_printf(seq, "\n      %llu blocks",
					(unsigned long long)size);
		}
		/* the superblock version is shown unless it is 0.90 */
		if (mddev->persistent) {
			if (mddev->major_version != 0 ||
			    mddev->minor_version != 90) {
				seq_printf(seq," super %d.%d",
					   mddev->major_version,
					   mddev->minor_version);
			}
		} else if (mddev->external)
			seq_printf(seq, " super external:%s",
				   mddev->metadata_type);
		else
			seq_printf(seq, " super non-persistent");

		if (mddev->pers) {
			mddev->pers->status (seq, mddev);
			seq_printf(seq, "\n      ");
			/* resync state, only for personalities that can sync */
			if (mddev->pers->sync_request) {
				if (mddev->curr_resync > 2) {
					status_resync (seq, mddev);
					seq_printf(seq, "\n      ");
				} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
					seq_printf(seq, "\tresync=DELAYED\n      ");
				else if (mddev->recovery_cp < MaxSector)
					seq_printf(seq, "\tresync=PENDING\n      ");
			}
		} else
			seq_printf(seq, "\n       ");

		/* bitmap statistics, read under the bitmap's own lock */
		if ((bitmap = mddev->bitmap)) {
			unsigned long chunk_kb;
			unsigned long flags;
			spin_lock_irqsave(&bitmap->lock, flags);
			chunk_kb = bitmap->chunksize >> 10;
			seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
				"%lu%s chunk",
				bitmap->pages - bitmap->missing_pages,
				bitmap->pages,
				(bitmap->pages - bitmap->missing_pages)
					<< (PAGE_SHIFT - 10),
				chunk_kb ? chunk_kb : bitmap->chunksize,
				chunk_kb ? "KB" : "B");
			if (bitmap->file) {
				seq_printf(seq, ", file: ");
				seq_path(seq, &bitmap->file->f_path, " \t\n");
			}

			seq_printf(seq, "\n");
			spin_unlock_irqrestore(&bitmap->lock, flags);
		}

		seq_printf(seq, "\n");
	}
	mddev_unlock(mddev);

	return 0;
}
5223
/* seq_file iterator used to generate /proc/mdstat. */
static struct seq_operations md_seq_ops = {
	.start  = md_seq_start,
	.next   = md_seq_next,
	.stop   = md_seq_stop,
	.show   = md_seq_show,
};
5230
5231static int md_seq_open(struct inode *inode, struct file *file)
5232{
5233 int error;
5234 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
5235 if (mi == NULL)
5236 return -ENOMEM;
5237
5238 error = seq_open(file, &md_seq_ops);
5239 if (error)
5240 kfree(mi);
5241 else {
5242 struct seq_file *p = file->private_data;
5243 p->private = mi;
5244 mi->event = atomic_read(&md_event_count);
5245 }
5246 return error;
5247}
5248
5249static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
5250{
5251 struct seq_file *m = filp->private_data;
5252 struct mdstat_info *mi = m->private;
5253 int mask;
5254
5255 poll_wait(filp, &md_event_waiters, wait);
5256
5257
5258 mask = POLLIN | POLLRDNORM;
5259
5260 if (mi->event != atomic_read(&md_event_count))
5261 mask |= POLLERR | POLLPRI;
5262 return mask;
5263}
5264
/*
 * File operations for /proc/mdstat.  seq_release_private is used so the
 * mdstat_info attached in md_seq_open() is freed on release.
 */
static const struct file_operations md_seq_fops = {
	.owner		= THIS_MODULE,
	.open           = md_seq_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release	= seq_release_private,
	.poll		= mdstat_poll,
};
5273
5274int register_md_personality(struct mdk_personality *p)
5275{
5276 spin_lock(&pers_lock);
5277 list_add_tail(&p->list, &pers_list);
5278 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
5279 spin_unlock(&pers_lock);
5280 return 0;
5281}
5282
5283int unregister_md_personality(struct mdk_personality *p)
5284{
5285 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
5286 spin_lock(&pers_lock);
5287 list_del_init(&p->list);
5288 spin_unlock(&pers_lock);
5289 return 0;
5290}
5291
5292static int is_mddev_idle(mddev_t *mddev)
5293{
5294 mdk_rdev_t * rdev;
5295 struct list_head *tmp;
5296 int idle;
5297 long curr_events;
5298
5299 idle = 1;
5300 rdev_for_each(rdev, tmp, mddev) {
5301 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
5302 curr_events = disk_stat_read(disk, sectors[0]) +
5303 disk_stat_read(disk, sectors[1]) -
5304 atomic_read(&disk->sync_io);
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327 if (curr_events - rdev->last_events > 4096) {
5328 rdev->last_events = curr_events;
5329 idle = 0;
5330 }
5331 }
5332 return idle;
5333}
5334
5335void md_done_sync(mddev_t *mddev, int blocks, int ok)
5336{
5337
5338 atomic_sub(blocks, &mddev->recovery_active);
5339 wake_up(&mddev->recovery_wait);
5340 if (!ok) {
5341 set_bit(MD_RECOVERY_ERR, &mddev->recovery);
5342 md_wakeup_thread(mddev->thread);
5343
5344 }
5345}
5346
5347
5348
5349
5350
5351
5352
5353void md_write_start(mddev_t *mddev, struct bio *bi)
5354{
5355 if (bio_data_dir(bi) != WRITE)
5356 return;
5357
5358 BUG_ON(mddev->ro == 1);
5359 if (mddev->ro == 2) {
5360
5361 mddev->ro = 0;
5362 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5363 md_wakeup_thread(mddev->thread);
5364 md_wakeup_thread(mddev->sync_thread);
5365 }
5366 atomic_inc(&mddev->writes_pending);
5367 if (mddev->in_sync) {
5368 spin_lock_irq(&mddev->write_lock);
5369 if (mddev->in_sync) {
5370 mddev->in_sync = 0;
5371 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5372 md_wakeup_thread(mddev->thread);
5373 }
5374 spin_unlock_irq(&mddev->write_lock);
5375 }
5376 wait_event(mddev->sb_wait, mddev->flags==0);
5377}
5378
5379void md_write_end(mddev_t *mddev)
5380{
5381 if (atomic_dec_and_test(&mddev->writes_pending)) {
5382 if (mddev->safemode == 2)
5383 md_wakeup_thread(mddev->thread);
5384 else if (mddev->safemode_delay)
5385 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
5386 }
5387}
5388
5389
5390
5391
5392
5393
5394
5395void md_allow_write(mddev_t *mddev)
5396{
5397 if (!mddev->pers)
5398 return;
5399 if (mddev->ro)
5400 return;
5401
5402 spin_lock_irq(&mddev->write_lock);
5403 if (mddev->in_sync) {
5404 mddev->in_sync = 0;
5405 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5406 if (mddev->safemode_delay &&
5407 mddev->safemode == 0)
5408 mddev->safemode = 1;
5409 spin_unlock_irq(&mddev->write_lock);
5410 md_update_sb(mddev, 0);
5411 } else
5412 spin_unlock_irq(&mddev->write_lock);
5413}
5414EXPORT_SYMBOL_GPL(md_allow_write);
5415
/*
 * Wait queue used by md_do_sync(): a sync thread sleeps here while a
 * conflicting array is syncing, and it is woken whenever md_do_sync()
 * changes curr_resync to 1 or back to 0.
 */
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

/* Number of (time, sector-count) samples kept for the speed estimate. */
#define SYNC_MARKS 10
/* Minimum spacing, in jiffies, between two consecutive samples. */
#define SYNC_MARK_STEP (3*HZ)
/*
 * Body of the per-array sync thread (md_check_recovery() starts it through
 * md_register_thread()).  Drives a resync, data-check, reshape or recovery
 * of @mddev by repeatedly calling the personality's sync_request() from the
 * starting sector up to max_sectors, throttling between speed_min() and
 * speed_max().
 *
 * mddev->curr_resync doubles as a state marker in this function:
 *   0     - not syncing (set on the way out at "skip")
 *   2     - checking for conflicts with other arrays
 *   1     - like 2, but this array has yielded to a conflicting one
 *   other - current position (j) once the sync is under way
 *
 * On every exit path that reaches "skip", MD_RECOVERY_DONE is set and the
 * management thread is woken so that md_check_recovery() can reap the
 * thread.
 */
void md_do_sync(mddev_t *mddev)
{
	mddev_t *mddev2;
	unsigned int currspeed = 0,
		 window;
	sector_t max_sectors,j, io_sectors;
	unsigned long mark[SYNC_MARKS];
	sector_t mark_cnt[SYNC_MARKS];
	int last_mark,m;
	struct list_head *tmp;
	sector_t last_check;
	int skipped = 0;
	struct list_head *rtmp;
	mdk_rdev_t *rdev;
	char *desc;

	/* a previous run already finished and has not been reaped yet */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
		return;
	if (mddev->ro) /* nothing is synced on a read-only array */
		return;

	/* pick the word used for this operation in all log messages below */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
			desc = "data-check";
		else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			desc = "requested-resync";
		else
			desc = "resync";
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		desc = "reshape";
	else
		desc = "recovery";

	/*
	 * Conflict check.  Start with curr_resync == 2 and look at every
	 * other array that is itself syncing (curr_resync != 0) and for which
	 * match_mddev_units() reports a match.  If such an array is at the
	 * same or a later stage we sleep on resync_wait and start over.  To
	 * keep two arrays from waiting on each other forever, the one with
	 * the lower mddev address drops to 1 (yields); dropping to 1 makes
	 * the outer do/while run the whole check again.
	 */
	do {
		mddev->curr_resync = 2;

	try_again:
		if (kthread_should_stop()) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto skip;
		}
		for_each_mddev(mddev2, tmp) {
			if (mddev2 == mddev)
				continue;
			if (mddev2->curr_resync &&
			    match_mddev_units(mddev,mddev2)) {
				DEFINE_WAIT(wq);
				if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* yield, chosen arbitrarily by address */
					mddev->curr_resync = 1;
					wake_up(&resync_wait);
				}
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/*
					 * We have already yielded to another
					 * array; no need to wait on this one
					 * now, the next pass (at 2) will.
					 */
					continue;
				prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE);
				if (!kthread_should_stop() &&
				    mddev2->curr_resync >= mddev->curr_resync) {
					printk(KERN_INFO "md: delaying %s of %s"
					       " until %s has finished (they"
					       " share one or more physical units)\n",
					       desc, mdname(mddev), mdname(mddev2));
					/*
					 * Leaving the iteration early:
					 * presumably for_each_mddev holds a
					 * reference on mddev2 that must be
					 * dropped by hand -- confirm against
					 * the macro definition.
					 */
					mddev_put(mddev2);
					schedule();
					finish_wait(&resync_wait, &wq);
					goto try_again;
				}
				finish_wait(&resync_wait, &wq);
			}
		}
	} while (mddev->curr_resync < 2);

	/* work out where to start (j) and where to stop (max_sectors) */
	j = 0;
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/*
		 * Resync/check: resume from the recovery_cp checkpoint,
		 * unless a bitmap is in use or the sync was explicitly
		 * requested, in which case start from 0.
		 */
		max_sectors = mddev->resync_max_sectors;
		mddev->resync_mismatches = 0;

		if (!mddev->bitmap &&
		    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			j = mddev->recovery_cp;
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->size << 1;
	else {
		/*
		 * Recovery: start from the lowest recovery_offset among the
		 * active, non-faulty devices that are not yet In_sync.
		 */
		max_sectors = mddev->size << 1;
		j = MaxSector;
		rdev_for_each(rdev, rtmp, mddev)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(Faulty, &rdev->flags) &&
			    !test_bit(In_sync, &rdev->flags) &&
			    rdev->recovery_offset < j)
				j = rdev->recovery_offset;
	}

	printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
	printk(KERN_INFO "md: minimum _guaranteed_  speed:"
		" %d KB/sec/disk.\n", speed_min(mddev));
	printk(KERN_INFO "md: using maximum available idle IO bandwidth "
	       "(but not more than %d KB/sec) for %s.\n",
	       speed_max(mddev), desc);

	/* return value ignored: called only for its effect on last_events */
	is_mddev_idle(mddev);

	/* all speed samples start out as "now, zero sectors" */
	io_sectors = 0;
	for (m = 0; m < SYNC_MARKS; m++) {
		mark[m] = jiffies;
		mark_cnt[m] = io_sectors;
	}
	last_mark = 0;
	mddev->resync_mark = mark[last_mark];
	mddev->resync_mark_cnt = mark_cnt[last_mark];

	/*
	 * The throttling checks further down run once per "window" sectors
	 * of real I/O; window is 32 pages expressed in 512-byte sectors.
	 */
	window = 32*(PAGE_SIZE/512);
	printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
		window/2,(unsigned long long) max_sectors/2);

	atomic_set(&mddev->recovery_active, 0);
	init_waitqueue_head(&mddev->recovery_wait);
	last_check = 0;

	if (j>2) {
		printk(KERN_INFO
		       "md: resuming %s of %s from checkpoint.\n",
		       desc, mdname(mddev));
		mddev->curr_resync = j;
	}

	while (j < max_sectors) {
		sector_t sectors;

		skipped = 0;
		if (j >= mddev->resync_max) {
			/*
			 * Reached the resync_max limit: notify sysfs and
			 * sleep until the limit is raised past j or we are
			 * told to stop.
			 */
			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
			wait_event(mddev->recovery_wait,
				   mddev->resync_max > j
				   || kthread_should_stop());
		}
		if (kthread_should_stop())
			goto interrupted;
		/* last argument: "go faster" while below the minimum speed */
		sectors = mddev->pers->sync_request(mddev, j, &skipped,
						  currspeed < speed_min(mddev));
		if (sectors == 0) {
			/* the personality made no progress: treat as an error */
			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
			goto out;
		}

		/* skipped ranges do not count as I/O issued */
		if (!skipped) {
			io_sectors += sectors;
			atomic_add(sectors, &mddev->recovery_active);
		}

		j += sectors;
		/* never publish 0 or 1 here: those are state markers */
		if (j>1) mddev->curr_resync = j;
		mddev->curr_mark_cnt = io_sectors;
		if (last_check == 0)
			/*
			 * No throttling check has happened yet, i.e. this is
			 * still the start of the run: raise an md event.
			 */
			md_new_event(mddev);

		/* only do the checks below once per window of real I/O */
		if (last_check + window > io_sectors || j == max_sectors)
			continue;

		last_check = io_sectors;

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
			break;

	repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/*
			 * Time for a new sample.  The slot about to be
			 * overwritten is the oldest one; publish it as the
			 * reference point for the speed computation first.
			 */
			int next = (last_mark+1) % SYNC_MARKS;

			mddev->resync_mark = mark[next];
			mddev->resync_mark_cnt = mark_cnt[next];
			mark[next] = jiffies;
			/* count only I/O that has actually completed */
			mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
			last_mark = next;
		}


		if (kthread_should_stop())
			goto interrupted;

		/*
		 * Push queued requests out and give other tasks a turn
		 * before measuring the speed.
		 */
		blk_unplug(mddev->queue);
		cond_resched();

		/*
		 * Sectors since the reference mark, halved, divided by the
		 * elapsed whole seconds; the two "+1"s keep both the divisor
		 * and the result non-zero.
		 */
		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
			/((jiffies-mddev->resync_mark)/HZ +1) +1;

		/*
		 * Above the minimum: back off for half a second at a time
		 * while we exceed the maximum or the array is not idle.
		 */
		if (currspeed > speed_min(mddev)) {
			if ((currspeed > speed_max(mddev)) ||
					!is_mddev_idle(mddev)) {
				msleep(500);
				goto repeat;
			}
		}
	}
	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
	/*
	 * Common tail for completion, error and interruption: flush and wait
	 * for all outstanding sync I/O before touching the checkpoints.
	 */
 out:
	blk_unplug(mddev->queue);

	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/*
	 * Final call at position max_sectors; presumably this lets the
	 * personality tidy up -- confirm against the sync_request()
	 * implementations.
	 */
	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);

	/* record progress, unless there was an error or this was only a check */
	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
	    mddev->curr_resync > 2) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				/* interrupted: only ever move the checkpoint forward */
				if (mddev->curr_resync >= mddev->recovery_cp) {
					printk(KERN_INFO
					       "md: checkpointing %s of %s.\n",
					       desc, mdname(mddev));
					mddev->recovery_cp = mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
		} else {
			/*
			 * Reshape/recovery: advance recovery_offset of every
			 * device still being rebuilt, all the way to
			 * MaxSector if the run was not interrupted.
			 */
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
			rdev_for_each(rdev, rtmp, mddev)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < mddev->curr_resync)
					rdev->recovery_offset = mddev->curr_resync;
		}
	}
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

	/* leave the syncing state, lift resync_max and wake all waiters */
 skip:
	mddev->curr_resync = 0;
	mddev->resync_max = MaxSector;
	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
	wake_up(&resync_wait);
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * Reached when kthread_should_stop() turned true inside the main
	 * loop (the message says "signal" regardless).
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);
5711
5712
5713static int remove_and_add_spares(mddev_t *mddev)
5714{
5715 mdk_rdev_t *rdev;
5716 struct list_head *rtmp;
5717 int spares = 0;
5718
5719 rdev_for_each(rdev, rtmp, mddev)
5720 if (rdev->raid_disk >= 0 &&
5721 !mddev->external &&
5722 (test_bit(Faulty, &rdev->flags) ||
5723 ! test_bit(In_sync, &rdev->flags)) &&
5724 atomic_read(&rdev->nr_pending)==0) {
5725 if (mddev->pers->hot_remove_disk(
5726 mddev, rdev->raid_disk)==0) {
5727 char nm[20];
5728 sprintf(nm,"rd%d", rdev->raid_disk);
5729 sysfs_remove_link(&mddev->kobj, nm);
5730 rdev->raid_disk = -1;
5731 }
5732 }
5733
5734 if (mddev->degraded) {
5735 rdev_for_each(rdev, rtmp, mddev)
5736 if (rdev->raid_disk < 0
5737 && !test_bit(Faulty, &rdev->flags)) {
5738 rdev->recovery_offset = 0;
5739 if (mddev->pers->hot_add_disk(mddev,rdev)) {
5740 char nm[20];
5741 sprintf(nm, "rd%d", rdev->raid_disk);
5742 if (sysfs_create_link(&mddev->kobj,
5743 &rdev->kobj, nm))
5744 printk(KERN_WARNING
5745 "md: cannot register "
5746 "%s for %s\n",
5747 nm, mdname(mddev));
5748 spares++;
5749 md_new_event(mddev);
5750 } else
5751 break;
5752 }
5753 }
5754 return spares;
5755}
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
/*
 * Housekeeping entry point for an array's management thread; it uses
 * 'current' for signal handling, so it is presumably called from that
 * thread's own context -- confirm against the personalities' daemons.
 *
 * In order, it:
 *  - runs the bitmap daemon work, if the array has a bitmap;
 *  - returns at once for a read-only array;
 *  - turns a pending signal into "immediate safe mode" (safemode = 2);
 *  - returns early unless there is something to do (superblock flags,
 *    MD_RECOVERY_NEEDED/DONE, or a safemode transition);
 *  - under the array lock (trylock only, never blocking): marks the array
 *    in_sync when safemode allows it, writes the superblock if flagged,
 *    reaps a finished sync thread, and otherwise decides whether a
 *    reshape, recovery or resync has to be started and starts
 *    md_do_sync() in a new thread.
 */
void md_check_recovery(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	struct list_head *rtmp;


	if (mddev->bitmap)
		bitmap_daemon_work(mddev->bitmap);

	if (mddev->ro)
		return;

	if (signal_pending(current)) {
		/* only arrays whose personality can sync have a safe mode */
		if (mddev->pers->sync_request) {
			printk(KERN_INFO "md: %s in immediate safe mode\n",
			       mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	/*
	 * Quick exit unless at least one of these holds:
	 *  - superblock flags are set and metadata is not external;
	 *  - recovery is needed, or a sync thread has finished;
	 *  - safemode is 1;
	 *  - safemode is 2 with no writes pending, the array is not yet
	 *    in_sync and there is no resync checkpoint outstanding.
	 */
	if ( ! (
		(mddev->flags && !mddev->external) ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->safemode == 1) ||
		(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		))
		return;

	/* if the lock is busy, skip this round altogether */
	if (mddev_trylock(mddev)) {
		int spares = 0;

		spin_lock_irq(&mddev->write_lock);
		/* no writes in flight and nothing left to resync: mark in_sync */
		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
			mddev->in_sync = 1;
			if (mddev->persistent)
				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
		}
		/* safemode 1 is consumed here; 2 is left in place */
		if (mddev->safemode == 1)
			mddev->safemode = 0;
		spin_unlock_irq(&mddev->write_lock);

		if (mddev->flags)
			md_update_sb(mddev, 0);


		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* a sync is still in progress; nothing to start now */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			/* the sync thread has finished: reap it */
			md_unregister_thread(mddev->sync_thread);
			mddev->sync_thread = NULL;
			if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
			    !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				/*
				 * Finished without error or interruption:
				 * let the personality activate its spares.
				 */
				mddev->pers->spare_active(mddev);
			}
			md_update_sb(mddev, 1);

			/*
			 * Once the array is no longer degraded, forget every
			 * device's saved_raid_disk.
			 */
			if (!mddev->degraded)
				rdev_for_each(rdev, rtmp, mddev)
					rdev->saved_raid_disk = -1;

			mddev->recovery = 0;
			/* ask for one more pass through this function */
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			md_new_event(mddev);
			goto unlock;
		}
		/*
		 * No sync is running and none is waiting to be reaped: clear
		 * the per-run state bits before deciding what to start.
		 */
		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto unlock;
		/*
		 * Decide what to run, in priority order:
		 *  1. an unfinished reshape (reshape_position is set);
		 *  2. recovery onto spares that were just added;
		 *  3. a resync from the recovery_cp checkpoint;
		 *  4. whatever MD_RECOVERY_SYNC was already requested.
		 * If none applies there is nothing to do.
		 */
		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape(mddev) != 0)
				/* the personality refused to (re)start the reshape */
				goto unlock;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev))) {
			/* spares were added: this is a recovery, not a resync */
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done */
			goto unlock;

		if (mddev->pers->sync_request) {
			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			if (spares && mddev->bitmap && ! mddev->bitmap->file) {
				/*
				 * Spares were added and the bitmap has no
				 * backing file: rewrite the whole bitmap
				 * (presumably so the new devices carry it
				 * too -- confirm in bitmap_write_all()).
				 */
				bitmap_write_all(mddev->bitmap);
			}
			mddev->sync_thread = md_register_thread(md_do_sync,
								mddev,
								"%s_resync");
			if (!mddev->sync_thread) {
				printk(KERN_ERR "%s: could not start resync"
					" thread...\n", 
					mdname(mddev));
				/* give up on this attempt: drop all recovery state */
				mddev->recovery = 0;
			} else
				md_wakeup_thread(mddev->sync_thread);
			md_new_event(mddev);
		}
	unlock:
		mddev_unlock(mddev);
	}
}
5915
5916static int md_notify_reboot(struct notifier_block *this,
5917 unsigned long code, void *x)
5918{
5919 struct list_head *tmp;
5920 mddev_t *mddev;
5921
5922 if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
5923
5924 printk(KERN_INFO "md: stopping all md devices.\n");
5925
5926 for_each_mddev(mddev, tmp)
5927 if (mddev_trylock(mddev)) {
5928 do_md_stop (mddev, 1);
5929 mddev_unlock(mddev);
5930 }
5931
5932
5933
5934
5935
5936
5937 mdelay(1000*1);
5938 }
5939 return NOTIFY_DONE;
5940}
5941
5942static struct notifier_block md_notifier = {
5943 .notifier_call = md_notify_reboot,
5944 .next = NULL,
5945 .priority = INT_MAX,
5946};
5947
5948static void md_geninit(void)
5949{
5950 struct proc_dir_entry *p;
5951
5952 dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
5953
5954 p = create_proc_entry("mdstat", S_IRUGO, NULL);
5955 if (p)
5956 p->proc_fops = &md_seq_fops;
5957}
5958
5959static int __init md_init(void)
5960{
5961 if (register_blkdev(MAJOR_NR, "md"))
5962 return -1;
5963 if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
5964 unregister_blkdev(MAJOR_NR, "md");
5965 return -1;
5966 }
5967 blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE,
5968 md_probe, NULL, NULL);
5969 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
5970 md_probe, NULL, NULL);
5971
5972 register_reboot_notifier(&md_notifier);
5973 raid_table_header = register_sysctl_table(raid_root_table);
5974
5975 md_geninit();
5976 return (0);
5977}
5978
5979
5980#ifndef MODULE
5981
5982
5983
5984
5985
5986
5987static LIST_HEAD(all_detected_devices);
5988struct detected_devices_node {
5989 struct list_head list;
5990 dev_t dev;
5991};
5992
5993void md_autodetect_dev(dev_t dev)
5994{
5995 struct detected_devices_node *node_detected_dev;
5996
5997 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
5998 if (node_detected_dev) {
5999 node_detected_dev->dev = dev;
6000 list_add_tail(&node_detected_dev->list, &all_detected_devices);
6001 } else {
6002 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
6003 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
6004 }
6005}
6006
6007
6008static void autostart_arrays(int part)
6009{
6010 mdk_rdev_t *rdev;
6011 struct detected_devices_node *node_detected_dev;
6012 dev_t dev;
6013 int i_scanned, i_passed;
6014
6015 i_scanned = 0;
6016 i_passed = 0;
6017
6018 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
6019
6020 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
6021 i_scanned++;
6022 node_detected_dev = list_entry(all_detected_devices.next,
6023 struct detected_devices_node, list);
6024 list_del(&node_detected_dev->list);
6025 dev = node_detected_dev->dev;
6026 kfree(node_detected_dev);
6027 rdev = md_import_device(dev,0, 90);
6028 if (IS_ERR(rdev))
6029 continue;
6030
6031 if (test_bit(Faulty, &rdev->flags)) {
6032 MD_BUG();
6033 continue;
6034 }
6035 set_bit(AutoDetected, &rdev->flags);
6036 list_add(&rdev->same_set, &pending_raid_disks);
6037 i_passed++;
6038 }
6039
6040 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
6041 i_scanned, i_passed);
6042
6043 autorun_devices(part);
6044}
6045
6046#endif
6047
6048static __exit void md_exit(void)
6049{
6050 mddev_t *mddev;
6051 struct list_head *tmp;
6052
6053 blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS);
6054 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
6055
6056 unregister_blkdev(MAJOR_NR,"md");
6057 unregister_blkdev(mdp_major, "mdp");
6058 unregister_reboot_notifier(&md_notifier);
6059 unregister_sysctl_table(raid_table_header);
6060 remove_proc_entry("mdstat", NULL);
6061 for_each_mddev(mddev, tmp) {
6062 struct gendisk *disk = mddev->gendisk;
6063 if (!disk)
6064 continue;
6065 export_array(mddev);
6066 del_gendisk(disk);
6067 put_disk(disk);
6068 mddev->gendisk = NULL;
6069 mddev_put(mddev);
6070 }
6071}
6072
6073subsys_initcall(md_init);
6074module_exit(md_exit)
6075
6076static int get_ro(char *buffer, struct kernel_param *kp)
6077{
6078 return sprintf(buffer, "%d", start_readonly);
6079}
6080static int set_ro(const char *val, struct kernel_param *kp)
6081{
6082 char *e;
6083 int num = simple_strtoul(val, &e, 10);
6084 if (*val && (*e == '\0' || *e == '\n')) {
6085 start_readonly = num;
6086 return 0;
6087 }
6088 return -EINVAL;
6089}
6090
/* "start_ro": owner read/write, routed through set_ro()/get_ro() above. */
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
/* "start_dirty_degraded": plain int, world-readable, owner-writable. */
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);


/* Symbols exported for use by other modules. */
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end);
EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
/* Module metadata: licence and the aliases this module answers to. */
MODULE_LICENSE("GPL");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
6108