1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/buffer_head.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/hdreg.h>
43#include <linux/proc_fs.h>
44#include <linux/random.h>
45#include <linux/reboot.h>
46#include <linux/file.h>
47#include <linux/delay.h>
48#include <linux/raid/md_p.h>
49#include <linux/raid/md_u.h>
50#include "md.h"
51#include "bitmap.h"
52
/*
 * Debug tracing switch.  dprintk() only reaches printk() when DEBUG is
 * non-zero; the result of the expression is discarded either way.
 */
#define DEBUG 0
#define dprintk(x...)	((void)(DEBUG && printk(x)))
55
56
#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* List of personalities searched by find_pers(), with its spinlock. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

/*
 * MD_BUG() logs the file and line of the invocation and then calls
 * md_print_devices().  The expansion is a bare brace block, so it must
 * always be used as a complete statement.
 */
#define MD_BUG(x...)							\
	{								\
		printk("md: bug in file %s, line %d\n",			\
		       __FILE__, __LINE__);				\
		md_print_devices();					\
	}
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
 * System-wide fallback sync speed limits.  speed_min()/speed_max() use
 * these whenever the per-array value is zero; both are exposed for
 * tuning through raid_table[].
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
85static inline int speed_min(mddev_t *mddev)
86{
87 return mddev->sync_speed_min ?
88 mddev->sync_speed_min : sysctl_speed_limit_min;
89}
90
91static inline int speed_max(mddev_t *mddev)
92{
93 return mddev->sync_speed_max ?
94 mddev->sync_speed_max : sysctl_speed_limit_max;
95}
96
97static struct ctl_table_header *raid_table_header;
98
99static ctl_table raid_table[] = {
100 {
101 .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
102 .procname = "speed_limit_min",
103 .data = &sysctl_speed_limit_min,
104 .maxlen = sizeof(int),
105 .mode = S_IRUGO|S_IWUSR,
106 .proc_handler = &proc_dointvec,
107 },
108 {
109 .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
110 .procname = "speed_limit_max",
111 .data = &sysctl_speed_limit_max,
112 .maxlen = sizeof(int),
113 .mode = S_IRUGO|S_IWUSR,
114 .proc_handler = &proc_dointvec,
115 },
116 { .ctl_name = 0 }
117};
118
119static ctl_table raid_dir_table[] = {
120 {
121 .ctl_name = DEV_RAID,
122 .procname = "raid",
123 .maxlen = 0,
124 .mode = S_IRUGO|S_IXUGO,
125 .child = raid_table,
126 },
127 { .ctl_name = 0 }
128};
129
130static ctl_table raid_root_table[] = {
131 {
132 .ctl_name = CTL_DEV,
133 .procname = "dev",
134 .maxlen = 0,
135 .mode = 0555,
136 .child = raid_dir_table,
137 },
138 { .ctl_name = 0 }
139};
140
141static struct block_device_operations md_fops;
142
143static int start_readonly;
144
145
146
147
148
149
150
151
152
153
154
155static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
156static atomic_t md_event_count;
157void md_new_event(mddev_t *mddev)
158{
159 atomic_inc(&md_event_count);
160 wake_up(&md_event_waiters);
161}
162EXPORT_SYMBOL_GPL(md_new_event);
163
164
165
166
167static void md_new_event_inintr(mddev_t *mddev)
168{
169 atomic_inc(&md_event_count);
170 wake_up(&md_event_waiters);
171}
172
173
174
175
176
/*
 * Every mddev_t is linked on all_mddevs through its ->all_mddevs member.
 * mddev_find(), mddev_put() and for_each_mddev() hold all_mddevs_lock
 * while they walk or modify the list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
179
180
181
182
183
184
185
186
187
/*
 * Iterate over every mddev on all_mddevs.
 *
 * @mddev is the loop cursor (mddev_t *), @tmp a struct list_head * scratch
 * variable.  Before each iteration a reference is taken on the entry about
 * to be visited while all_mddevs_lock is held; the lock is then dropped and
 * the reference on the previously visited entry is released.  The loop body
 * therefore runs without the spinlock but with a reference on @mddev.  The
 * lock is re-taken only to step to the next list entry.
 *
 * Leaving the loop early (break/return) skips the mddev_put() that the next
 * condition evaluation would have done, so the reference on the current
 * @mddev is still held at that point.
 */
#define for_each_mddev(mddev,tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		tmp = all_mddevs.next;					\
		mddev = NULL;});					\
	     ({ if (tmp != &all_mddevs)					\
			mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (mddev) mddev_put(mddev);				\
		mddev = list_entry(tmp, mddev_t, all_mddevs);		\
		tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		tmp = tmp->next;})					\
		)
202
203
204
205
206
207
208
209
210
211static int md_make_request(struct request_queue *q, struct bio *bio)
212{
213 mddev_t *mddev = q->queuedata;
214 int rv;
215 if (mddev == NULL || mddev->pers == NULL) {
216 bio_io_error(bio);
217 return 0;
218 }
219 rcu_read_lock();
220 if (mddev->suspended) {
221 DEFINE_WAIT(__wait);
222 for (;;) {
223 prepare_to_wait(&mddev->sb_wait, &__wait,
224 TASK_UNINTERRUPTIBLE);
225 if (!mddev->suspended)
226 break;
227 rcu_read_unlock();
228 schedule();
229 rcu_read_lock();
230 }
231 finish_wait(&mddev->sb_wait, &__wait);
232 }
233 atomic_inc(&mddev->active_io);
234 rcu_read_unlock();
235 rv = mddev->pers->make_request(q, bio);
236 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
237 wake_up(&mddev->sb_wait);
238
239 return rv;
240}
241
242static void mddev_suspend(mddev_t *mddev)
243{
244 BUG_ON(mddev->suspended);
245 mddev->suspended = 1;
246 synchronize_rcu();
247 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
248 mddev->pers->quiesce(mddev, 1);
249 md_unregister_thread(mddev->thread);
250 mddev->thread = NULL;
251
252
253
254
255
256}
257
258static void mddev_resume(mddev_t *mddev)
259{
260 mddev->suspended = 0;
261 wake_up(&mddev->sb_wait);
262 mddev->pers->quiesce(mddev, 0);
263}
264
265
266static inline mddev_t *mddev_get(mddev_t *mddev)
267{
268 atomic_inc(&mddev->active);
269 return mddev;
270}
271
272static void mddev_delayed_delete(struct work_struct *ws);
273
274static void mddev_put(mddev_t *mddev)
275{
276 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
277 return;
278 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
279 !mddev->hold_active) {
280 list_del(&mddev->all_mddevs);
281 if (mddev->gendisk) {
282
283
284
285
286
287
288 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
289 schedule_work(&mddev->del_work);
290 } else
291 kfree(mddev);
292 }
293 spin_unlock(&all_mddevs_lock);
294}
295
296static mddev_t * mddev_find(dev_t unit)
297{
298 mddev_t *mddev, *new = NULL;
299
300 retry:
301 spin_lock(&all_mddevs_lock);
302
303 if (unit) {
304 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
305 if (mddev->unit == unit) {
306 mddev_get(mddev);
307 spin_unlock(&all_mddevs_lock);
308 kfree(new);
309 return mddev;
310 }
311
312 if (new) {
313 list_add(&new->all_mddevs, &all_mddevs);
314 spin_unlock(&all_mddevs_lock);
315 new->hold_active = UNTIL_IOCTL;
316 return new;
317 }
318 } else if (new) {
319
320 static int next_minor = 512;
321 int start = next_minor;
322 int is_free = 0;
323 int dev = 0;
324 while (!is_free) {
325 dev = MKDEV(MD_MAJOR, next_minor);
326 next_minor++;
327 if (next_minor > MINORMASK)
328 next_minor = 0;
329 if (next_minor == start) {
330
331 spin_unlock(&all_mddevs_lock);
332 kfree(new);
333 return NULL;
334 }
335
336 is_free = 1;
337 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
338 if (mddev->unit == dev) {
339 is_free = 0;
340 break;
341 }
342 }
343 new->unit = dev;
344 new->md_minor = MINOR(dev);
345 new->hold_active = UNTIL_STOP;
346 list_add(&new->all_mddevs, &all_mddevs);
347 spin_unlock(&all_mddevs_lock);
348 return new;
349 }
350 spin_unlock(&all_mddevs_lock);
351
352 new = kzalloc(sizeof(*new), GFP_KERNEL);
353 if (!new)
354 return NULL;
355
356 new->unit = unit;
357 if (MAJOR(unit) == MD_MAJOR)
358 new->md_minor = MINOR(unit);
359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361
362 mutex_init(&new->open_mutex);
363 mutex_init(&new->reconfig_mutex);
364 INIT_LIST_HEAD(&new->disks);
365 INIT_LIST_HEAD(&new->all_mddevs);
366 init_timer(&new->safemode_timer);
367 atomic_set(&new->active, 1);
368 atomic_set(&new->openers, 0);
369 atomic_set(&new->active_io, 0);
370 spin_lock_init(&new->write_lock);
371 init_waitqueue_head(&new->sb_wait);
372 init_waitqueue_head(&new->recovery_wait);
373 new->reshape_position = MaxSector;
374 new->resync_min = 0;
375 new->resync_max = MaxSector;
376 new->level = LEVEL_NONE;
377
378 goto retry;
379}
380
381static inline int mddev_lock(mddev_t * mddev)
382{
383 return mutex_lock_interruptible(&mddev->reconfig_mutex);
384}
385
386static inline int mddev_is_locked(mddev_t *mddev)
387{
388 return mutex_is_locked(&mddev->reconfig_mutex);
389}
390
391static inline int mddev_trylock(mddev_t * mddev)
392{
393 return mutex_trylock(&mddev->reconfig_mutex);
394}
395
396static inline void mddev_unlock(mddev_t * mddev)
397{
398 mutex_unlock(&mddev->reconfig_mutex);
399
400 md_wakeup_thread(mddev->thread);
401}
402
403static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
404{
405 mdk_rdev_t *rdev;
406
407 list_for_each_entry(rdev, &mddev->disks, same_set)
408 if (rdev->desc_nr == nr)
409 return rdev;
410
411 return NULL;
412}
413
414static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
415{
416 mdk_rdev_t *rdev;
417
418 list_for_each_entry(rdev, &mddev->disks, same_set)
419 if (rdev->bdev->bd_dev == dev)
420 return rdev;
421
422 return NULL;
423}
424
425static struct mdk_personality *find_pers(int level, char *clevel)
426{
427 struct mdk_personality *pers;
428 list_for_each_entry(pers, &pers_list, list) {
429 if (level != LEVEL_NONE && pers->level == level)
430 return pers;
431 if (strcmp(pers->name, clevel)==0)
432 return pers;
433 }
434 return NULL;
435}
436
437
438static inline sector_t calc_dev_sboffset(struct block_device *bdev)
439{
440 sector_t num_sectors = bdev->bd_inode->i_size / 512;
441 return MD_NEW_SIZE_SECTORS(num_sectors);
442}
443
444static int alloc_disk_sb(mdk_rdev_t * rdev)
445{
446 if (rdev->sb_page)
447 MD_BUG();
448
449 rdev->sb_page = alloc_page(GFP_KERNEL);
450 if (!rdev->sb_page) {
451 printk(KERN_ALERT "md: out of memory.\n");
452 return -ENOMEM;
453 }
454
455 return 0;
456}
457
458static void free_disk_sb(mdk_rdev_t * rdev)
459{
460 if (rdev->sb_page) {
461 put_page(rdev->sb_page);
462 rdev->sb_loaded = 0;
463 rdev->sb_page = NULL;
464 rdev->sb_start = 0;
465 rdev->sectors = 0;
466 }
467}
468
469
470static void super_written(struct bio *bio, int error)
471{
472 mdk_rdev_t *rdev = bio->bi_private;
473 mddev_t *mddev = rdev->mddev;
474
475 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
476 printk("md: super_written gets error=%d, uptodate=%d\n",
477 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
478 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
479 md_error(mddev, rdev);
480 }
481
482 if (atomic_dec_and_test(&mddev->pending_writes))
483 wake_up(&mddev->sb_wait);
484 bio_put(bio);
485}
486
487static void super_written_barrier(struct bio *bio, int error)
488{
489 struct bio *bio2 = bio->bi_private;
490 mdk_rdev_t *rdev = bio2->bi_private;
491 mddev_t *mddev = rdev->mddev;
492
493 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
494 error == -EOPNOTSUPP) {
495 unsigned long flags;
496
497 set_bit(BarriersNotsupp, &rdev->flags);
498 mddev->barriers_work = 0;
499 spin_lock_irqsave(&mddev->write_lock, flags);
500 bio2->bi_next = mddev->biolist;
501 mddev->biolist = bio2;
502 spin_unlock_irqrestore(&mddev->write_lock, flags);
503 wake_up(&mddev->sb_wait);
504 bio_put(bio);
505 } else {
506 bio_put(bio2);
507 bio->bi_private = rdev;
508 super_written(bio, error);
509 }
510}
511
512void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
513 sector_t sector, int size, struct page *page)
514{
515
516
517
518
519
520
521
522
523
524 struct bio *bio = bio_alloc(GFP_NOIO, 1);
525 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
526
527 bio->bi_bdev = rdev->bdev;
528 bio->bi_sector = sector;
529 bio_add_page(bio, page, size, 0);
530 bio->bi_private = rdev;
531 bio->bi_end_io = super_written;
532 bio->bi_rw = rw;
533
534 atomic_inc(&mddev->pending_writes);
535 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
536 struct bio *rbio;
537 rw |= (1<<BIO_RW_BARRIER);
538 rbio = bio_clone(bio, GFP_NOIO);
539 rbio->bi_private = bio;
540 rbio->bi_end_io = super_written_barrier;
541 submit_bio(rw, rbio);
542 } else
543 submit_bio(rw, bio);
544}
545
546void md_super_wait(mddev_t *mddev)
547{
548
549
550
551 DEFINE_WAIT(wq);
552 for(;;) {
553 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
554 if (atomic_read(&mddev->pending_writes)==0)
555 break;
556 while (mddev->biolist) {
557 struct bio *bio;
558 spin_lock_irq(&mddev->write_lock);
559 bio = mddev->biolist;
560 mddev->biolist = bio->bi_next ;
561 bio->bi_next = NULL;
562 spin_unlock_irq(&mddev->write_lock);
563 submit_bio(bio->bi_rw, bio);
564 }
565 schedule();
566 }
567 finish_wait(&mddev->sb_wait, &wq);
568}
569
570static void bi_complete(struct bio *bio, int error)
571{
572 complete((struct completion*)bio->bi_private);
573}
574
575int sync_page_io(struct block_device *bdev, sector_t sector, int size,
576 struct page *page, int rw)
577{
578 struct bio *bio = bio_alloc(GFP_NOIO, 1);
579 struct completion event;
580 int ret;
581
582 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
583
584 bio->bi_bdev = bdev;
585 bio->bi_sector = sector;
586 bio_add_page(bio, page, size, 0);
587 init_completion(&event);
588 bio->bi_private = &event;
589 bio->bi_end_io = bi_complete;
590 submit_bio(rw, bio);
591 wait_for_completion(&event);
592
593 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
594 bio_put(bio);
595 return ret;
596}
597EXPORT_SYMBOL_GPL(sync_page_io);
598
599static int read_disk_sb(mdk_rdev_t * rdev, int size)
600{
601 char b[BDEVNAME_SIZE];
602 if (!rdev->sb_page) {
603 MD_BUG();
604 return -EINVAL;
605 }
606 if (rdev->sb_loaded)
607 return 0;
608
609
610 if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
611 goto fail;
612 rdev->sb_loaded = 1;
613 return 0;
614
615fail:
616 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
617 bdevname(rdev->bdev,b));
618 return -EINVAL;
619}
620
621static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
622{
623 return sb1->set_uuid0 == sb2->set_uuid0 &&
624 sb1->set_uuid1 == sb2->set_uuid1 &&
625 sb1->set_uuid2 == sb2->set_uuid2 &&
626 sb1->set_uuid3 == sb2->set_uuid3;
627}
628
629static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
630{
631 int ret;
632 mdp_super_t *tmp1, *tmp2;
633
634 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
635 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
636
637 if (!tmp1 || !tmp2) {
638 ret = 0;
639 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
640 goto abort;
641 }
642
643 *tmp1 = *sb1;
644 *tmp2 = *sb2;
645
646
647
648
649 tmp1->nr_disks = 0;
650 tmp2->nr_disks = 0;
651
652 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
653abort:
654 kfree(tmp1);
655 kfree(tmp2);
656 return ret;
657}
658
659
660static u32 md_csum_fold(u32 csum)
661{
662 csum = (csum & 0xffff) + (csum >> 16);
663 return (csum & 0xffff) + (csum >> 16);
664}
665
666static unsigned int calc_sb_csum(mdp_super_t * sb)
667{
668 u64 newcsum = 0;
669 u32 *sb32 = (u32*)sb;
670 int i;
671 unsigned int disk_csum, csum;
672
673 disk_csum = sb->sb_csum;
674 sb->sb_csum = 0;
675
676 for (i = 0; i < MD_SB_BYTES/4 ; i++)
677 newcsum += sb32[i];
678 csum = (newcsum & 0xffffffff) + (newcsum>>32);
679
680
681#ifdef CONFIG_ALPHA
682
683
684
685
686
687
688
689
690 sb->sb_csum = md_csum_fold(disk_csum);
691#else
692 sb->sb_csum = disk_csum;
693#endif
694 return csum;
695}
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728struct super_type {
729 char *name;
730 struct module *owner;
731 int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
732 int minor_version);
733 int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
734 void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
735 unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev,
736 sector_t num_sectors);
737};
738
739
740
741
742
743
744
745
746
747int md_check_no_bitmap(mddev_t *mddev)
748{
749 if (!mddev->bitmap_file && !mddev->bitmap_offset)
750 return 0;
751 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
752 mdname(mddev), mddev->pers->name);
753 return 1;
754}
755EXPORT_SYMBOL(md_check_no_bitmap);
756
757
758
759
760static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
761{
762 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
763 mdp_super_t *sb;
764 int ret;
765
766
767
768
769
770
771
772 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
773
774 ret = read_disk_sb(rdev, MD_SB_BYTES);
775 if (ret) return ret;
776
777 ret = -EINVAL;
778
779 bdevname(rdev->bdev, b);
780 sb = (mdp_super_t*)page_address(rdev->sb_page);
781
782 if (sb->md_magic != MD_SB_MAGIC) {
783 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
784 b);
785 goto abort;
786 }
787
788 if (sb->major_version != 0 ||
789 sb->minor_version < 90 ||
790 sb->minor_version > 91) {
791 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
792 sb->major_version, sb->minor_version,
793 b);
794 goto abort;
795 }
796
797 if (sb->raid_disks <= 0)
798 goto abort;
799
800 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
801 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
802 b);
803 goto abort;
804 }
805
806 rdev->preferred_minor = sb->md_minor;
807 rdev->data_offset = 0;
808 rdev->sb_size = MD_SB_BYTES;
809
810 if (sb->level == LEVEL_MULTIPATH)
811 rdev->desc_nr = -1;
812 else
813 rdev->desc_nr = sb->this_disk.number;
814
815 if (!refdev) {
816 ret = 1;
817 } else {
818 __u64 ev1, ev2;
819 mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
820 if (!uuid_equal(refsb, sb)) {
821 printk(KERN_WARNING "md: %s has different UUID to %s\n",
822 b, bdevname(refdev->bdev,b2));
823 goto abort;
824 }
825 if (!sb_equal(refsb, sb)) {
826 printk(KERN_WARNING "md: %s has same UUID"
827 " but different superblock to %s\n",
828 b, bdevname(refdev->bdev, b2));
829 goto abort;
830 }
831 ev1 = md_event(sb);
832 ev2 = md_event(refsb);
833 if (ev1 > ev2)
834 ret = 1;
835 else
836 ret = 0;
837 }
838 rdev->sectors = rdev->sb_start;
839
840 if (rdev->sectors < sb->size * 2 && sb->level > 1)
841
842 ret = -EINVAL;
843
844 abort:
845 return ret;
846}
847
848
849
850
851static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
852{
853 mdp_disk_t *desc;
854 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
855 __u64 ev1 = md_event(sb);
856
857 rdev->raid_disk = -1;
858 clear_bit(Faulty, &rdev->flags);
859 clear_bit(In_sync, &rdev->flags);
860 clear_bit(WriteMostly, &rdev->flags);
861 clear_bit(BarriersNotsupp, &rdev->flags);
862
863 if (mddev->raid_disks == 0) {
864 mddev->major_version = 0;
865 mddev->minor_version = sb->minor_version;
866 mddev->patch_version = sb->patch_version;
867 mddev->external = 0;
868 mddev->chunk_sectors = sb->chunk_size >> 9;
869 mddev->ctime = sb->ctime;
870 mddev->utime = sb->utime;
871 mddev->level = sb->level;
872 mddev->clevel[0] = 0;
873 mddev->layout = sb->layout;
874 mddev->raid_disks = sb->raid_disks;
875 mddev->dev_sectors = sb->size * 2;
876 mddev->events = ev1;
877 mddev->bitmap_offset = 0;
878 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
879
880 if (mddev->minor_version >= 91) {
881 mddev->reshape_position = sb->reshape_position;
882 mddev->delta_disks = sb->delta_disks;
883 mddev->new_level = sb->new_level;
884 mddev->new_layout = sb->new_layout;
885 mddev->new_chunk_sectors = sb->new_chunk >> 9;
886 } else {
887 mddev->reshape_position = MaxSector;
888 mddev->delta_disks = 0;
889 mddev->new_level = mddev->level;
890 mddev->new_layout = mddev->layout;
891 mddev->new_chunk_sectors = mddev->chunk_sectors;
892 }
893
894 if (sb->state & (1<<MD_SB_CLEAN))
895 mddev->recovery_cp = MaxSector;
896 else {
897 if (sb->events_hi == sb->cp_events_hi &&
898 sb->events_lo == sb->cp_events_lo) {
899 mddev->recovery_cp = sb->recovery_cp;
900 } else
901 mddev->recovery_cp = 0;
902 }
903
904 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
905 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
906 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
907 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
908
909 mddev->max_disks = MD_SB_DISKS;
910
911 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
912 mddev->bitmap_file == NULL)
913 mddev->bitmap_offset = mddev->default_bitmap_offset;
914
915 } else if (mddev->pers == NULL) {
916
917 ++ev1;
918 if (ev1 < mddev->events)
919 return -EINVAL;
920 } else if (mddev->bitmap) {
921
922
923
924 if (ev1 < mddev->bitmap->events_cleared)
925 return 0;
926 } else {
927 if (ev1 < mddev->events)
928
929 return 0;
930 }
931
932 if (mddev->level != LEVEL_MULTIPATH) {
933 desc = sb->disks + rdev->desc_nr;
934
935 if (desc->state & (1<<MD_DISK_FAULTY))
936 set_bit(Faulty, &rdev->flags);
937 else if (desc->state & (1<<MD_DISK_SYNC)
938) {
939 set_bit(In_sync, &rdev->flags);
940 rdev->raid_disk = desc->raid_disk;
941 }
942 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
943 set_bit(WriteMostly, &rdev->flags);
944 } else
945 set_bit(In_sync, &rdev->flags);
946 return 0;
947}
948
949
950
951
952static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
953{
954 mdp_super_t *sb;
955 mdk_rdev_t *rdev2;
956 int next_spare = mddev->raid_disks;
957
958
959
960
961
962
963
964
965
966
967
968
969 int i;
970 int active=0, working=0,failed=0,spare=0,nr_disks=0;
971
972 rdev->sb_size = MD_SB_BYTES;
973
974 sb = (mdp_super_t*)page_address(rdev->sb_page);
975
976 memset(sb, 0, sizeof(*sb));
977
978 sb->md_magic = MD_SB_MAGIC;
979 sb->major_version = mddev->major_version;
980 sb->patch_version = mddev->patch_version;
981 sb->gvalid_words = 0;
982 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
983 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
984 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
985 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
986
987 sb->ctime = mddev->ctime;
988 sb->level = mddev->level;
989 sb->size = mddev->dev_sectors / 2;
990 sb->raid_disks = mddev->raid_disks;
991 sb->md_minor = mddev->md_minor;
992 sb->not_persistent = 0;
993 sb->utime = mddev->utime;
994 sb->state = 0;
995 sb->events_hi = (mddev->events>>32);
996 sb->events_lo = (u32)mddev->events;
997
998 if (mddev->reshape_position == MaxSector)
999 sb->minor_version = 90;
1000 else {
1001 sb->minor_version = 91;
1002 sb->reshape_position = mddev->reshape_position;
1003 sb->new_level = mddev->new_level;
1004 sb->delta_disks = mddev->delta_disks;
1005 sb->new_layout = mddev->new_layout;
1006 sb->new_chunk = mddev->new_chunk_sectors << 9;
1007 }
1008 mddev->minor_version = sb->minor_version;
1009 if (mddev->in_sync)
1010 {
1011 sb->recovery_cp = mddev->recovery_cp;
1012 sb->cp_events_hi = (mddev->events>>32);
1013 sb->cp_events_lo = (u32)mddev->events;
1014 if (mddev->recovery_cp == MaxSector)
1015 sb->state = (1<< MD_SB_CLEAN);
1016 } else
1017 sb->recovery_cp = 0;
1018
1019 sb->layout = mddev->layout;
1020 sb->chunk_size = mddev->chunk_sectors << 9;
1021
1022 if (mddev->bitmap && mddev->bitmap_file == NULL)
1023 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1024
1025 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1026 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1027 mdp_disk_t *d;
1028 int desc_nr;
1029 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
1030 && !test_bit(Faulty, &rdev2->flags))
1031 desc_nr = rdev2->raid_disk;
1032 else
1033 desc_nr = next_spare++;
1034 rdev2->desc_nr = desc_nr;
1035 d = &sb->disks[rdev2->desc_nr];
1036 nr_disks++;
1037 d->number = rdev2->desc_nr;
1038 d->major = MAJOR(rdev2->bdev->bd_dev);
1039 d->minor = MINOR(rdev2->bdev->bd_dev);
1040 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
1041 && !test_bit(Faulty, &rdev2->flags))
1042 d->raid_disk = rdev2->raid_disk;
1043 else
1044 d->raid_disk = rdev2->desc_nr;
1045 if (test_bit(Faulty, &rdev2->flags))
1046 d->state = (1<<MD_DISK_FAULTY);
1047 else if (test_bit(In_sync, &rdev2->flags)) {
1048 d->state = (1<<MD_DISK_ACTIVE);
1049 d->state |= (1<<MD_DISK_SYNC);
1050 active++;
1051 working++;
1052 } else {
1053 d->state = 0;
1054 spare++;
1055 working++;
1056 }
1057 if (test_bit(WriteMostly, &rdev2->flags))
1058 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1059 }
1060
1061 for (i=0 ; i < mddev->raid_disks ; i++) {
1062 mdp_disk_t *d = &sb->disks[i];
1063 if (d->state == 0 && d->number == 0) {
1064 d->number = i;
1065 d->raid_disk = i;
1066 d->state = (1<<MD_DISK_REMOVED);
1067 d->state |= (1<<MD_DISK_FAULTY);
1068 failed++;
1069 }
1070 }
1071 sb->nr_disks = nr_disks;
1072 sb->active_disks = active;
1073 sb->working_disks = working;
1074 sb->failed_disks = failed;
1075 sb->spare_disks = spare;
1076
1077 sb->this_disk = sb->disks[rdev->desc_nr];
1078 sb->sb_csum = calc_sb_csum(sb);
1079}
1080
1081
1082
1083
1084static unsigned long long
1085super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1086{
1087 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1088 return 0;
1089 if (rdev->mddev->bitmap_offset)
1090 return 0;
1091 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
1092 if (!num_sectors || num_sectors > rdev->sb_start)
1093 num_sectors = rdev->sb_start;
1094 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1095 rdev->sb_page);
1096 md_super_wait(rdev->mddev);
1097 return num_sectors / 2;
1098}
1099
1100
1101
1102
1103
1104
1105static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1106{
1107 __le32 disk_csum;
1108 u32 csum;
1109 unsigned long long newcsum;
1110 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1111 __le32 *isuper = (__le32*)sb;
1112 int i;
1113
1114 disk_csum = sb->sb_csum;
1115 sb->sb_csum = 0;
1116 newcsum = 0;
1117 for (i=0; size>=4; size -= 4 )
1118 newcsum += le32_to_cpu(*isuper++);
1119
1120 if (size == 2)
1121 newcsum += le16_to_cpu(*(__le16*) isuper);
1122
1123 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1124 sb->sb_csum = disk_csum;
1125 return cpu_to_le32(csum);
1126}
1127
1128static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1129{
1130 struct mdp_superblock_1 *sb;
1131 int ret;
1132 sector_t sb_start;
1133 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1134 int bmask;
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144 switch(minor_version) {
1145 case 0:
1146 sb_start = rdev->bdev->bd_inode->i_size >> 9;
1147 sb_start -= 8*2;
1148 sb_start &= ~(sector_t)(4*2-1);
1149 break;
1150 case 1:
1151 sb_start = 0;
1152 break;
1153 case 2:
1154 sb_start = 8;
1155 break;
1156 default:
1157 return -EINVAL;
1158 }
1159 rdev->sb_start = sb_start;
1160
1161
1162
1163
1164 ret = read_disk_sb(rdev, 4096);
1165 if (ret) return ret;
1166
1167
1168 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1169
1170 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1171 sb->major_version != cpu_to_le32(1) ||
1172 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1173 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1174 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1175 return -EINVAL;
1176
1177 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1178 printk("md: invalid superblock checksum on %s\n",
1179 bdevname(rdev->bdev,b));
1180 return -EINVAL;
1181 }
1182 if (le64_to_cpu(sb->data_size) < 10) {
1183 printk("md: data_size too small on %s\n",
1184 bdevname(rdev->bdev,b));
1185 return -EINVAL;
1186 }
1187
1188 rdev->preferred_minor = 0xffff;
1189 rdev->data_offset = le64_to_cpu(sb->data_offset);
1190 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1191
1192 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1193 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1194 if (rdev->sb_size & bmask)
1195 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1196
1197 if (minor_version
1198 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1199 return -EINVAL;
1200
1201 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1202 rdev->desc_nr = -1;
1203 else
1204 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1205
1206 if (!refdev) {
1207 ret = 1;
1208 } else {
1209 __u64 ev1, ev2;
1210 struct mdp_superblock_1 *refsb =
1211 (struct mdp_superblock_1*)page_address(refdev->sb_page);
1212
1213 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1214 sb->level != refsb->level ||
1215 sb->layout != refsb->layout ||
1216 sb->chunksize != refsb->chunksize) {
1217 printk(KERN_WARNING "md: %s has strangely different"
1218 " superblock to %s\n",
1219 bdevname(rdev->bdev,b),
1220 bdevname(refdev->bdev,b2));
1221 return -EINVAL;
1222 }
1223 ev1 = le64_to_cpu(sb->events);
1224 ev2 = le64_to_cpu(refsb->events);
1225
1226 if (ev1 > ev2)
1227 ret = 1;
1228 else
1229 ret = 0;
1230 }
1231 if (minor_version)
1232 rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
1233 le64_to_cpu(sb->data_offset);
1234 else
1235 rdev->sectors = rdev->sb_start;
1236 if (rdev->sectors < le64_to_cpu(sb->data_size))
1237 return -EINVAL;
1238 rdev->sectors = le64_to_cpu(sb->data_size);
1239 if (le64_to_cpu(sb->size) > rdev->sectors)
1240 return -EINVAL;
1241 return ret;
1242}
1243
1244static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1245{
1246 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1247 __u64 ev1 = le64_to_cpu(sb->events);
1248
1249 rdev->raid_disk = -1;
1250 clear_bit(Faulty, &rdev->flags);
1251 clear_bit(In_sync, &rdev->flags);
1252 clear_bit(WriteMostly, &rdev->flags);
1253 clear_bit(BarriersNotsupp, &rdev->flags);
1254
1255 if (mddev->raid_disks == 0) {
1256 mddev->major_version = 1;
1257 mddev->patch_version = 0;
1258 mddev->external = 0;
1259 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1260 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1261 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1262 mddev->level = le32_to_cpu(sb->level);
1263 mddev->clevel[0] = 0;
1264 mddev->layout = le32_to_cpu(sb->layout);
1265 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1266 mddev->dev_sectors = le64_to_cpu(sb->size);
1267 mddev->events = ev1;
1268 mddev->bitmap_offset = 0;
1269 mddev->default_bitmap_offset = 1024 >> 9;
1270
1271 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1272 memcpy(mddev->uuid, sb->set_uuid, 16);
1273
1274 mddev->max_disks = (4096-256)/2;
1275
1276 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1277 mddev->bitmap_file == NULL )
1278 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
1279
1280 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1281 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1282 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1283 mddev->new_level = le32_to_cpu(sb->new_level);
1284 mddev->new_layout = le32_to_cpu(sb->new_layout);
1285 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1286 } else {
1287 mddev->reshape_position = MaxSector;
1288 mddev->delta_disks = 0;
1289 mddev->new_level = mddev->level;
1290 mddev->new_layout = mddev->layout;
1291 mddev->new_chunk_sectors = mddev->chunk_sectors;
1292 }
1293
1294 } else if (mddev->pers == NULL) {
1295
1296 ++ev1;
1297 if (ev1 < mddev->events)
1298 return -EINVAL;
1299 } else if (mddev->bitmap) {
1300
1301
1302
1303 if (ev1 < mddev->bitmap->events_cleared)
1304 return 0;
1305 } else {
1306 if (ev1 < mddev->events)
1307
1308 return 0;
1309 }
1310 if (mddev->level != LEVEL_MULTIPATH) {
1311 int role;
1312 if (rdev->desc_nr < 0 ||
1313 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1314 role = 0xffff;
1315 rdev->desc_nr = -1;
1316 } else
1317 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1318 switch(role) {
1319 case 0xffff:
1320 break;
1321 case 0xfffe:
1322 set_bit(Faulty, &rdev->flags);
1323 break;
1324 default:
1325 if ((le32_to_cpu(sb->feature_map) &
1326 MD_FEATURE_RECOVERY_OFFSET))
1327 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1328 else
1329 set_bit(In_sync, &rdev->flags);
1330 rdev->raid_disk = role;
1331 break;
1332 }
1333 if (sb->devflags & WriteMostly1)
1334 set_bit(WriteMostly, &rdev->flags);
1335 } else
1336 set_bit(In_sync, &rdev->flags);
1337
1338 return 0;
1339}
1340
1341static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1342{
1343 struct mdp_superblock_1 *sb;
1344 mdk_rdev_t *rdev2;
1345 int max_dev, i;
1346
1347
1348 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1349
1350 sb->feature_map = 0;
1351 sb->pad0 = 0;
1352 sb->recovery_offset = cpu_to_le64(0);
1353 memset(sb->pad1, 0, sizeof(sb->pad1));
1354 memset(sb->pad2, 0, sizeof(sb->pad2));
1355 memset(sb->pad3, 0, sizeof(sb->pad3));
1356
1357 sb->utime = cpu_to_le64((__u64)mddev->utime);
1358 sb->events = cpu_to_le64(mddev->events);
1359 if (mddev->in_sync)
1360 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1361 else
1362 sb->resync_offset = cpu_to_le64(0);
1363
1364 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1365
1366 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1367 sb->size = cpu_to_le64(mddev->dev_sectors);
1368 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1369 sb->level = cpu_to_le32(mddev->level);
1370 sb->layout = cpu_to_le32(mddev->layout);
1371
1372 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1373 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1374 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1375 }
1376
1377 if (rdev->raid_disk >= 0 &&
1378 !test_bit(In_sync, &rdev->flags)) {
1379 if (mddev->curr_resync_completed > rdev->recovery_offset)
1380 rdev->recovery_offset = mddev->curr_resync_completed;
1381 if (rdev->recovery_offset > 0) {
1382 sb->feature_map |=
1383 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1384 sb->recovery_offset =
1385 cpu_to_le64(rdev->recovery_offset);
1386 }
1387 }
1388
1389 if (mddev->reshape_position != MaxSector) {
1390 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1391 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1392 sb->new_layout = cpu_to_le32(mddev->new_layout);
1393 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1394 sb->new_level = cpu_to_le32(mddev->new_level);
1395 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1396 }
1397
1398 max_dev = 0;
1399 list_for_each_entry(rdev2, &mddev->disks, same_set)
1400 if (rdev2->desc_nr+1 > max_dev)
1401 max_dev = rdev2->desc_nr+1;
1402
1403 if (max_dev > le32_to_cpu(sb->max_dev)) {
1404 int bmask;
1405 sb->max_dev = cpu_to_le32(max_dev);
1406 rdev->sb_size = max_dev * 2 + 256;
1407 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1408 if (rdev->sb_size & bmask)
1409 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1410 }
1411 for (i=0; i<max_dev;i++)
1412 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1413
1414 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1415 i = rdev2->desc_nr;
1416 if (test_bit(Faulty, &rdev2->flags))
1417 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1418 else if (test_bit(In_sync, &rdev2->flags))
1419 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1420 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
1421 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1422 else
1423 sb->dev_roles[i] = cpu_to_le16(0xffff);
1424 }
1425
1426 sb->sb_csum = calc_sb_1_csum(sb);
1427}
1428
1429static unsigned long long
1430super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1431{
1432 struct mdp_superblock_1 *sb;
1433 sector_t max_sectors;
1434 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1435 return 0;
1436 if (rdev->sb_start < rdev->data_offset) {
1437
1438 max_sectors = rdev->bdev->bd_inode->i_size >> 9;
1439 max_sectors -= rdev->data_offset;
1440 if (!num_sectors || num_sectors > max_sectors)
1441 num_sectors = max_sectors;
1442 } else if (rdev->mddev->bitmap_offset) {
1443
1444 return 0;
1445 } else {
1446
1447 sector_t sb_start;
1448 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
1449 sb_start &= ~(sector_t)(4*2 - 1);
1450 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1451 if (!num_sectors || num_sectors > max_sectors)
1452 num_sectors = max_sectors;
1453 rdev->sb_start = sb_start;
1454 }
1455 sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
1456 sb->data_size = cpu_to_le64(num_sectors);
1457 sb->super_offset = rdev->sb_start;
1458 sb->sb_csum = calc_sb_1_csum(sb);
1459 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1460 rdev->sb_page);
1461 md_super_wait(rdev->mddev);
1462 return num_sectors / 2;
1463}
1464
/*
 * Superblock format handlers, indexed by major metadata version
 * (callers in this file index the table with mddev->major_version):
 * entry 0 handles the "0.90.0" format, entry 1 the "md-1" format.
 */
static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
	},
};
1483
1484static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1485{
1486 mdk_rdev_t *rdev, *rdev2;
1487
1488 rcu_read_lock();
1489 rdev_for_each_rcu(rdev, mddev1)
1490 rdev_for_each_rcu(rdev2, mddev2)
1491 if (rdev->bdev->bd_contains ==
1492 rdev2->bdev->bd_contains) {
1493 rcu_read_unlock();
1494 return 1;
1495 }
1496 rcu_read_unlock();
1497 return 0;
1498}
1499
/*
 * NOTE(review): presumably collects devices waiting to be assembled into
 * arrays -- confirm against the users of this list.
 */
static LIST_HEAD(pending_raid_disks);
1501
1502
1503
1504
1505
1506
1507
1508
1509int md_integrity_register(mddev_t *mddev)
1510{
1511 mdk_rdev_t *rdev, *reference = NULL;
1512
1513 if (list_empty(&mddev->disks))
1514 return 0;
1515 if (blk_get_integrity(mddev->gendisk))
1516 return 0;
1517 list_for_each_entry(rdev, &mddev->disks, same_set) {
1518
1519 if (test_bit(Faulty, &rdev->flags))
1520 continue;
1521 if (rdev->raid_disk < 0)
1522 continue;
1523
1524
1525
1526
1527 if (!bdev_get_integrity(rdev->bdev))
1528 return -EINVAL;
1529 if (!reference) {
1530
1531 reference = rdev;
1532 continue;
1533 }
1534
1535 if (blk_integrity_compare(reference->bdev->bd_disk,
1536 rdev->bdev->bd_disk) < 0)
1537 return -EINVAL;
1538 }
1539
1540
1541
1542
1543 if (blk_integrity_register(mddev->gendisk,
1544 bdev_get_integrity(reference->bdev)) != 0) {
1545 printk(KERN_ERR "md: failed to register integrity for %s\n",
1546 mdname(mddev));
1547 return -EINVAL;
1548 }
1549 printk(KERN_NOTICE "md: data integrity on %s enabled\n",
1550 mdname(mddev));
1551 return 0;
1552}
1553EXPORT_SYMBOL(md_integrity_register);
1554
1555
1556void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
1557{
1558 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
1559 struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
1560
1561 if (!bi_mddev)
1562 return;
1563 if (rdev->raid_disk < 0)
1564 return;
1565 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
1566 rdev->bdev->bd_disk) >= 0)
1567 return;
1568 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
1569 blk_integrity_unregister(mddev->gendisk);
1570}
1571EXPORT_SYMBOL(md_integrity_add_rdev);
1572
1573static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1574{
1575 char b[BDEVNAME_SIZE];
1576 struct kobject *ko;
1577 char *s;
1578 int err;
1579
1580 if (rdev->mddev) {
1581 MD_BUG();
1582 return -EINVAL;
1583 }
1584
1585
1586 if (find_rdev(mddev, rdev->bdev->bd_dev))
1587 return -EEXIST;
1588
1589
1590 if (rdev->sectors && (mddev->dev_sectors == 0 ||
1591 rdev->sectors < mddev->dev_sectors)) {
1592 if (mddev->pers) {
1593
1594
1595
1596
1597 if (mddev->level > 0)
1598 return -ENOSPC;
1599 } else
1600 mddev->dev_sectors = rdev->sectors;
1601 }
1602
1603
1604
1605
1606
1607 if (rdev->desc_nr < 0) {
1608 int choice = 0;
1609 if (mddev->pers) choice = mddev->raid_disks;
1610 while (find_rdev_nr(mddev, choice))
1611 choice++;
1612 rdev->desc_nr = choice;
1613 } else {
1614 if (find_rdev_nr(mddev, rdev->desc_nr))
1615 return -EBUSY;
1616 }
1617 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
1618 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
1619 mdname(mddev), mddev->max_disks);
1620 return -EBUSY;
1621 }
1622 bdevname(rdev->bdev,b);
1623 while ( (s=strchr(b, '/')) != NULL)
1624 *s = '!';
1625
1626 rdev->mddev = mddev;
1627 printk(KERN_INFO "md: bind<%s>\n", b);
1628
1629 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
1630 goto fail;
1631
1632 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1633 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
1634 kobject_del(&rdev->kobj);
1635 goto fail;
1636 }
1637 rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
1638
1639 list_add_rcu(&rdev->same_set, &mddev->disks);
1640 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1641
1642
1643 mddev->recovery_disabled = 0;
1644
1645 return 0;
1646
1647 fail:
1648 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
1649 b, mdname(mddev));
1650 return err;
1651}
1652
1653static void md_delayed_delete(struct work_struct *ws)
1654{
1655 mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
1656 kobject_del(&rdev->kobj);
1657 kobject_put(&rdev->kobj);
1658}
1659
1660static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1661{
1662 char b[BDEVNAME_SIZE];
1663 if (!rdev->mddev) {
1664 MD_BUG();
1665 return;
1666 }
1667 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1668 list_del_rcu(&rdev->same_set);
1669 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1670 rdev->mddev = NULL;
1671 sysfs_remove_link(&rdev->kobj, "block");
1672 sysfs_put(rdev->sysfs_state);
1673 rdev->sysfs_state = NULL;
1674
1675
1676
1677
1678 synchronize_rcu();
1679 INIT_WORK(&rdev->del_work, md_delayed_delete);
1680 kobject_get(&rdev->kobj);
1681 schedule_work(&rdev->del_work);
1682}
1683
1684
1685
1686
1687
1688
1689static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1690{
1691 int err = 0;
1692 struct block_device *bdev;
1693 char b[BDEVNAME_SIZE];
1694
1695 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1696 if (IS_ERR(bdev)) {
1697 printk(KERN_ERR "md: could not open %s.\n",
1698 __bdevname(dev, b));
1699 return PTR_ERR(bdev);
1700 }
1701 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1702 if (err) {
1703 printk(KERN_ERR "md: could not bd_claim %s.\n",
1704 bdevname(bdev, b));
1705 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1706 return err;
1707 }
1708 if (!shared)
1709 set_bit(AllReserved, &rdev->flags);
1710 rdev->bdev = bdev;
1711 return err;
1712}
1713
1714static void unlock_rdev(mdk_rdev_t *rdev)
1715{
1716 struct block_device *bdev = rdev->bdev;
1717 rdev->bdev = NULL;
1718 if (!bdev)
1719 MD_BUG();
1720 bd_release(bdev);
1721 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1722}
1723
/*
 * Forward declaration; export_rdev() calls this for AutoDetected devices
 * when MODULE is not defined.
 */
void md_autodetect_dev(dev_t dev);
1725
1726static void export_rdev(mdk_rdev_t * rdev)
1727{
1728 char b[BDEVNAME_SIZE];
1729 printk(KERN_INFO "md: export_rdev(%s)\n",
1730 bdevname(rdev->bdev,b));
1731 if (rdev->mddev)
1732 MD_BUG();
1733 free_disk_sb(rdev);
1734#ifndef MODULE
1735 if (test_bit(AutoDetected, &rdev->flags))
1736 md_autodetect_dev(rdev->bdev->bd_dev);
1737#endif
1738 unlock_rdev(rdev);
1739 kobject_put(&rdev->kobj);
1740}
1741
1742static void kick_rdev_from_array(mdk_rdev_t * rdev)
1743{
1744 unbind_rdev_from_array(rdev);
1745 export_rdev(rdev);
1746}
1747
1748static void export_array(mddev_t *mddev)
1749{
1750 mdk_rdev_t *rdev, *tmp;
1751
1752 rdev_for_each(rdev, tmp, mddev) {
1753 if (!rdev->mddev) {
1754 MD_BUG();
1755 continue;
1756 }
1757 kick_rdev_from_array(rdev);
1758 }
1759 if (!list_empty(&mddev->disks))
1760 MD_BUG();
1761 mddev->raid_disks = 0;
1762 mddev->major_version = 0;
1763}
1764
1765static void print_desc(mdp_disk_t *desc)
1766{
1767 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
1768 desc->major,desc->minor,desc->raid_disk,desc->state);
1769}
1770
1771static void print_sb_90(mdp_super_t *sb)
1772{
1773 int i;
1774
1775 printk(KERN_INFO
1776 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
1777 sb->major_version, sb->minor_version, sb->patch_version,
1778 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
1779 sb->ctime);
1780 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
1781 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
1782 sb->md_minor, sb->layout, sb->chunk_size);
1783 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
1784 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
1785 sb->utime, sb->state, sb->active_disks, sb->working_disks,
1786 sb->failed_disks, sb->spare_disks,
1787 sb->sb_csum, (unsigned long)sb->events_lo);
1788
1789 printk(KERN_INFO);
1790 for (i = 0; i < MD_SB_DISKS; i++) {
1791 mdp_disk_t *desc;
1792
1793 desc = sb->disks + i;
1794 if (desc->number || desc->major || desc->minor ||
1795 desc->raid_disk || (desc->state && (desc->state != 4))) {
1796 printk(" D %2d: ", i);
1797 print_desc(desc);
1798 }
1799 }
1800 printk(KERN_INFO "md: THIS: ");
1801 print_desc(&sb->this_disk);
1802}
1803
/*
 * Dump a version-1 superblock to the kernel log.  Multi-byte fields are
 * converted from little-endian before printing.
 */
static void print_sb_1(struct mdp_superblock_1 *sb)
{
	__u8 *uuid;

	/* First message: version, feature map, array UUID, name, creation time. */
	uuid = sb->set_uuid;
	/* NOTE(review): set_name is printed with %s -- confirm it is always NUL-terminated. */
	printk(KERN_INFO
	       "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
	       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
	       "md:    Name: \"%s\" CT:%llu\n",
		le32_to_cpu(sb->major_version),
		le32_to_cpu(sb->feature_map),
		uuid[0], uuid[1], uuid[2], uuid[3],
		uuid[4], uuid[5], uuid[6], uuid[7],
		uuid[8], uuid[9], uuid[10], uuid[11],
		uuid[12], uuid[13], uuid[14], uuid[15],
		sb->set_name,
		(unsigned long long)le64_to_cpu(sb->ctime)
		       & MD_SUPERBLOCK_1_TIME_SEC_MASK);

	/* Second message: geometry, offsets, device UUID, flags and counters. */
	uuid = sb->device_uuid;
	printk(KERN_INFO
	       "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
			" RO:%llu\n"
	       "md:     Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
	                ":%02x%02x%02x%02x%02x%02x\n"
	       "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
	       "md:         (MaxDev:%u) \n",
		le32_to_cpu(sb->level),
		(unsigned long long)le64_to_cpu(sb->size),
		le32_to_cpu(sb->raid_disks),
		le32_to_cpu(sb->layout),
		le32_to_cpu(sb->chunksize),
		(unsigned long long)le64_to_cpu(sb->data_offset),
		(unsigned long long)le64_to_cpu(sb->data_size),
		(unsigned long long)le64_to_cpu(sb->super_offset),
		(unsigned long long)le64_to_cpu(sb->recovery_offset),
		le32_to_cpu(sb->dev_number),
		uuid[0], uuid[1], uuid[2], uuid[3],
		uuid[4], uuid[5], uuid[6], uuid[7],
		uuid[8], uuid[9], uuid[10], uuid[11],
		uuid[12], uuid[13], uuid[14], uuid[15],
		sb->devflags,
		(unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
		(unsigned long long)le64_to_cpu(sb->events),
		(unsigned long long)le64_to_cpu(sb->resync_offset),
		le32_to_cpu(sb->sb_csum),
		le32_to_cpu(sb->max_dev)
		);
}
1853
1854static void print_rdev(mdk_rdev_t *rdev, int major_version)
1855{
1856 char b[BDEVNAME_SIZE];
1857 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
1858 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
1859 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1860 rdev->desc_nr);
1861 if (rdev->sb_loaded) {
1862 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
1863 switch (major_version) {
1864 case 0:
1865 print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
1866 break;
1867 case 1:
1868 print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
1869 break;
1870 }
1871 } else
1872 printk(KERN_INFO "md: no rdev superblock!\n");
1873}
1874
1875static void md_print_devices(void)
1876{
1877 struct list_head *tmp;
1878 mdk_rdev_t *rdev;
1879 mddev_t *mddev;
1880 char b[BDEVNAME_SIZE];
1881
1882 printk("\n");
1883 printk("md: **********************************\n");
1884 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
1885 printk("md: **********************************\n");
1886 for_each_mddev(mddev, tmp) {
1887
1888 if (mddev->bitmap)
1889 bitmap_print_sb(mddev->bitmap);
1890 else
1891 printk("%s: ", mdname(mddev));
1892 list_for_each_entry(rdev, &mddev->disks, same_set)
1893 printk("<%s>", bdevname(rdev->bdev,b));
1894 printk("\n");
1895
1896 list_for_each_entry(rdev, &mddev->disks, same_set)
1897 print_rdev(rdev, mddev->major_version);
1898 }
1899 printk("md: **********************************\n");
1900 printk("\n");
1901}
1902
1903
1904static void sync_sbs(mddev_t * mddev, int nospares)
1905{
1906
1907
1908
1909
1910
1911
1912 mdk_rdev_t *rdev;
1913
1914 list_for_each_entry(rdev, &mddev->disks, same_set) {
1915 if (rdev->sb_events == mddev->events ||
1916 (nospares &&
1917 rdev->raid_disk < 0 &&
1918 (rdev->sb_events&1)==0 &&
1919 rdev->sb_events+1 == mddev->events)) {
1920
1921 rdev->sb_loaded = 2;
1922 } else {
1923 super_types[mddev->major_version].
1924 sync_super(mddev, rdev);
1925 rdev->sb_loaded = 1;
1926 }
1927 }
1928}
1929
/*
 * md_update_sb - bump the event count and write superblocks to the members
 * @mddev:        array to update
 * @force_change: if non-zero, disable the "nospares" shortcut so that no
 *                device is skipped on that basis
 *
 * Returns early, after only recording the update time, when the metadata
 * is external.  For a non-persistent array the event count is still
 * updated but nothing is written.  The whole update is repeated if
 * in_sync changed or MD_CHANGE_DEVS was set again while the writes were
 * in flight.
 */
static void md_update_sb(mddev_t * mddev, int force_change)
{
	mdk_rdev_t *rdev;
	int sync_req;
	int nospares = 0;

	mddev->utime = get_seconds();
	if (mddev->external)
		return;
repeat:
	spin_lock_irq(&mddev->write_lock);

	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	/* A device-set change always forces a full update. */
	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only a clean/dirty transition was flagged: devices without
		 * a raid slot may be skipped (see sync_sbs()).
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* A degraded array never uses the nospares shortcut. */
		nospares = 0;

	/* Remember in_sync so a change during the write can be detected below. */
	sync_req = mddev->in_sync;

	/*
	 * Choose the new event count.  With nospares set, a clean array
	 * (in_sync with recovery_cp == MaxSector) whose count is odd and not
	 * 1 steps back by one; in every other case the count is incremented.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && (mddev->events & 1)
	    && mddev->events != 1)
		mddev->events--;
	else {
		/* otherwise we have to go forward and ... */
		mddev->events ++;
		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) {
			/* not clean: an even count cancels nospares */
			if ((mddev->events&1)==0)
				nospares = 0;
		} else {
			/* clean: an odd count cancels nospares */
			if ((mddev->events&1))
				nospares = 0;
		}
	}

	if (!mddev->events) {
		/*
		 * The count reached 0: report it and step back by one so it
		 * is non-zero again.
		 */
		MD_BUG();
		mddev->events --;
	}

	/* Non-persistent arrays have no superblocks to write. */
	if (!mddev->persistent) {
		if (!mddev->external)
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);

		spin_unlock_irq(&mddev->write_lock);
		wake_up(&mddev->sb_wait);
		return;
	}
	/* Regenerate the in-memory superblocks while holding write_lock. */
	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	dprintk(KERN_INFO
		"md: updating %s RAID superblock on device (in sync %d)\n",
		mdname(mddev),mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		char b[BDEVNAME_SIZE];
		dprintk(KERN_INFO "md: ");
		/* sync_sbs() marked this superblock as not needing a write. */
		if (rdev->sb_loaded != 1)
			continue; /* no noise on spare devices */
		if (test_bit(Faulty, &rdev->flags))
			dprintk("(skipping faulty ");

		dprintk("%s ", bdevname(rdev->bdev,b));
		if (!test_bit(Faulty, &rdev->flags)) {
			md_super_write(mddev,rdev,
				       rdev->sb_start, rdev->sb_size,
				       rdev->sb_page);
			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
				bdevname(rdev->bdev,b),
				(unsigned long long)rdev->sb_start);
			rdev->sb_events = mddev->events;

		} else
			dprintk(")\n");
		if (mddev->level == LEVEL_MULTIPATH)
			/* multipath: stop after the first processed device */
			break;
	}
	md_super_wait(mddev);
	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */

	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* have to write it out again */
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");

}
2060
2061
2062
2063
/*
 * cmd_match - compare a sysfs command buffer against a keyword
 * @cmd: text supplied by the writer, optionally ending in one newline
 * @str: keyword to compare against
 *
 * Returns 1 if @cmd equals @str, allowing a single trailing '\n' on
 * @cmd, and 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* Walk the common prefix of both strings. */
	for (; *cmd != '\0' && *str != '\0'; cmd++, str++) {
		if (*cmd != *str)
			break;
	}

	/* Tolerate exactly one newline after the matched part. */
	if (*cmd == '\n')
		cmd++;

	/* It is a match only if both strings are now exhausted. */
	return (*str == '\0' && *cmd == '\0') ? 1 : 0;
}
2080
/*
 * One sysfs attribute of a member device.  rdev_attr_show() and
 * rdev_attr_store() recover this structure from the embedded attribute
 * and dispatch to show/store; either callback may be NULL, in which case
 * the access fails with -EIO.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mdk_rdev_t *, char *);
	ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
};
2086
2087static ssize_t
2088state_show(mdk_rdev_t *rdev, char *page)
2089{
2090 char *sep = "";
2091 size_t len = 0;
2092
2093 if (test_bit(Faulty, &rdev->flags)) {
2094 len+= sprintf(page+len, "%sfaulty",sep);
2095 sep = ",";
2096 }
2097 if (test_bit(In_sync, &rdev->flags)) {
2098 len += sprintf(page+len, "%sin_sync",sep);
2099 sep = ",";
2100 }
2101 if (test_bit(WriteMostly, &rdev->flags)) {
2102 len += sprintf(page+len, "%swrite_mostly",sep);
2103 sep = ",";
2104 }
2105 if (test_bit(Blocked, &rdev->flags)) {
2106 len += sprintf(page+len, "%sblocked", sep);
2107 sep = ",";
2108 }
2109 if (!test_bit(Faulty, &rdev->flags) &&
2110 !test_bit(In_sync, &rdev->flags)) {
2111 len += sprintf(page+len, "%sspare", sep);
2112 sep = ",";
2113 }
2114 return len+sprintf(page+len, "\n");
2115}
2116
2117static ssize_t
2118state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2119{
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129 int err = -EINVAL;
2130 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2131 md_error(rdev->mddev, rdev);
2132 err = 0;
2133 } else if (cmd_match(buf, "remove")) {
2134 if (rdev->raid_disk >= 0)
2135 err = -EBUSY;
2136 else {
2137 mddev_t *mddev = rdev->mddev;
2138 kick_rdev_from_array(rdev);
2139 if (mddev->pers)
2140 md_update_sb(mddev, 1);
2141 md_new_event(mddev);
2142 err = 0;
2143 }
2144 } else if (cmd_match(buf, "writemostly")) {
2145 set_bit(WriteMostly, &rdev->flags);
2146 err = 0;
2147 } else if (cmd_match(buf, "-writemostly")) {
2148 clear_bit(WriteMostly, &rdev->flags);
2149 err = 0;
2150 } else if (cmd_match(buf, "blocked")) {
2151 set_bit(Blocked, &rdev->flags);
2152 err = 0;
2153 } else if (cmd_match(buf, "-blocked")) {
2154 clear_bit(Blocked, &rdev->flags);
2155 wake_up(&rdev->blocked_wait);
2156 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2157 md_wakeup_thread(rdev->mddev->thread);
2158
2159 err = 0;
2160 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2161 set_bit(In_sync, &rdev->flags);
2162 err = 0;
2163 }
2164 if (!err && rdev->sysfs_state)
2165 sysfs_notify_dirent(rdev->sysfs_state);
2166 return err ? err : len;
2167}
2168static struct rdev_sysfs_entry rdev_state =
2169__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2170
2171static ssize_t
2172errors_show(mdk_rdev_t *rdev, char *page)
2173{
2174 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2175}
2176
2177static ssize_t
2178errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2179{
2180 char *e;
2181 unsigned long n = simple_strtoul(buf, &e, 10);
2182 if (*buf && (*e == 0 || *e == '\n')) {
2183 atomic_set(&rdev->corrected_errors, n);
2184 return len;
2185 }
2186 return -EINVAL;
2187}
2188static struct rdev_sysfs_entry rdev_errors =
2189__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2190
2191static ssize_t
2192slot_show(mdk_rdev_t *rdev, char *page)
2193{
2194 if (rdev->raid_disk < 0)
2195 return sprintf(page, "none\n");
2196 else
2197 return sprintf(page, "%d\n", rdev->raid_disk);
2198}
2199
/*
 * Handle a write to the "slot" attribute: either a decimal slot number
 * or a string starting with "none" (treated as slot -1).
 *
 * Three cases:
 *  - array running, "none": hot-remove the device from its current slot;
 *  - array running, number: hot-add the device into that slot;
 *  - array not running:     just record the slot and mark the device
 *                           in sync.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	char *e;
	int err;
	char nm[20];
	int slot = simple_strtoul(buf, &e, 10);
	if (strncmp(buf, "none", 4)==0)
		slot = -1;
	else if (e==buf || (*e && *e!= '\n'))
		return -EINVAL;
	if (rdev->mddev->pers && slot == -1) {
		/* Running array, "none": take the device out of its slot. */
		if (rdev->raid_disk == -1)
			return -EEXIST;
		/* personality does all needed checks */
		/* NOTE(review): tests hot_add_disk but calls hot_remove_disk -- confirm intended. */
		if (rdev->mddev->pers->hot_add_disk == NULL)
			return -EINVAL;
		err = rdev->mddev->pers->
			hot_remove_disk(rdev->mddev, rdev->raid_disk);
		if (err)
			return err;
		/* NOTE(review): rdev->raid_disk is not reset to -1 in this branch -- confirm who does it. */
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&rdev->mddev->kobj, nm);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
	} else if (rdev->mddev->pers) {
		mdk_rdev_t *rdev2;
		/* Running array, slot number: activate the device in that slot. */

		if (rdev->raid_disk != -1)
			return -EBUSY;

		if (rdev->mddev->pers->hot_add_disk == NULL)
			return -EINVAL;

		/* The requested slot must not be held by another member. */
		list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
			if (rdev2->raid_disk == slot)
				return -EEXIST;

		rdev->raid_disk = slot;
		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = slot;
		else
			rdev->saved_raid_disk = -1;
		err = rdev->mddev->pers->
			hot_add_disk(rdev->mddev, rdev);
		if (err) {
			/* Undo the slot assignment on failure. */
			rdev->raid_disk = -1;
			return err;
		} else
			sysfs_notify_dirent(rdev->sysfs_state);
		sprintf(nm, "rd%d", rdev->raid_disk);
		if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
			printk(KERN_WARNING
			       "md: cannot register "
			       "%s for %s\n",
			       nm, mdname(rdev->mddev));

		/* don't wakeup anyone, leave that to userspace. */
	} else {
		/* Array not running: record the slot and reset the flags. */
		if (slot >= rdev->mddev->raid_disks)
			return -ENOSPC;
		rdev->raid_disk = slot;
		/* assume it is working */
		clear_bit(Faulty, &rdev->flags);
		clear_bit(WriteMostly, &rdev->flags);
		set_bit(In_sync, &rdev->flags);
		sysfs_notify_dirent(rdev->sysfs_state);
	}
	return len;
}


static struct rdev_sysfs_entry rdev_slot =
__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2284
2285static ssize_t
2286offset_show(mdk_rdev_t *rdev, char *page)
2287{
2288 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2289}
2290
2291static ssize_t
2292offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2293{
2294 char *e;
2295 unsigned long long offset = simple_strtoull(buf, &e, 10);
2296 if (e==buf || (*e && *e != '\n'))
2297 return -EINVAL;
2298 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2299 return -EBUSY;
2300 if (rdev->sectors && rdev->mddev->external)
2301
2302
2303 return -EBUSY;
2304 rdev->data_offset = offset;
2305 return len;
2306}
2307
2308static struct rdev_sysfs_entry rdev_offset =
2309__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2310
2311static ssize_t
2312rdev_size_show(mdk_rdev_t *rdev, char *page)
2313{
2314 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2315}
2316
2317static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2318{
2319
2320 if (s1+l1 <= s2)
2321 return 0;
2322 if (s2+l2 <= s1)
2323 return 0;
2324 return 1;
2325}
2326
2327static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2328{
2329 unsigned long long blocks;
2330 sector_t new;
2331
2332 if (strict_strtoull(buf, 10, &blocks) < 0)
2333 return -EINVAL;
2334
2335 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2336 return -EINVAL;
2337
2338 new = blocks * 2;
2339 if (new != blocks * 2)
2340 return -EINVAL;
2341
2342 *sectors = new;
2343 return 0;
2344}
2345
2346static ssize_t
2347rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2348{
2349 mddev_t *my_mddev = rdev->mddev;
2350 sector_t oldsectors = rdev->sectors;
2351 sector_t sectors;
2352
2353 if (strict_blocks_to_sectors(buf, §ors) < 0)
2354 return -EINVAL;
2355 if (my_mddev->pers && rdev->raid_disk >= 0) {
2356 if (my_mddev->persistent) {
2357 sectors = super_types[my_mddev->major_version].
2358 rdev_size_change(rdev, sectors);
2359 if (!sectors)
2360 return -EBUSY;
2361 } else if (!sectors)
2362 sectors = (rdev->bdev->bd_inode->i_size >> 9) -
2363 rdev->data_offset;
2364 }
2365 if (sectors < my_mddev->dev_sectors)
2366 return -EINVAL;
2367
2368 rdev->sectors = sectors;
2369 if (sectors > oldsectors && my_mddev->external) {
2370
2371
2372
2373
2374
2375 mddev_t *mddev;
2376 int overlap = 0;
2377 struct list_head *tmp;
2378
2379 mddev_unlock(my_mddev);
2380 for_each_mddev(mddev, tmp) {
2381 mdk_rdev_t *rdev2;
2382
2383 mddev_lock(mddev);
2384 list_for_each_entry(rdev2, &mddev->disks, same_set)
2385 if (test_bit(AllReserved, &rdev2->flags) ||
2386 (rdev->bdev == rdev2->bdev &&
2387 rdev != rdev2 &&
2388 overlaps(rdev->data_offset, rdev->sectors,
2389 rdev2->data_offset,
2390 rdev2->sectors))) {
2391 overlap = 1;
2392 break;
2393 }
2394 mddev_unlock(mddev);
2395 if (overlap) {
2396 mddev_put(mddev);
2397 break;
2398 }
2399 }
2400 mddev_lock(my_mddev);
2401 if (overlap) {
2402
2403
2404
2405
2406
2407
2408 rdev->sectors = oldsectors;
2409 return -EBUSY;
2410 }
2411 }
2412 return len;
2413}
2414
2415static struct rdev_sysfs_entry rdev_size =
2416__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2417
/*
 * NULL-terminated list of the sysfs attributes (state, errors, slot,
 * offset, size) installed as default_attrs of rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_size.attr,
	NULL,
};
2426static ssize_t
2427rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2428{
2429 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2430 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2431 mddev_t *mddev = rdev->mddev;
2432 ssize_t rv;
2433
2434 if (!entry->show)
2435 return -EIO;
2436
2437 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2438 if (!rv) {
2439 if (rdev->mddev == NULL)
2440 rv = -EBUSY;
2441 else
2442 rv = entry->show(rdev, page);
2443 mddev_unlock(mddev);
2444 }
2445 return rv;
2446}
2447
2448static ssize_t
2449rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2450 const char *page, size_t length)
2451{
2452 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2453 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2454 ssize_t rv;
2455 mddev_t *mddev = rdev->mddev;
2456
2457 if (!entry->store)
2458 return -EIO;
2459 if (!capable(CAP_SYS_ADMIN))
2460 return -EACCES;
2461 rv = mddev ? mddev_lock(mddev): -EBUSY;
2462 if (!rv) {
2463 if (rdev->mddev == NULL)
2464 rv = -EBUSY;
2465 else
2466 rv = entry->store(rdev, page, length);
2467 mddev_unlock(mddev);
2468 }
2469 return rv;
2470}
2471
2472static void rdev_free(struct kobject *ko)
2473{
2474 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2475 kfree(rdev);
2476}
/* sysfs read/write entry points shared by all member-device attributes. */
static struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/*
 * kobject type of a member device: rdev_free() as release callback, the
 * attribute dispatchers above as sysfs ops, and rdev_default_attrs as
 * default attributes.
 */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
2498{
2499 char b[BDEVNAME_SIZE];
2500 int err;
2501 mdk_rdev_t *rdev;
2502 sector_t size;
2503
2504 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
2505 if (!rdev) {
2506 printk(KERN_ERR "md: could not alloc mem for new device!\n");
2507 return ERR_PTR(-ENOMEM);
2508 }
2509
2510 if ((err = alloc_disk_sb(rdev)))
2511 goto abort_free;
2512
2513 err = lock_rdev(rdev, newdev, super_format == -2);
2514 if (err)
2515 goto abort_free;
2516
2517 kobject_init(&rdev->kobj, &rdev_ktype);
2518
2519 rdev->desc_nr = -1;
2520 rdev->saved_raid_disk = -1;
2521 rdev->raid_disk = -1;
2522 rdev->flags = 0;
2523 rdev->data_offset = 0;
2524 rdev->sb_events = 0;
2525 atomic_set(&rdev->nr_pending, 0);
2526 atomic_set(&rdev->read_errors, 0);
2527 atomic_set(&rdev->corrected_errors, 0);
2528
2529 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2530 if (!size) {
2531 printk(KERN_WARNING
2532 "md: %s has zero or unknown size, marking faulty!\n",
2533 bdevname(rdev->bdev,b));
2534 err = -EINVAL;
2535 goto abort_free;
2536 }
2537
2538 if (super_format >= 0) {
2539 err = super_types[super_format].
2540 load_super(rdev, NULL, super_minor);
2541 if (err == -EINVAL) {
2542 printk(KERN_WARNING
2543 "md: %s does not have a valid v%d.%d "
2544 "superblock, not importing!\n",
2545 bdevname(rdev->bdev,b),
2546 super_format, super_minor);
2547 goto abort_free;
2548 }
2549 if (err < 0) {
2550 printk(KERN_WARNING
2551 "md: could not read %s's sb, not importing!\n",
2552 bdevname(rdev->bdev,b));
2553 goto abort_free;
2554 }
2555 }
2556
2557 INIT_LIST_HEAD(&rdev->same_set);
2558 init_waitqueue_head(&rdev->blocked_wait);
2559
2560 return rdev;
2561
2562abort_free:
2563 if (rdev->sb_page) {
2564 if (rdev->bdev)
2565 unlock_rdev(rdev);
2566 free_disk_sb(rdev);
2567 }
2568 kfree(rdev);
2569 return ERR_PTR(err);
2570}
2571
2572
2573
2574
2575
2576
2577static void analyze_sbs(mddev_t * mddev)
2578{
2579 int i;
2580 mdk_rdev_t *rdev, *freshest, *tmp;
2581 char b[BDEVNAME_SIZE];
2582
2583 freshest = NULL;
2584 rdev_for_each(rdev, tmp, mddev)
2585 switch (super_types[mddev->major_version].
2586 load_super(rdev, freshest, mddev->minor_version)) {
2587 case 1:
2588 freshest = rdev;
2589 break;
2590 case 0:
2591 break;
2592 default:
2593 printk( KERN_ERR \
2594 "md: fatal superblock inconsistency in %s"
2595 " -- removing from array\n",
2596 bdevname(rdev->bdev,b));
2597 kick_rdev_from_array(rdev);
2598 }
2599
2600
2601 super_types[mddev->major_version].
2602 validate_super(mddev, freshest);
2603
2604 i = 0;
2605 rdev_for_each(rdev, tmp, mddev) {
2606 if (rdev->desc_nr >= mddev->max_disks ||
2607 i > mddev->max_disks) {
2608 printk(KERN_WARNING
2609 "md: %s: %s: only %d devices permitted\n",
2610 mdname(mddev), bdevname(rdev->bdev, b),
2611 mddev->max_disks);
2612 kick_rdev_from_array(rdev);
2613 continue;
2614 }
2615 if (rdev != freshest)
2616 if (super_types[mddev->major_version].
2617 validate_super(mddev, rdev)) {
2618 printk(KERN_WARNING "md: kicking non-fresh %s"
2619 " from array!\n",
2620 bdevname(rdev->bdev,b));
2621 kick_rdev_from_array(rdev);
2622 continue;
2623 }
2624 if (mddev->level == LEVEL_MULTIPATH) {
2625 rdev->desc_nr = i++;
2626 rdev->raid_disk = rdev->desc_nr;
2627 set_bit(In_sync, &rdev->flags);
2628 } else if (rdev->raid_disk >= mddev->raid_disks) {
2629 rdev->raid_disk = -1;
2630 clear_bit(In_sync, &rdev->flags);
2631 }
2632 }
2633}
2634
2635static void md_safemode_timeout(unsigned long data);
2636
2637static ssize_t
2638safe_delay_show(mddev_t *mddev, char *page)
2639{
2640 int msec = (mddev->safemode_delay*1000)/HZ;
2641 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2642}
2643static ssize_t
2644safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2645{
2646 int scale=1;
2647 int dot=0;
2648 int i;
2649 unsigned long msec;
2650 char buf[30];
2651
2652
2653 if (len >= sizeof(buf))
2654 return -EINVAL;
2655 strlcpy(buf, cbuf, sizeof(buf));
2656 for (i=0; i<len; i++) {
2657 if (dot) {
2658 if (isdigit(buf[i])) {
2659 buf[i-1] = buf[i];
2660 scale *= 10;
2661 }
2662 buf[i] = 0;
2663 } else if (buf[i] == '.') {
2664 dot=1;
2665 buf[i] = 0;
2666 }
2667 }
2668 if (strict_strtoul(buf, 10, &msec) < 0)
2669 return -EINVAL;
2670 msec = (msec * 1000) / scale;
2671 if (msec == 0)
2672 mddev->safemode_delay = 0;
2673 else {
2674 unsigned long old_delay = mddev->safemode_delay;
2675 mddev->safemode_delay = (msec*HZ)/1000;
2676 if (mddev->safemode_delay == 0)
2677 mddev->safemode_delay = 1;
2678 if (mddev->safemode_delay < old_delay)
2679 md_safemode_timeout((unsigned long)mddev);
2680 }
2681 return len;
2682}
2683static struct md_sysfs_entry md_safe_delay =
2684__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
2685
2686static ssize_t
2687level_show(mddev_t *mddev, char *page)
2688{
2689 struct mdk_personality *p = mddev->pers;
2690 if (p)
2691 return sprintf(page, "%s\n", p->name);
2692 else if (mddev->clevel[0])
2693 return sprintf(page, "%s\n", mddev->clevel);
2694 else if (mddev->level != LEVEL_NONE)
2695 return sprintf(page, "%d\n", mddev->level);
2696 else
2697 return 0;
2698}
2699
2700static ssize_t
2701level_store(mddev_t *mddev, const char *buf, size_t len)
2702{
2703 char level[16];
2704 ssize_t rv = len;
2705 struct mdk_personality *pers;
2706 void *priv;
2707 mdk_rdev_t *rdev;
2708
2709 if (mddev->pers == NULL) {
2710 if (len == 0)
2711 return 0;
2712 if (len >= sizeof(mddev->clevel))
2713 return -ENOSPC;
2714 strncpy(mddev->clevel, buf, len);
2715 if (mddev->clevel[len-1] == '\n')
2716 len--;
2717 mddev->clevel[len] = 0;
2718 mddev->level = LEVEL_NONE;
2719 return rv;
2720 }
2721
2722
2723
2724
2725
2726
2727
2728 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
2729 return -EBUSY;
2730
2731 if (!mddev->pers->quiesce) {
2732 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
2733 mdname(mddev), mddev->pers->name);
2734 return -EINVAL;
2735 }
2736
2737
2738 if (len == 0 || len >= sizeof(level))
2739 return -EINVAL;
2740 strncpy(level, buf, len);
2741 if (level[len-1] == '\n')
2742 len--;
2743 level[len] = 0;
2744
2745 request_module("md-%s", level);
2746 spin_lock(&pers_lock);
2747 pers = find_pers(LEVEL_NONE, level);
2748 if (!pers || !try_module_get(pers->owner)) {
2749 spin_unlock(&pers_lock);
2750 printk(KERN_WARNING "md: personality %s not loaded\n", level);
2751 return -EINVAL;
2752 }
2753 spin_unlock(&pers_lock);
2754
2755 if (pers == mddev->pers) {
2756
2757 module_put(pers->owner);
2758 return rv;
2759 }
2760 if (!pers->takeover) {
2761 module_put(pers->owner);
2762 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
2763 mdname(mddev), level);
2764 return -EINVAL;
2765 }
2766
2767
2768
2769
2770 priv = pers->takeover(mddev);
2771 if (IS_ERR(priv)) {
2772 mddev->new_level = mddev->level;
2773 mddev->new_layout = mddev->layout;
2774 mddev->new_chunk_sectors = mddev->chunk_sectors;
2775 mddev->raid_disks -= mddev->delta_disks;
2776 mddev->delta_disks = 0;
2777 module_put(pers->owner);
2778 printk(KERN_WARNING "md: %s: %s would not accept array\n",
2779 mdname(mddev), level);
2780 return PTR_ERR(priv);
2781 }
2782
2783
2784 mddev_suspend(mddev);
2785 mddev->pers->stop(mddev);
2786 module_put(mddev->pers->owner);
2787
2788 list_for_each_entry(rdev, &mddev->disks, same_set)
2789 if (rdev->raid_disk >= mddev->raid_disks) {
2790 rdev->raid_disk = -1;
2791 clear_bit(In_sync, &rdev->flags);
2792 }
2793 mddev->pers = pers;
2794 mddev->private = priv;
2795 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
2796 mddev->level = mddev->new_level;
2797 mddev->layout = mddev->new_layout;
2798 mddev->chunk_sectors = mddev->new_chunk_sectors;
2799 mddev->delta_disks = 0;
2800 pers->run(mddev);
2801 mddev_resume(mddev);
2802 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2803 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2804 md_wakeup_thread(mddev->thread);
2805 return rv;
2806}
2807
2808static struct md_sysfs_entry md_level =
2809__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
2810
2811
2812static ssize_t
2813layout_show(mddev_t *mddev, char *page)
2814{
2815
2816 if (mddev->reshape_position != MaxSector &&
2817 mddev->layout != mddev->new_layout)
2818 return sprintf(page, "%d (%d)\n",
2819 mddev->new_layout, mddev->layout);
2820 return sprintf(page, "%d\n", mddev->layout);
2821}
2822
2823static ssize_t
2824layout_store(mddev_t *mddev, const char *buf, size_t len)
2825{
2826 char *e;
2827 unsigned long n = simple_strtoul(buf, &e, 10);
2828
2829 if (!*buf || (*e && *e != '\n'))
2830 return -EINVAL;
2831
2832 if (mddev->pers) {
2833 int err;
2834 if (mddev->pers->check_reshape == NULL)
2835 return -EBUSY;
2836 mddev->new_layout = n;
2837 err = mddev->pers->check_reshape(mddev);
2838 if (err) {
2839 mddev->new_layout = mddev->layout;
2840 return err;
2841 }
2842 } else {
2843 mddev->new_layout = n;
2844 if (mddev->reshape_position == MaxSector)
2845 mddev->layout = n;
2846 }
2847 return len;
2848}
2849static struct md_sysfs_entry md_layout =
2850__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
2851
2852
2853static ssize_t
2854raid_disks_show(mddev_t *mddev, char *page)
2855{
2856 if (mddev->raid_disks == 0)
2857 return 0;
2858 if (mddev->reshape_position != MaxSector &&
2859 mddev->delta_disks != 0)
2860 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
2861 mddev->raid_disks - mddev->delta_disks);
2862 return sprintf(page, "%d\n", mddev->raid_disks);
2863}
2864
2865static int update_raid_disks(mddev_t *mddev, int raid_disks);
2866
2867static ssize_t
2868raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
2869{
2870 char *e;
2871 int rv = 0;
2872 unsigned long n = simple_strtoul(buf, &e, 10);
2873
2874 if (!*buf || (*e && *e != '\n'))
2875 return -EINVAL;
2876
2877 if (mddev->pers)
2878 rv = update_raid_disks(mddev, n);
2879 else if (mddev->reshape_position != MaxSector) {
2880 int olddisks = mddev->raid_disks - mddev->delta_disks;
2881 mddev->delta_disks = n - olddisks;
2882 mddev->raid_disks = n;
2883 } else
2884 mddev->raid_disks = n;
2885 return rv ? rv : len;
2886}
2887static struct md_sysfs_entry md_raid_disks =
2888__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
2889
2890static ssize_t
2891chunk_size_show(mddev_t *mddev, char *page)
2892{
2893 if (mddev->reshape_position != MaxSector &&
2894 mddev->chunk_sectors != mddev->new_chunk_sectors)
2895 return sprintf(page, "%d (%d)\n",
2896 mddev->new_chunk_sectors << 9,
2897 mddev->chunk_sectors << 9);
2898 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
2899}
2900
2901static ssize_t
2902chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2903{
2904 char *e;
2905 unsigned long n = simple_strtoul(buf, &e, 10);
2906
2907 if (!*buf || (*e && *e != '\n'))
2908 return -EINVAL;
2909
2910 if (mddev->pers) {
2911 int err;
2912 if (mddev->pers->check_reshape == NULL)
2913 return -EBUSY;
2914 mddev->new_chunk_sectors = n >> 9;
2915 err = mddev->pers->check_reshape(mddev);
2916 if (err) {
2917 mddev->new_chunk_sectors = mddev->chunk_sectors;
2918 return err;
2919 }
2920 } else {
2921 mddev->new_chunk_sectors = n >> 9;
2922 if (mddev->reshape_position == MaxSector)
2923 mddev->chunk_sectors = n >> 9;
2924 }
2925 return len;
2926}
2927static struct md_sysfs_entry md_chunk_size =
2928__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2929
2930static ssize_t
2931resync_start_show(mddev_t *mddev, char *page)
2932{
2933 if (mddev->recovery_cp == MaxSector)
2934 return sprintf(page, "none\n");
2935 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2936}
2937
2938static ssize_t
2939resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2940{
2941 char *e;
2942 unsigned long long n = simple_strtoull(buf, &e, 10);
2943
2944 if (mddev->pers)
2945 return -EBUSY;
2946 if (!*buf || (*e && *e != '\n'))
2947 return -EINVAL;
2948
2949 mddev->recovery_cp = n;
2950 return len;
2951}
2952static struct md_sysfs_entry md_resync_start =
2953__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
/*
 * Array states exposed through the array_state attribute.  The enum
 * values index array_states[]; bad_word is the index of the terminating
 * NULL, i.e. what match_word() returns when nothing matches.
 */
enum array_state {
	clear,
	inactive,
	suspended,
	readonly,
	read_auto,
	clean,
	active,
	write_pending,
	active_idle,
	bad_word
};

/* Textual names, in the same order as enum array_state. */
static char *array_states[] = {
	"clear",
	"inactive",
	"suspended",
	"readonly",
	"read-auto",
	"clean",
	"active",
	"write-pending",
	"active-idle",
	NULL
};
2996
/*
 * Return the index of the first entry in the NULL-terminated @list for
 * which cmd_match(@word, entry) is true, or the index of the
 * terminating NULL if there is none.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;
	return idx;
}
3005
3006static ssize_t
3007array_state_show(mddev_t *mddev, char *page)
3008{
3009 enum array_state st = inactive;
3010
3011 if (mddev->pers)
3012 switch(mddev->ro) {
3013 case 1:
3014 st = readonly;
3015 break;
3016 case 2:
3017 st = read_auto;
3018 break;
3019 case 0:
3020 if (mddev->in_sync)
3021 st = clean;
3022 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
3023 st = write_pending;
3024 else if (mddev->safemode)
3025 st = active_idle;
3026 else
3027 st = active;
3028 }
3029 else {
3030 if (list_empty(&mddev->disks) &&
3031 mddev->raid_disks == 0 &&
3032 mddev->dev_sectors == 0)
3033 st = clear;
3034 else
3035 st = inactive;
3036 }
3037 return sprintf(page, "%s\n", array_states[st]);
3038}
3039
3040static int do_md_stop(mddev_t * mddev, int ro, int is_open);
3041static int do_md_run(mddev_t * mddev);
3042static int restart_array(mddev_t *mddev);
3043
3044static ssize_t
3045array_state_store(mddev_t *mddev, const char *buf, size_t len)
3046{
3047 int err = -EINVAL;
3048 enum array_state st = match_word(buf, array_states);
3049 switch(st) {
3050 case bad_word:
3051 break;
3052 case clear:
3053
3054 if (atomic_read(&mddev->openers) > 0)
3055 return -EBUSY;
3056 err = do_md_stop(mddev, 0, 0);
3057 break;
3058 case inactive:
3059
3060 if (mddev->pers) {
3061 if (atomic_read(&mddev->openers) > 0)
3062 return -EBUSY;
3063 err = do_md_stop(mddev, 2, 0);
3064 } else
3065 err = 0;
3066 break;
3067 case suspended:
3068 break;
3069 case readonly:
3070 if (mddev->pers)
3071 err = do_md_stop(mddev, 1, 0);
3072 else {
3073 mddev->ro = 1;
3074 set_disk_ro(mddev->gendisk, 1);
3075 err = do_md_run(mddev);
3076 }
3077 break;
3078 case read_auto:
3079 if (mddev->pers) {
3080 if (mddev->ro == 0)
3081 err = do_md_stop(mddev, 1, 0);
3082 else if (mddev->ro == 1)
3083 err = restart_array(mddev);
3084 if (err == 0) {
3085 mddev->ro = 2;
3086 set_disk_ro(mddev->gendisk, 0);
3087 }
3088 } else {
3089 mddev->ro = 2;
3090 err = do_md_run(mddev);
3091 }
3092 break;
3093 case clean:
3094 if (mddev->pers) {
3095 restart_array(mddev);
3096 spin_lock_irq(&mddev->write_lock);
3097 if (atomic_read(&mddev->writes_pending) == 0) {
3098 if (mddev->in_sync == 0) {
3099 mddev->in_sync = 1;
3100 if (mddev->safemode == 1)
3101 mddev->safemode = 0;
3102 if (mddev->persistent)
3103 set_bit(MD_CHANGE_CLEAN,
3104 &mddev->flags);
3105 }
3106 err = 0;
3107 } else
3108 err = -EBUSY;
3109 spin_unlock_irq(&mddev->write_lock);
3110 } else
3111 err = -EINVAL;
3112 break;
3113 case active:
3114 if (mddev->pers) {
3115 restart_array(mddev);
3116 if (mddev->external)
3117 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
3118 wake_up(&mddev->sb_wait);
3119 err = 0;
3120 } else {
3121 mddev->ro = 0;
3122 set_disk_ro(mddev->gendisk, 0);
3123 err = do_md_run(mddev);
3124 }
3125 break;
3126 case write_pending:
3127 case active_idle:
3128
3129 break;
3130 }
3131 if (err)
3132 return err;
3133 else {
3134 sysfs_notify_dirent(mddev->sysfs_state);
3135 return len;
3136 }
3137}
3138static struct md_sysfs_entry md_array_state =
3139__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3140
3141static ssize_t
3142null_show(mddev_t *mddev, char *page)
3143{
3144 return -EINVAL;
3145}
3146
3147static ssize_t
3148new_dev_store(mddev_t *mddev, const char *buf, size_t len)
3149{
3150
3151
3152
3153
3154
3155
3156
3157 char *e;
3158 int major = simple_strtoul(buf, &e, 10);
3159 int minor;
3160 dev_t dev;
3161 mdk_rdev_t *rdev;
3162 int err;
3163
3164 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
3165 return -EINVAL;
3166 minor = simple_strtoul(e+1, &e, 10);
3167 if (*e && *e != '\n')
3168 return -EINVAL;
3169 dev = MKDEV(major, minor);
3170 if (major != MAJOR(dev) ||
3171 minor != MINOR(dev))
3172 return -EOVERFLOW;
3173
3174
3175 if (mddev->persistent) {
3176 rdev = md_import_device(dev, mddev->major_version,
3177 mddev->minor_version);
3178 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
3179 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
3180 mdk_rdev_t, same_set);
3181 err = super_types[mddev->major_version]
3182 .load_super(rdev, rdev0, mddev->minor_version);
3183 if (err < 0)
3184 goto out;
3185 }
3186 } else if (mddev->external)
3187 rdev = md_import_device(dev, -2, -1);
3188 else
3189 rdev = md_import_device(dev, -1, -1);
3190
3191 if (IS_ERR(rdev))
3192 return PTR_ERR(rdev);
3193 err = bind_rdev_to_array(rdev, mddev);
3194 out:
3195 if (err)
3196 export_rdev(rdev);
3197 return err ? err : len;
3198}
3199
3200static struct md_sysfs_entry md_new_device =
3201__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
3202
3203static ssize_t
3204bitmap_store(mddev_t *mddev, const char *buf, size_t len)
3205{
3206 char *end;
3207 unsigned long chunk, end_chunk;
3208
3209 if (!mddev->bitmap)
3210 goto out;
3211
3212 while (*buf) {
3213 chunk = end_chunk = simple_strtoul(buf, &end, 0);
3214 if (buf == end) break;
3215 if (*end == '-') {
3216 buf = end + 1;
3217 end_chunk = simple_strtoul(buf, &end, 0);
3218 if (buf == end) break;
3219 }
3220 if (*end && !isspace(*end)) break;
3221 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3222 buf = end;
3223 while (isspace(*buf)) buf++;
3224 }
3225 bitmap_unplug(mddev->bitmap);
3226out:
3227 return len;
3228}
3229
3230static struct md_sysfs_entry md_bitmap =
3231__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
3232
3233static ssize_t
3234size_show(mddev_t *mddev, char *page)
3235{
3236 return sprintf(page, "%llu\n",
3237 (unsigned long long)mddev->dev_sectors / 2);
3238}
3239
3240static int update_size(mddev_t *mddev, sector_t num_sectors);
3241
3242static ssize_t
3243size_store(mddev_t *mddev, const char *buf, size_t len)
3244{
3245
3246
3247
3248
3249 sector_t sectors;
3250 int err = strict_blocks_to_sectors(buf, §ors);
3251
3252 if (err < 0)
3253 return err;
3254 if (mddev->pers) {
3255 err = update_size(mddev, sectors);
3256 md_update_sb(mddev, 1);
3257 } else {
3258 if (mddev->dev_sectors == 0 ||
3259 mddev->dev_sectors > sectors)
3260 mddev->dev_sectors = sectors;
3261 else
3262 err = -ENOSPC;
3263 }
3264 return err ? err : len;
3265}
3266
3267static struct md_sysfs_entry md_size =
3268__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
3269
3270
3271
3272
3273
3274
3275
3276
3277static ssize_t
3278metadata_show(mddev_t *mddev, char *page)
3279{
3280 if (mddev->persistent)
3281 return sprintf(page, "%d.%d\n",
3282 mddev->major_version, mddev->minor_version);
3283 else if (mddev->external)
3284 return sprintf(page, "external:%s\n", mddev->metadata_type);
3285 else
3286 return sprintf(page, "none\n");
3287}
3288
3289static ssize_t
3290metadata_store(mddev_t *mddev, const char *buf, size_t len)
3291{
3292 int major, minor;
3293 char *e;
3294
3295
3296
3297
3298 if (mddev->external && strncmp(buf, "external:", 9) == 0)
3299 ;
3300 else if (!list_empty(&mddev->disks))
3301 return -EBUSY;
3302
3303 if (cmd_match(buf, "none")) {
3304 mddev->persistent = 0;
3305 mddev->external = 0;
3306 mddev->major_version = 0;
3307 mddev->minor_version = 90;
3308 return len;
3309 }
3310 if (strncmp(buf, "external:", 9) == 0) {
3311 size_t namelen = len-9;
3312 if (namelen >= sizeof(mddev->metadata_type))
3313 namelen = sizeof(mddev->metadata_type)-1;
3314 strncpy(mddev->metadata_type, buf+9, namelen);
3315 mddev->metadata_type[namelen] = 0;
3316 if (namelen && mddev->metadata_type[namelen-1] == '\n')
3317 mddev->metadata_type[--namelen] = 0;
3318 mddev->persistent = 0;
3319 mddev->external = 1;
3320 mddev->major_version = 0;
3321 mddev->minor_version = 90;
3322 return len;
3323 }
3324 major = simple_strtoul(buf, &e, 10);
3325 if (e==buf || *e != '.')
3326 return -EINVAL;
3327 buf = e+1;
3328 minor = simple_strtoul(buf, &e, 10);
3329 if (e==buf || (*e && *e != '\n') )
3330 return -EINVAL;
3331 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
3332 return -ENOENT;
3333 mddev->major_version = major;
3334 mddev->minor_version = minor;
3335 mddev->persistent = 1;
3336 mddev->external = 0;
3337 return len;
3338}
3339
3340static struct md_sysfs_entry md_metadata =
3341__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
3342
3343static ssize_t
3344action_show(mddev_t *mddev, char *page)
3345{
3346 char *type = "idle";
3347 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3348 type = "frozen";
3349 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3350 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
3351 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3352 type = "reshape";
3353 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3354 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
3355 type = "resync";
3356 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
3357 type = "check";
3358 else
3359 type = "repair";
3360 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
3361 type = "recover";
3362 }
3363 return sprintf(page, "%s\n", type);
3364}
3365
3366static ssize_t
3367action_store(mddev_t *mddev, const char *page, size_t len)
3368{
3369 if (!mddev->pers || !mddev->pers->sync_request)
3370 return -EINVAL;
3371
3372 if (cmd_match(page, "frozen"))
3373 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3374 else
3375 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3376
3377 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
3378 if (mddev->sync_thread) {
3379 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3380 md_unregister_thread(mddev->sync_thread);
3381 mddev->sync_thread = NULL;
3382 mddev->recovery = 0;
3383 }
3384 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3385 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
3386 return -EBUSY;
3387 else if (cmd_match(page, "resync"))
3388 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3389 else if (cmd_match(page, "recover")) {
3390 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
3391 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3392 } else if (cmd_match(page, "reshape")) {
3393 int err;
3394 if (mddev->pers->start_reshape == NULL)
3395 return -EINVAL;
3396 err = mddev->pers->start_reshape(mddev);
3397 if (err)
3398 return err;
3399 sysfs_notify(&mddev->kobj, NULL, "degraded");
3400 } else {
3401 if (cmd_match(page, "check"))
3402 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3403 else if (!cmd_match(page, "repair"))
3404 return -EINVAL;
3405 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
3406 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3407 }
3408 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3409 md_wakeup_thread(mddev->thread);
3410 sysfs_notify_dirent(mddev->sysfs_action);
3411 return len;
3412}
3413
3414static ssize_t
3415mismatch_cnt_show(mddev_t *mddev, char *page)
3416{
3417 return sprintf(page, "%llu\n",
3418 (unsigned long long) mddev->resync_mismatches);
3419}
3420
3421static struct md_sysfs_entry md_scan_mode =
3422__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
3423
3424
3425static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
3426
3427static ssize_t
3428sync_min_show(mddev_t *mddev, char *page)
3429{
3430 return sprintf(page, "%d (%s)\n", speed_min(mddev),
3431 mddev->sync_speed_min ? "local": "system");
3432}
3433
3434static ssize_t
3435sync_min_store(mddev_t *mddev, const char *buf, size_t len)
3436{
3437 int min;
3438 char *e;
3439 if (strncmp(buf, "system", 6)==0) {
3440 mddev->sync_speed_min = 0;
3441 return len;
3442 }
3443 min = simple_strtoul(buf, &e, 10);
3444 if (buf == e || (*e && *e != '\n') || min <= 0)
3445 return -EINVAL;
3446 mddev->sync_speed_min = min;
3447 return len;
3448}
3449
3450static struct md_sysfs_entry md_sync_min =
3451__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
3452
3453static ssize_t
3454sync_max_show(mddev_t *mddev, char *page)
3455{
3456 return sprintf(page, "%d (%s)\n", speed_max(mddev),
3457 mddev->sync_speed_max ? "local": "system");
3458}
3459
3460static ssize_t
3461sync_max_store(mddev_t *mddev, const char *buf, size_t len)
3462{
3463 int max;
3464 char *e;
3465 if (strncmp(buf, "system", 6)==0) {
3466 mddev->sync_speed_max = 0;
3467 return len;
3468 }
3469 max = simple_strtoul(buf, &e, 10);
3470 if (buf == e || (*e && *e != '\n') || max <= 0)
3471 return -EINVAL;
3472 mddev->sync_speed_max = max;
3473 return len;
3474}
3475
3476static struct md_sysfs_entry md_sync_max =
3477__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
3478
3479static ssize_t
3480degraded_show(mddev_t *mddev, char *page)
3481{
3482 return sprintf(page, "%d\n", mddev->degraded);
3483}
3484static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3485
3486static ssize_t
3487sync_force_parallel_show(mddev_t *mddev, char *page)
3488{
3489 return sprintf(page, "%d\n", mddev->parallel_resync);
3490}
3491
3492static ssize_t
3493sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3494{
3495 long n;
3496
3497 if (strict_strtol(buf, 10, &n))
3498 return -EINVAL;
3499
3500 if (n != 0 && n != 1)
3501 return -EINVAL;
3502
3503 mddev->parallel_resync = n;
3504
3505 if (mddev->sync_thread)
3506 wake_up(&resync_wait);
3507
3508 return len;
3509}
3510
3511
3512static struct md_sysfs_entry md_sync_force_parallel =
3513__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3514 sync_force_parallel_show, sync_force_parallel_store);
3515
3516static ssize_t
3517sync_speed_show(mddev_t *mddev, char *page)
3518{
3519 unsigned long resync, dt, db;
3520 if (mddev->curr_resync == 0)
3521 return sprintf(page, "none\n");
3522 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
3523 dt = (jiffies - mddev->resync_mark) / HZ;
3524 if (!dt) dt++;
3525 db = resync - mddev->resync_mark_cnt;
3526 return sprintf(page, "%lu\n", db/dt/2);
3527}
3528
3529static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3530
3531static ssize_t
3532sync_completed_show(mddev_t *mddev, char *page)
3533{
3534 unsigned long max_sectors, resync;
3535
3536 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3537 return sprintf(page, "none\n");
3538
3539 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3540 max_sectors = mddev->resync_max_sectors;
3541 else
3542 max_sectors = mddev->dev_sectors;
3543
3544 resync = mddev->curr_resync_completed;
3545 return sprintf(page, "%lu / %lu\n", resync, max_sectors);
3546}
3547
3548static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3549
3550static ssize_t
3551min_sync_show(mddev_t *mddev, char *page)
3552{
3553 return sprintf(page, "%llu\n",
3554 (unsigned long long)mddev->resync_min);
3555}
3556static ssize_t
3557min_sync_store(mddev_t *mddev, const char *buf, size_t len)
3558{
3559 unsigned long long min;
3560 if (strict_strtoull(buf, 10, &min))
3561 return -EINVAL;
3562 if (min > mddev->resync_max)
3563 return -EINVAL;
3564 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3565 return -EBUSY;
3566
3567
3568 if (mddev->chunk_sectors) {
3569 sector_t temp = min;
3570 if (sector_div(temp, mddev->chunk_sectors))
3571 return -EINVAL;
3572 }
3573 mddev->resync_min = min;
3574
3575 return len;
3576}
3577
3578static struct md_sysfs_entry md_min_sync =
3579__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
3580
3581static ssize_t
3582max_sync_show(mddev_t *mddev, char *page)
3583{
3584 if (mddev->resync_max == MaxSector)
3585 return sprintf(page, "max\n");
3586 else
3587 return sprintf(page, "%llu\n",
3588 (unsigned long long)mddev->resync_max);
3589}
3590static ssize_t
3591max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3592{
3593 if (strncmp(buf, "max", 3) == 0)
3594 mddev->resync_max = MaxSector;
3595 else {
3596 unsigned long long max;
3597 if (strict_strtoull(buf, 10, &max))
3598 return -EINVAL;
3599 if (max < mddev->resync_min)
3600 return -EINVAL;
3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3604 return -EBUSY;
3605
3606
3607 if (mddev->chunk_sectors) {
3608 sector_t temp = max;
3609 if (sector_div(temp, mddev->chunk_sectors))
3610 return -EINVAL;
3611 }
3612 mddev->resync_max = max;
3613 }
3614 wake_up(&mddev->recovery_wait);
3615 return len;
3616}
3617
3618static struct md_sysfs_entry md_max_sync =
3619__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
3620
3621static ssize_t
3622suspend_lo_show(mddev_t *mddev, char *page)
3623{
3624 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
3625}
3626
3627static ssize_t
3628suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3629{
3630 char *e;
3631 unsigned long long new = simple_strtoull(buf, &e, 10);
3632
3633 if (mddev->pers == NULL ||
3634 mddev->pers->quiesce == NULL)
3635 return -EINVAL;
3636 if (buf == e || (*e && *e != '\n'))
3637 return -EINVAL;
3638 if (new >= mddev->suspend_hi ||
3639 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
3640 mddev->suspend_lo = new;
3641 mddev->pers->quiesce(mddev, 2);
3642 return len;
3643 } else
3644 return -EINVAL;
3645}
3646static struct md_sysfs_entry md_suspend_lo =
3647__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
3648
3649
3650static ssize_t
3651suspend_hi_show(mddev_t *mddev, char *page)
3652{
3653 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
3654}
3655
3656static ssize_t
3657suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3658{
3659 char *e;
3660 unsigned long long new = simple_strtoull(buf, &e, 10);
3661
3662 if (mddev->pers == NULL ||
3663 mddev->pers->quiesce == NULL)
3664 return -EINVAL;
3665 if (buf == e || (*e && *e != '\n'))
3666 return -EINVAL;
3667 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
3668 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
3669 mddev->suspend_hi = new;
3670 mddev->pers->quiesce(mddev, 1);
3671 mddev->pers->quiesce(mddev, 0);
3672 return len;
3673 } else
3674 return -EINVAL;
3675}
3676static struct md_sysfs_entry md_suspend_hi =
3677__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
3678
3679static ssize_t
3680reshape_position_show(mddev_t *mddev, char *page)
3681{
3682 if (mddev->reshape_position != MaxSector)
3683 return sprintf(page, "%llu\n",
3684 (unsigned long long)mddev->reshape_position);
3685 strcpy(page, "none\n");
3686 return 5;
3687}
3688
3689static ssize_t
3690reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
3691{
3692 char *e;
3693 unsigned long long new = simple_strtoull(buf, &e, 10);
3694 if (mddev->pers)
3695 return -EBUSY;
3696 if (buf == e || (*e && *e != '\n'))
3697 return -EINVAL;
3698 mddev->reshape_position = new;
3699 mddev->delta_disks = 0;
3700 mddev->new_level = mddev->level;
3701 mddev->new_layout = mddev->layout;
3702 mddev->new_chunk_sectors = mddev->chunk_sectors;
3703 return len;
3704}
3705
3706static struct md_sysfs_entry md_reshape_position =
3707__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3708 reshape_position_store);
3709
3710static ssize_t
3711array_size_show(mddev_t *mddev, char *page)
3712{
3713 if (mddev->external_size)
3714 return sprintf(page, "%llu\n",
3715 (unsigned long long)mddev->array_sectors/2);
3716 else
3717 return sprintf(page, "default\n");
3718}
3719
3720static ssize_t
3721array_size_store(mddev_t *mddev, const char *buf, size_t len)
3722{
3723 sector_t sectors;
3724
3725 if (strncmp(buf, "default", 7) == 0) {
3726 if (mddev->pers)
3727 sectors = mddev->pers->size(mddev, 0, 0);
3728 else
3729 sectors = mddev->array_sectors;
3730
3731 mddev->external_size = 0;
3732 } else {
3733 if (strict_blocks_to_sectors(buf, §ors) < 0)
3734 return -EINVAL;
3735 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
3736 return -E2BIG;
3737
3738 mddev->external_size = 1;
3739 }
3740
3741 mddev->array_sectors = sectors;
3742 set_capacity(mddev->gendisk, mddev->array_sectors);
3743 if (mddev->pers)
3744 revalidate_disk(mddev->gendisk);
3745
3746 return len;
3747}
3748
3749static struct md_sysfs_entry md_array_size =
3750__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
3751 array_size_store);
3752
3753static struct attribute *md_default_attrs[] = {
3754 &md_level.attr,
3755 &md_layout.attr,
3756 &md_raid_disks.attr,
3757 &md_chunk_size.attr,
3758 &md_size.attr,
3759 &md_resync_start.attr,
3760 &md_metadata.attr,
3761 &md_new_device.attr,
3762 &md_safe_delay.attr,
3763 &md_array_state.attr,
3764 &md_reshape_position.attr,
3765 &md_array_size.attr,
3766 NULL,
3767};
3768
3769static struct attribute *md_redundancy_attrs[] = {
3770 &md_scan_mode.attr,
3771 &md_mismatches.attr,
3772 &md_sync_min.attr,
3773 &md_sync_max.attr,
3774 &md_sync_speed.attr,
3775 &md_sync_force_parallel.attr,
3776 &md_sync_completed.attr,
3777 &md_min_sync.attr,
3778 &md_max_sync.attr,
3779 &md_suspend_lo.attr,
3780 &md_suspend_hi.attr,
3781 &md_bitmap.attr,
3782 &md_degraded.attr,
3783 NULL,
3784};
3785static struct attribute_group md_redundancy_group = {
3786 .name = NULL,
3787 .attrs = md_redundancy_attrs,
3788};
3789
3790
3791static ssize_t
3792md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3793{
3794 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3795 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3796 ssize_t rv;
3797
3798 if (!entry->show)
3799 return -EIO;
3800 rv = mddev_lock(mddev);
3801 if (!rv) {
3802 rv = entry->show(mddev, page);
3803 mddev_unlock(mddev);
3804 }
3805 return rv;
3806}
3807
3808static ssize_t
3809md_attr_store(struct kobject *kobj, struct attribute *attr,
3810 const char *page, size_t length)
3811{
3812 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3813 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3814 ssize_t rv;
3815
3816 if (!entry->store)
3817 return -EIO;
3818 if (!capable(CAP_SYS_ADMIN))
3819 return -EACCES;
3820 rv = mddev_lock(mddev);
3821 if (mddev->hold_active == UNTIL_IOCTL)
3822 mddev->hold_active = 0;
3823 if (!rv) {
3824 rv = entry->store(mddev, page, length);
3825 mddev_unlock(mddev);
3826 }
3827 return rv;
3828}
3829
3830static void md_free(struct kobject *ko)
3831{
3832 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3833
3834 if (mddev->sysfs_state)
3835 sysfs_put(mddev->sysfs_state);
3836
3837 if (mddev->gendisk) {
3838 del_gendisk(mddev->gendisk);
3839 put_disk(mddev->gendisk);
3840 }
3841 if (mddev->queue)
3842 blk_cleanup_queue(mddev->queue);
3843
3844 kfree(mddev);
3845}
3846
/*
 * sysfs read/write entry points for every attribute on an array kobject;
 * both dispatch to the per-attribute md_sysfs_entry methods.
 */
static struct sysfs_ops md_sysfs_ops = {
 .show = md_attr_show,
 .store = md_attr_store,
};
/*
 * kobject type for the array kobject embedded in mddev_t (md_alloc() passes
 * it to kobject_init_and_add()).  md_free() runs as the release callback and
 * md_default_attrs supplies the default attribute set.
 */
static struct kobj_type md_ktype = {
 .release = md_free,
 .sysfs_ops = &md_sysfs_ops,
 .default_attrs = md_default_attrs,
};
3856
/*
 * Major number used to build the dev_t of partitionable arrays (see
 * MKDEV(mdp_major, ...) in autorun_devices()).  Starts out as 0.
 * NOTE(review): presumably filled in when the partitionable block major is
 * registered; that assignment is not visible in this part of the file.
 */
int mdp_major = 0;
3858
3859static void mddev_delayed_delete(struct work_struct *ws)
3860{
3861 mddev_t *mddev = container_of(ws, mddev_t, del_work);
3862
3863 if (mddev->private == &md_redundancy_group) {
3864 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
3865 if (mddev->sysfs_action)
3866 sysfs_put(mddev->sysfs_action);
3867 mddev->sysfs_action = NULL;
3868 mddev->private = NULL;
3869 }
3870 kobject_del(&mddev->kobj);
3871 kobject_put(&mddev->kobj);
3872}
3873
3874static int md_alloc(dev_t dev, char *name)
3875{
3876 static DEFINE_MUTEX(disks_mutex);
3877 mddev_t *mddev = mddev_find(dev);
3878 struct gendisk *disk;
3879 int partitioned;
3880 int shift;
3881 int unit;
3882 int error;
3883
3884 if (!mddev)
3885 return -ENODEV;
3886
3887 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
3888 shift = partitioned ? MdpMinorShift : 0;
3889 unit = MINOR(mddev->unit) >> shift;
3890
3891
3892
3893
3894 flush_scheduled_work();
3895
3896 mutex_lock(&disks_mutex);
3897 error = -EEXIST;
3898 if (mddev->gendisk)
3899 goto abort;
3900
3901 if (name) {
3902
3903
3904 mddev_t *mddev2;
3905 spin_lock(&all_mddevs_lock);
3906
3907 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
3908 if (mddev2->gendisk &&
3909 strcmp(mddev2->gendisk->disk_name, name) == 0) {
3910 spin_unlock(&all_mddevs_lock);
3911 goto abort;
3912 }
3913 spin_unlock(&all_mddevs_lock);
3914 }
3915
3916 error = -ENOMEM;
3917 mddev->queue = blk_alloc_queue(GFP_KERNEL);
3918 if (!mddev->queue)
3919 goto abort;
3920 mddev->queue->queuedata = mddev;
3921
3922
3923 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
3924
3925 blk_queue_make_request(mddev->queue, md_make_request);
3926
3927 disk = alloc_disk(1 << shift);
3928 if (!disk) {
3929 blk_cleanup_queue(mddev->queue);
3930 mddev->queue = NULL;
3931 goto abort;
3932 }
3933 disk->major = MAJOR(mddev->unit);
3934 disk->first_minor = unit << shift;
3935 if (name)
3936 strcpy(disk->disk_name, name);
3937 else if (partitioned)
3938 sprintf(disk->disk_name, "md_d%d", unit);
3939 else
3940 sprintf(disk->disk_name, "md%d", unit);
3941 disk->fops = &md_fops;
3942 disk->private_data = mddev;
3943 disk->queue = mddev->queue;
3944
3945
3946
3947
3948 disk->flags |= GENHD_FL_EXT_DEVT;
3949 add_disk(disk);
3950 mddev->gendisk = disk;
3951 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
3952 &disk_to_dev(disk)->kobj, "%s", "md");
3953 if (error) {
3954
3955
3956
3957 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
3958 disk->disk_name);
3959 error = 0;
3960 }
3961 abort:
3962 mutex_unlock(&disks_mutex);
3963 if (!error) {
3964 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3965 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
3966 }
3967 mddev_put(mddev);
3968 return error;
3969}
3970
/*
 * Probe-style wrapper around md_alloc(): asks for the array for 'dev' to be
 * set up (with no explicit name), ignores md_alloc()'s result and always
 * returns NULL.  'part' and 'data' are unused.
 */
static struct kobject *md_probe(dev_t dev, int *part, void *data)
{
 md_alloc(dev, NULL);
 return NULL;
}
3976
3977static int add_named_array(const char *val, struct kernel_param *kp)
3978{
3979
3980
3981
3982
3983 int len = strlen(val);
3984 char buf[DISK_NAME_LEN];
3985
3986 while (len && val[len-1] == '\n')
3987 len--;
3988 if (len >= DISK_NAME_LEN)
3989 return -E2BIG;
3990 strlcpy(buf, val, len+1);
3991 if (strncmp(buf, "md_", 3) != 0)
3992 return -EINVAL;
3993 return md_alloc(0, buf);
3994}
3995
3996static void md_safemode_timeout(unsigned long data)
3997{
3998 mddev_t *mddev = (mddev_t *) data;
3999
4000 if (!atomic_read(&mddev->writes_pending)) {
4001 mddev->safemode = 1;
4002 if (mddev->external)
4003 sysfs_notify_dirent(mddev->sysfs_state);
4004 }
4005 md_wakeup_thread(mddev->thread);
4006}
4007
/*
 * Copied into mddev->ok_start_degraded by do_md_run() each time an array is
 * started.  NOTE(review): presumably a module/boot parameter - the place
 * where it is set is not visible in this part of the file.
 */
static int start_dirty_degraded;
4009
/*
 * do_md_run - start an array that has had its member devices attached.
 *
 * Validates the member devices, makes sure the gendisk exists, binds the
 * personality matching mddev->level / mddev->clevel, runs it, optionally
 * creates the bitmap and the redundancy sysfs group, and finally publishes
 * the array (capacity, sysfs notifications, uevent).
 *
 * Returns 0 on success, or:
 *  -EINVAL  no devices, no geometry on a non-persistent array, data and
 *           metadata overlap, personality not loaded, reshape pending on
 *           a personality without start_reshape, or array_sectors larger
 *           than what the personality reports;
 *  -EBUSY   a personality is already attached;
 *  -ENOMEM  the gendisk could not be set up;
 *  or the error from pers->run() / bitmap_create().
 */
static int do_md_run(mddev_t * mddev)
{
 int err;
 mdk_rdev_t *rdev;
 struct gendisk *disk;
 struct mdk_personality *pers;

 if (list_empty(&mddev->disks))
 /* an array without any member device cannot be run */
 return -EINVAL;

 if (mddev->pers)
 return -EBUSY;

 /*
 * raid_disks has not been set yet: only an array with persistent
 * superblocks can continue, via analyze_sbs().
 */
 if (!mddev->raid_disks) {
 if (!mddev->persistent)
 return -EINVAL;
 analyze_sbs(mddev);
 }

 /* Ask for the personality module, by numeric level or by name. */
 if (mddev->level != LEVEL_NONE)
 request_module("md-level-%d", mddev->level);
 else if (mddev->clevel[0])
 request_module("md-%s", mddev->clevel);

 /*
 * For every non-faulty member: flush and invalidate its block-device
 * cache, then check that data and metadata areas do not overlap.
 */
 list_for_each_entry(rdev, &mddev->disks, same_set) {
 if (test_bit(Faulty, &rdev->flags))
 continue;
 sync_blockdev(rdev->bdev);
 invalidate_bdev(rdev->bdev);

 /*
 * Data before the superblock: data_offset + dev_sectors must not
 * run past sb_start.  Superblock before the data: sb_start plus the
 * superblock size (in 512-byte sectors) must not reach data_offset.
 */
 if (rdev->data_offset < rdev->sb_start) {
 if (mddev->dev_sectors &&
 rdev->data_offset + mddev->dev_sectors
 > rdev->sb_start) {
 printk("md: %s: data overlaps metadata\n",
 mdname(mddev));
 return -EINVAL;
 }
 } else {
 if (rdev->sb_start + rdev->sb_size/512
 > rdev->data_offset) {
 printk("md: %s: metadata overlaps data\n",
 mdname(mddev));
 return -EINVAL;
 }
 }
 sysfs_notify_dirent(rdev->sysfs_state);
 }

 /* Make sure the gendisk exists; md_probe() itself reports nothing. */
 md_probe(mddev->unit, NULL, NULL);
 disk = mddev->gendisk;
 if (!disk)
 return -ENOMEM;

 /* Look up the personality and pin its module, under pers_lock. */
 spin_lock(&pers_lock);
 pers = find_pers(mddev->level, mddev->clevel);
 if (!pers || !try_module_get(pers->owner)) {
 spin_unlock(&pers_lock);
 if (mddev->level != LEVEL_NONE)
 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
 mddev->level);
 else
 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
 mddev->clevel);
 return -EINVAL;
 }
 mddev->pers = pers;
 spin_unlock(&pers_lock);
 /* Keep level, new_level and clevel in step with the personality found. */
 if (mddev->level != pers->level) {
 mddev->level = pers->level;
 mddev->new_level = pers->level;
 }
 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

 if (mddev->reshape_position != MaxSector &&
 pers->start_reshape == NULL) {
 /* a reshape is recorded but this personality cannot reshape */
 mddev->pers = NULL;
 module_put(pers->owner);
 return -EINVAL;
 }

 if (pers->sync_request) {
 /*
 * Warn (but do not fail) when two members share the same
 * bd_contains, i.e. appear to sit on the same physical disk.
 */
 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 mdk_rdev_t *rdev2;
 int warned = 0;

 list_for_each_entry(rdev, &mddev->disks, same_set)
 list_for_each_entry(rdev2, &mddev->disks, same_set) {
 /* rdev < rdev2 reports each unordered pair only once */
 if (rdev < rdev2 &&
 rdev->bdev->bd_contains ==
 rdev2->bdev->bd_contains) {
 printk(KERN_WARNING
 "%s: WARNING: %s appears to be"
 " on the same physical disk as"
 " %s.\n",
 mdname(mddev),
 bdevname(rdev->bdev,b),
 bdevname(rdev2->bdev,b2));
 warned = 1;
 }
 }

 if (warned)
 printk(KERN_WARNING
 "True protection against single-disk"
 " failure might be compromised.\n");
 }

 mddev->recovery = 0;
 /* pers->run() may change this; default to the per-device size */
 mddev->resync_max_sectors = mddev->dev_sectors;

 mddev->barriers_work = 1;
 mddev->ok_start_degraded = start_dirty_degraded;

 /* ro == 2 is cleared again below for personalities without sync_request */
 if (start_readonly)
 mddev->ro = 2;

 err = mddev->pers->run(mddev);
 if (err)
 printk(KERN_ERR "md: pers->run() failed ...\n");
 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
 /* the configured array size exceeds what the personality offers */
 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
 " but 'external_size' not in effect?\n", __func__);
 printk(KERN_ERR
 "md: invalid array_size %llu > default size %llu\n",
 (unsigned long long)mddev->array_sectors / 2,
 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
 err = -EINVAL;
 mddev->pers->stop(mddev);
 }
 if (err == 0 && mddev->pers->sync_request) {
 err = bitmap_create(mddev);
 if (err) {
 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
 mdname(mddev), err);
 mddev->pers->stop(mddev);
 }
 }
 /* Common failure path: unpin the module and detach the personality. */
 if (err) {
 module_put(mddev->pers->owner);
 mddev->pers = NULL;
 bitmap_destroy(mddev);
 return err;
 }
 if (mddev->pers->sync_request) {
 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
 printk(KERN_WARNING
 "md: cannot register extra attributes for %s\n",
 mdname(mddev));
 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
 } else if (mddev->ro == 2)
 mddev->ro = 0;

 /* Reset write tracking and arm the safemode timer parameters. */
 atomic_set(&mddev->writes_pending,0);
 mddev->safemode = 0;
 mddev->safemode_timer.function = md_safemode_timeout;
 mddev->safemode_timer.data = (unsigned long) mddev;
 /* 200 ms expressed in jiffies, plus one */
 mddev->safemode_delay = (200 * HZ)/1000 +1;
 mddev->in_sync = 1;

 /* Create an "rd<N>" sysfs link for every member that holds a slot. */
 list_for_each_entry(rdev, &mddev->disks, same_set)
 if (rdev->raid_disk >= 0) {
 char nm[20];
 sprintf(nm, "rd%d", rdev->raid_disk);
 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
 printk("md: cannot register %s for %s\n",
 nm, mdname(mddev));
 }

 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

 /* any flag bit set means the superblock needs to be written */
 if (mddev->flags)
 md_update_sb(mddev, 0);

 set_capacity(disk, mddev->array_sectors);

 /*
 * Degraded array with no sync thread yet: if some member holds a slot
 * but is neither In_sync nor Faulty, start a sync thread right away
 * (only for personalities that provide sync_request).
 */
 if (mddev->degraded && !mddev->sync_thread) {
 int spares = 0;
 list_for_each_entry(rdev, &mddev->disks, same_set)
 if (rdev->raid_disk >= 0 &&
 !test_bit(In_sync, &rdev->flags) &&
 !test_bit(Faulty, &rdev->flags))
 /* counts as a partially recovered member */
 spares++;
 if (spares && mddev->pers->sync_request) {
 mddev->recovery = 0;
 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 mddev->sync_thread = md_register_thread(md_do_sync,
 mddev,
 "%s_resync");
 if (!mddev->sync_thread) {
 printk(KERN_ERR "%s: could not start resync"
 " thread...\n",
 mdname(mddev));
 /* leave the array running, just without recovery */
 mddev->recovery = 0;
 }
 }
 }
 md_wakeup_thread(mddev->thread);
 md_wakeup_thread(mddev->sync_thread);

 /* Publish the running array to the block layer, sysfs and udev. */
 revalidate_disk(mddev->gendisk);
 mddev->changed = 1;
 md_new_event(mddev);
 sysfs_notify_dirent(mddev->sysfs_state);
 if (mddev->sysfs_action)
 sysfs_notify_dirent(mddev->sysfs_action);
 sysfs_notify(&mddev->kobj, NULL, "degraded");
 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 return 0;
}
4244
4245static int restart_array(mddev_t *mddev)
4246{
4247 struct gendisk *disk = mddev->gendisk;
4248
4249
4250 if (list_empty(&mddev->disks))
4251 return -ENXIO;
4252 if (!mddev->pers)
4253 return -EINVAL;
4254 if (!mddev->ro)
4255 return -EBUSY;
4256 mddev->safemode = 0;
4257 mddev->ro = 0;
4258 set_disk_ro(disk, 0);
4259 printk(KERN_INFO "md: %s switched to read-write mode.\n",
4260 mdname(mddev));
4261
4262 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4263 md_wakeup_thread(mddev->thread);
4264 md_wakeup_thread(mddev->sync_thread);
4265 sysfs_notify_dirent(mddev->sysfs_state);
4266 return 0;
4267}
4268
4269
4270
4271static int deny_bitmap_write_access(struct file * file)
4272{
4273 struct inode *inode = file->f_mapping->host;
4274
4275 spin_lock(&inode->i_lock);
4276 if (atomic_read(&inode->i_writecount) > 1) {
4277 spin_unlock(&inode->i_lock);
4278 return -ETXTBSY;
4279 }
4280 atomic_set(&inode->i_writecount, -1);
4281 spin_unlock(&inode->i_lock);
4282
4283 return 0;
4284}
4285
4286static void restore_bitmap_write_access(struct file *file)
4287{
4288 struct inode *inode = file->f_mapping->host;
4289
4290 spin_lock(&inode->i_lock);
4291 atomic_set(&inode->i_writecount, 1);
4292 spin_unlock(&inode->i_lock);
4293}
4294
4295
4296
4297
4298
4299
/*
 * do_md_stop - stop an array or switch it to read-only.
 *
 * mode:
 *   0 - stop the personality and dismantle the whole array configuration
 *   1 - switch the array to read-only
 *   2 - stop the personality but keep the array configuration
 * is_open: number of opens the caller itself accounts for; the request is
 *          refused while mddev->openers is larger than this.
 *
 * Returns -EBUSY while the array is still open elsewhere, -ENXIO when
 * mode 1 is requested on an array whose ro is already 1, and 0 otherwise
 * (including when no personality is attached).
 */
static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{
 int err = 0;
 struct gendisk *disk = mddev->gendisk;
 mdk_rdev_t *rdev;

 mutex_lock(&mddev->open_mutex);
 if (atomic_read(&mddev->openers) > is_open) {
 printk("md: %s still in use.\n",mdname(mddev));
 err = -EBUSY;
 } else if (mddev->pers) {

 /* Interrupt and reap a running sync thread; FROZEN stays set until the end. */
 if (mddev->sync_thread) {
 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 md_unregister_thread(mddev->sync_thread);
 mddev->sync_thread = NULL;
 }

 del_timer_sync(&mddev->safemode_timer);

 switch(mode) {
 case 1: /* switch to read-only */
 err = -ENXIO;
 if (mddev->ro==1)
 goto out;
 mddev->ro = 1;
 break;
 case 0: /* full stop and disassemble */
 case 2: /* stop, but keep the configuration */
 bitmap_flush(mddev);
 md_super_wait(mddev);
 if (mddev->ro)
 set_disk_ro(disk, 0);

 /* Detach the personality and clear the queue hooks it may have installed. */
 mddev->pers->stop(mddev);
 mddev->queue->merge_bvec_fn = NULL;
 mddev->queue->unplug_fn = NULL;
 mddev->queue->backing_dev_info.congested_fn = NULL;
 module_put(mddev->pers->owner);
 /* tells mddev_delayed_delete() that the redundancy group must be removed */
 if (mddev->pers->sync_request)
 mddev->private = &md_redundancy_group;
 mddev->pers = NULL;
 /* tell userspace to handle 'inactive' */
 sysfs_notify_dirent(mddev->sysfs_state);

 /* Remove the "rd<N>" links created by do_md_run(). */
 list_for_each_entry(rdev, &mddev->disks, same_set)
 if (rdev->raid_disk >= 0) {
 char nm[20];
 sprintf(nm, "rd%d", rdev->raid_disk);
 sysfs_remove_link(&mddev->kobj, nm);
 }

 set_capacity(disk, 0);
 mddev->changed = 1;

 if (mddev->ro)
 mddev->ro = 0;
 }
 if (!mddev->in_sync || mddev->flags) {
 /* mark the array as shut down cleanly and write that out */
 mddev->in_sync = 1;
 md_update_sb(mddev, 1);
 }
 if (mode == 1)
 set_disk_ro(disk, 1);
 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 err = 0;
 }
out:
 mutex_unlock(&mddev->open_mutex);
 if (err)
 return err;

 /*
 * Mode 0 only: release the bitmap and its backing file, release the
 * member devices and reset the array description to its unconfigured
 * state.
 */
 if (mode == 0) {

 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));

 bitmap_destroy(mddev);
 if (mddev->bitmap_file) {
 restore_bitmap_write_access(mddev->bitmap_file);
 fput(mddev->bitmap_file);
 mddev->bitmap_file = NULL;
 }
 mddev->bitmap_offset = 0;

 /* let queued work finish before the member devices are exported */
 flush_scheduled_work();

 export_array(mddev);

 mddev->array_sectors = 0;
 mddev->external_size = 0;
 mddev->dev_sectors = 0;
 mddev->raid_disks = 0;
 mddev->recovery_cp = 0;
 mddev->resync_min = 0;
 mddev->resync_max = MaxSector;
 mddev->reshape_position = MaxSector;
 mddev->external = 0;
 mddev->persistent = 0;
 mddev->level = LEVEL_NONE;
 mddev->clevel[0] = 0;
 mddev->flags = 0;
 mddev->ro = 0;
 mddev->metadata_type[0] = 0;
 mddev->chunk_sectors = 0;
 mddev->ctime = mddev->utime = 0;
 mddev->layout = 0;
 mddev->max_disks = 0;
 mddev->events = 0;
 mddev->delta_disks = 0;
 mddev->new_level = LEVEL_NONE;
 mddev->new_layout = 0;
 mddev->new_chunk_sectors = 0;
 mddev->curr_resync = 0;
 mddev->resync_mismatches = 0;
 mddev->suspend_lo = mddev->suspend_hi = 0;
 mddev->sync_speed_min = mddev->sync_speed_max = 0;
 mddev->recovery = 0;
 mddev->in_sync = 0;
 mddev->changed = 0;
 mddev->degraded = 0;
 mddev->barriers_work = 0;
 mddev->safemode = 0;
 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 /* an UNTIL_STOP hold ends here */
 if (mddev->hold_active == UNTIL_STOP)
 mddev->hold_active = 0;

 } else if (mddev->pers)
 printk(KERN_INFO "md: %s switched to read-only mode.\n",
 mdname(mddev));
 err = 0;
 blk_integrity_unregister(disk);
 md_new_event(mddev);
 sysfs_notify_dirent(mddev->sysfs_state);
 return err;
}
4440
4441#ifndef MODULE
4442static void autorun_array(mddev_t *mddev)
4443{
4444 mdk_rdev_t *rdev;
4445 int err;
4446
4447 if (list_empty(&mddev->disks))
4448 return;
4449
4450 printk(KERN_INFO "md: running: ");
4451
4452 list_for_each_entry(rdev, &mddev->disks, same_set) {
4453 char b[BDEVNAME_SIZE];
4454 printk("<%s>", bdevname(rdev->bdev,b));
4455 }
4456 printk("\n");
4457
4458 err = do_md_run(mddev);
4459 if (err) {
4460 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
4461 do_md_stop(mddev, 0, 0);
4462 }
4463}
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
/*
 * autorun_devices - assemble and start arrays from pending_raid_disks.
 *
 * Repeatedly takes the first pending device (rdev0), collects every pending
 * device for which super_90_load(rdev, rdev0, 0) succeeds into a local
 * candidate list, creates the md device named by rdev0->preferred_minor and,
 * if that array is unused, binds the candidates to it and runs it.
 * Candidates that could not be used are exported again.
 *
 * 'part' selects a partitionable array (major mdp_major) when non-zero and
 * a plain one (MD_MAJOR) otherwise.
 */
static void autorun_devices(int part)
{
 mdk_rdev_t *rdev0, *rdev, *tmp;
 mddev_t *mddev;
 char b[BDEVNAME_SIZE];

 printk(KERN_INFO "md: autorun ...\n");
 while (!list_empty(&pending_raid_disks)) {
 int unit;
 dev_t dev;
 LIST_HEAD(candidates);
 rdev0 = list_entry(pending_raid_disks.next,
 mdk_rdev_t, same_set);

 printk(KERN_INFO "md: considering %s ...\n",
 bdevname(rdev0->bdev,b));
 INIT_LIST_HEAD(&candidates);
 /* Move every pending device compatible with rdev0 onto 'candidates'. */
 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
 if (super_90_load(rdev, rdev0, 0) >= 0) {
 printk(KERN_INFO "md: adding %s ...\n",
 bdevname(rdev->bdev,b));
 list_move(&rdev->same_set, &candidates);
 }

 /*
 * Build the dev_t from preferred_minor and derive the unit number
 * back from it; a mismatch means preferred_minor does not fit in
 * the minor number space.
 * NOTE(review): both 'break' paths below leave the loop without
 * running the candidate cleanup at the bottom - confirm whether the
 * collected devices are meant to stay unreleased in that case.
 */
 if (part) {
 dev = MKDEV(mdp_major,
 rdev0->preferred_minor << MdpMinorShift);
 unit = MINOR(dev) >> MdpMinorShift;
 } else {
 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
 unit = MINOR(dev);
 }
 if (rdev0->preferred_minor != unit) {
 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
 break;
 }

 /* Create the md device (if needed) and take a reference to it. */
 md_probe(dev, NULL, NULL);
 mddev = mddev_find(dev);
 if (!mddev || !mddev->gendisk) {
 if (mddev)
 mddev_put(mddev);
 printk(KERN_ERR
 "md: cannot allocate memory for md drive.\n");
 break;
 }
 if (mddev_lock(mddev))
 printk(KERN_WARNING "md: %s locked, cannot run\n",
 mdname(mddev));
 else if (mddev->raid_disks || mddev->major_version
 || !list_empty(&mddev->disks)) {
 /* the array is already configured in some way: leave it alone */
 printk(KERN_WARNING
 "md: %s already running, cannot run %s\n",
 mdname(mddev), bdevname(rdev0->bdev,b));
 mddev_unlock(mddev);
 } else {
 printk(KERN_INFO "md: created %s\n", mdname(mddev));
 mddev->persistent = 1;
 /* Bind each candidate; a device that fails to bind is released. */
 rdev_for_each_list(rdev, tmp, &candidates) {
 list_del_init(&rdev->same_set);
 if (bind_rdev_to_array(rdev, mddev))
 export_rdev(rdev);
 }
 autorun_array(mddev);
 mddev_unlock(mddev);
 }
 /*
 * After a successful bind pass the list is already empty; on the
 * other paths the unused candidates are released here.
 */
 rdev_for_each_list(rdev, tmp, &candidates) {
 list_del_init(&rdev->same_set);
 export_rdev(rdev);
 }
 mddev_put(mddev);
 }
 printk(KERN_INFO "md: ... autorun DONE.\n");
}
4559#endif
4560
4561static int get_version(void __user * arg)
4562{
4563 mdu_version_t ver;
4564
4565 ver.major = MD_MAJOR_VERSION;
4566 ver.minor = MD_MINOR_VERSION;
4567 ver.patchlevel = MD_PATCHLEVEL_VERSION;
4568
4569 if (copy_to_user(arg, &ver, sizeof(ver)))
4570 return -EFAULT;
4571
4572 return 0;
4573}
4574
4575static int get_array_info(mddev_t * mddev, void __user * arg)
4576{
4577 mdu_array_info_t info;
4578 int nr,working,active,failed,spare;
4579 mdk_rdev_t *rdev;
4580
4581 nr=working=active=failed=spare=0;
4582 list_for_each_entry(rdev, &mddev->disks, same_set) {
4583 nr++;
4584 if (test_bit(Faulty, &rdev->flags))
4585 failed++;
4586 else {
4587 working++;
4588 if (test_bit(In_sync, &rdev->flags))
4589 active++;
4590 else
4591 spare++;
4592 }
4593 }
4594
4595 info.major_version = mddev->major_version;
4596 info.minor_version = mddev->minor_version;
4597 info.patch_version = MD_PATCHLEVEL_VERSION;
4598 info.ctime = mddev->ctime;
4599 info.level = mddev->level;
4600 info.size = mddev->dev_sectors / 2;
4601 if (info.size != mddev->dev_sectors / 2)
4602 info.size = -1;
4603 info.nr_disks = nr;
4604 info.raid_disks = mddev->raid_disks;
4605 info.md_minor = mddev->md_minor;
4606 info.not_persistent= !mddev->persistent;
4607
4608 info.utime = mddev->utime;
4609 info.state = 0;
4610 if (mddev->in_sync)
4611 info.state = (1<<MD_SB_CLEAN);
4612 if (mddev->bitmap && mddev->bitmap_offset)
4613 info.state = (1<<MD_SB_BITMAP_PRESENT);
4614 info.active_disks = active;
4615 info.working_disks = working;
4616 info.failed_disks = failed;
4617 info.spare_disks = spare;
4618
4619 info.layout = mddev->layout;
4620 info.chunk_size = mddev->chunk_sectors << 9;
4621
4622 if (copy_to_user(arg, &info, sizeof(info)))
4623 return -EFAULT;
4624
4625 return 0;
4626}
4627
4628static int get_bitmap_file(mddev_t * mddev, void __user * arg)
4629{
4630 mdu_bitmap_file_t *file = NULL;
4631 char *ptr, *buf = NULL;
4632 int err = -ENOMEM;
4633
4634 if (md_allow_write(mddev))
4635 file = kmalloc(sizeof(*file), GFP_NOIO);
4636 else
4637 file = kmalloc(sizeof(*file), GFP_KERNEL);
4638
4639 if (!file)
4640 goto out;
4641
4642
4643 if (!mddev->bitmap || !mddev->bitmap->file) {
4644 file->pathname[0] = '\0';
4645 goto copy_out;
4646 }
4647
4648 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
4649 if (!buf)
4650 goto out;
4651
4652 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
4653 if (IS_ERR(ptr))
4654 goto out;
4655
4656 strcpy(file->pathname, ptr);
4657
4658copy_out:
4659 err = 0;
4660 if (copy_to_user(arg, file, sizeof(*file)))
4661 err = -EFAULT;
4662out:
4663 kfree(buf);
4664 kfree(file);
4665 return err;
4666}
4667
4668static int get_disk_info(mddev_t * mddev, void __user * arg)
4669{
4670 mdu_disk_info_t info;
4671 mdk_rdev_t *rdev;
4672
4673 if (copy_from_user(&info, arg, sizeof(info)))
4674 return -EFAULT;
4675
4676 rdev = find_rdev_nr(mddev, info.number);
4677 if (rdev) {
4678 info.major = MAJOR(rdev->bdev->bd_dev);
4679 info.minor = MINOR(rdev->bdev->bd_dev);
4680 info.raid_disk = rdev->raid_disk;
4681 info.state = 0;
4682 if (test_bit(Faulty, &rdev->flags))
4683 info.state |= (1<<MD_DISK_FAULTY);
4684 else if (test_bit(In_sync, &rdev->flags)) {
4685 info.state |= (1<<MD_DISK_ACTIVE);
4686 info.state |= (1<<MD_DISK_SYNC);
4687 }
4688 if (test_bit(WriteMostly, &rdev->flags))
4689 info.state |= (1<<MD_DISK_WRITEMOSTLY);
4690 } else {
4691 info.major = info.minor = 0;
4692 info.raid_disk = -1;
4693 info.state = (1<<MD_DISK_REMOVED);
4694 }
4695
4696 if (copy_to_user(arg, &info, sizeof(info)))
4697 return -EFAULT;
4698
4699 return 0;
4700}
4701
/*
 * add_new_disk - attach a device described by 'info' to an array.
 *
 * Three situations are handled, in this order:
 *  1. raid_disks is still 0: the array is being assembled from existing
 *     superblocks; the device is imported, checked against the first
 *     member already present and bound.
 *  2. a personality is attached: the device is added to a running array.
 *  3. otherwise: the array is being built from a description, which is
 *     only supported for major_version 0.
 *
 * Returns 0 on success, -EOVERFLOW if major/minor do not survive MKDEV(),
 * -EINVAL for an unsupported request or a superblock mismatch, or the error
 * from md_import_device() / bind_rdev_to_array() / hot_add_disk().
 */
static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
{
 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 mdk_rdev_t *rdev;
 dev_t dev = MKDEV(info->major,info->minor);

 /* reject numbers that were truncated when packed into a dev_t */
 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
 return -EOVERFLOW;

 if (!mddev->raid_disks) {
 int err;
 /* case 1: expecting a device which has a superblock */
 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
 if (IS_ERR(rdev)) {
 printk(KERN_WARNING
 "md: md_import_device returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 if (!list_empty(&mddev->disks)) {
 /* compare against the first device already in the array */
 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
 mdk_rdev_t, same_set);
 /* note: this 'err' shadows the one declared above */
 int err = super_types[mddev->major_version]
 .load_super(rdev, rdev0, mddev->minor_version);
 if (err < 0) {
 printk(KERN_WARNING
 "md: %s has different UUID to %s\n",
 bdevname(rdev->bdev,b),
 bdevname(rdev0->bdev,b2));
 export_rdev(rdev);
 return -EINVAL;
 }
 }
 err = bind_rdev_to_array(rdev, mddev);
 if (err)
 export_rdev(rdev);
 return err;
 }

 /*
 * case 2: the array is running, so this is a hot-add style request.
 * The personality must provide hot_add_disk.
 */
 if (mddev->pers) {
 int err;
 if (!mddev->pers->hot_add_disk) {
 printk(KERN_WARNING
 "%s: personality does not support diskops!\n",
 mdname(mddev));
 return -EINVAL;
 }
 if (mddev->persistent)
 rdev = md_import_device(dev, mddev->major_version,
 mddev->minor_version);
 else
 rdev = md_import_device(dev, -1, -1);
 if (IS_ERR(rdev)) {
 printk(KERN_WARNING
 "md: md_import_device returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 /* Work out which slot the device claims to have occupied. */
 if (!mddev->persistent) {
 if (info->state & (1<<MD_DISK_SYNC) &&
 info->raid_disk < mddev->raid_disks)
 rdev->raid_disk = info->raid_disk;
 else
 rdev->raid_disk = -1;
 } else
 super_types[mddev->major_version].
 validate_super(mddev, rdev);
 /* remember that slot, then add the device without a slot */
 rdev->saved_raid_disk = rdev->raid_disk;

 clear_bit(In_sync, &rdev->flags);
 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 set_bit(WriteMostly, &rdev->flags);
 else
 clear_bit(WriteMostly, &rdev->flags);

 rdev->raid_disk = -1;
 err = bind_rdev_to_array(rdev, mddev);
 if (!err && !mddev->pers->hot_remove_disk) {
 /*
 * The personality has hot_add_disk but no hot_remove_disk:
 * hand the device to the personality immediately instead of
 * leaving that to the md thread.
 */
 super_types[mddev->major_version].
 validate_super(mddev, rdev);
 err = mddev->pers->hot_add_disk(mddev, rdev);
 if (err)
 unbind_rdev_from_array(rdev);
 }
 if (err)
 export_rdev(rdev);
 else
 sysfs_notify_dirent(rdev->sysfs_state);

 /* superblock update and recovery kick happen even when err is set */
 md_update_sb(mddev, 1);
 if (mddev->degraded)
 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 md_wakeup_thread(mddev->thread);
 return err;
 }

 /*
 * case 3: building an array from a description; only possible with
 * major_version 0.
 */
 if (mddev->major_version != 0) {
 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
 mdname(mddev));
 return -EINVAL;
 }

 /* a device described as faulty is not imported at all */
 if (!(info->state & (1<<MD_DISK_FAULTY))) {
 int err;
 rdev = md_import_device(dev, -1, 0);
 if (IS_ERR(rdev)) {
 printk(KERN_WARNING
 "md: error, md_import_device() returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 rdev->desc_nr = info->number;
 if (info->raid_disk < mddev->raid_disks)
 rdev->raid_disk = info->raid_disk;
 else
 rdev->raid_disk = -1;

 if (rdev->raid_disk < mddev->raid_disks)
 if (info->state & (1<<MD_DISK_SYNC))
 set_bit(In_sync, &rdev->flags);

 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 set_bit(WriteMostly, &rdev->flags);

 /* Without a persistent superblock the whole device size is used. */
 if (!mddev->persistent) {
 printk(KERN_INFO "md: nonpersistent superblock ...\n");
 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 } else
 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
 rdev->sectors = rdev->sb_start;

 err = bind_rdev_to_array(rdev, mddev);
 if (err) {
 export_rdev(rdev);
 return err;
 }
 }

 return 0;
}
4856
4857static int hot_remove_disk(mddev_t * mddev, dev_t dev)
4858{
4859 char b[BDEVNAME_SIZE];
4860 mdk_rdev_t *rdev;
4861
4862 rdev = find_rdev(mddev, dev);
4863 if (!rdev)
4864 return -ENXIO;
4865
4866 if (rdev->raid_disk >= 0)
4867 goto busy;
4868
4869 kick_rdev_from_array(rdev);
4870 md_update_sb(mddev, 1);
4871 md_new_event(mddev);
4872
4873 return 0;
4874busy:
4875 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
4876 bdevname(rdev->bdev,b), mdname(mddev));
4877 return -EBUSY;
4878}
4879
4880static int hot_add_disk(mddev_t * mddev, dev_t dev)
4881{
4882 char b[BDEVNAME_SIZE];
4883 int err;
4884 mdk_rdev_t *rdev;
4885
4886 if (!mddev->pers)
4887 return -ENODEV;
4888
4889 if (mddev->major_version != 0) {
4890 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
4891 " version-0 superblocks.\n",
4892 mdname(mddev));
4893 return -EINVAL;
4894 }
4895 if (!mddev->pers->hot_add_disk) {
4896 printk(KERN_WARNING
4897 "%s: personality does not support diskops!\n",
4898 mdname(mddev));
4899 return -EINVAL;
4900 }
4901
4902 rdev = md_import_device(dev, -1, 0);
4903 if (IS_ERR(rdev)) {
4904 printk(KERN_WARNING
4905 "md: error, md_import_device() returned %ld\n",
4906 PTR_ERR(rdev));
4907 return -EINVAL;
4908 }
4909
4910 if (mddev->persistent)
4911 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4912 else
4913 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4914
4915 rdev->sectors = rdev->sb_start;
4916
4917 if (test_bit(Faulty, &rdev->flags)) {
4918 printk(KERN_WARNING
4919 "md: can not hot-add faulty %s disk to %s!\n",
4920 bdevname(rdev->bdev,b), mdname(mddev));
4921 err = -EINVAL;
4922 goto abort_export;
4923 }
4924 clear_bit(In_sync, &rdev->flags);
4925 rdev->desc_nr = -1;
4926 rdev->saved_raid_disk = -1;
4927 err = bind_rdev_to_array(rdev, mddev);
4928 if (err)
4929 goto abort_export;
4930
4931
4932
4933
4934
4935
4936 rdev->raid_disk = -1;
4937
4938 md_update_sb(mddev, 1);
4939
4940
4941
4942
4943
4944 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4945 md_wakeup_thread(mddev->thread);
4946 md_new_event(mddev);
4947 return 0;
4948
4949abort_export:
4950 export_rdev(rdev);
4951 return err;
4952}
4953
/*
 * set_bitmap_file - attach (fd >= 0) or detach (fd < 0) a bitmap file.
 *
 * On a running array the personality must provide quiesce and no recovery
 * may be active or pending.
 *
 * Returns 0 on success, -EBUSY if the running array cannot be changed right
 * now, -EEXIST if a bitmap is already present when attaching, -EBADF if
 * 'fd' is not a valid file, -ENOENT if there is no bitmap to detach, or the
 * error from deny_bitmap_write_access() / bitmap_create().
 */
static int set_bitmap_file(mddev_t *mddev, int fd)
{
 int err;

 if (mddev->pers) {
 if (!mddev->pers->quiesce)
 return -EBUSY;
 if (mddev->recovery || mddev->sync_thread)
 return -EBUSY;
 /* the array can be quiesced and nothing else is going on */
 }


 if (fd >= 0) {
 if (mddev->bitmap)
 return -EEXIST; /* cannot add when bitmap is present */
 mddev->bitmap_file = fget(fd);

 if (mddev->bitmap_file == NULL) {
 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
 mdname(mddev));
 return -EBADF;
 }

 /* take exclusive write access; undo the fget() on failure */
 err = deny_bitmap_write_access(mddev->bitmap_file);
 if (err) {
 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
 mdname(mddev));
 fput(mddev->bitmap_file);
 mddev->bitmap_file = NULL;
 return err;
 }
 mddev->bitmap_offset = 0; /* file overrides offset */
 } else if (mddev->bitmap == NULL)
 return -ENOENT; /* cannot remove what isn't there */
 err = 0;
 if (mddev->pers) {
 /* Create or destroy the bitmap while the array is quiesced. */
 mddev->pers->quiesce(mddev, 1);
 if (fd >= 0)
 err = bitmap_create(mddev);
 if (fd < 0 || err) {
 bitmap_destroy(mddev);
 fd = -1; /* make sure to put the file */
 }
 mddev->pers->quiesce(mddev, 0);
 }
 /* fd < 0 here means either a detach request or a failed attach */
 if (fd < 0) {
 if (mddev->bitmap_file) {
 restore_bitmap_write_access(mddev->bitmap_file);
 fput(mddev->bitmap_file);
 }
 mddev->bitmap_file = NULL;
 }

 return err;
}
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5025{
5026
5027 if (info->raid_disks == 0) {
5028
5029 if (info->major_version < 0 ||
5030 info->major_version >= ARRAY_SIZE(super_types) ||
5031 super_types[info->major_version].name == NULL) {
5032
5033 printk(KERN_INFO
5034 "md: superblock version %d not known\n",
5035 info->major_version);
5036 return -EINVAL;
5037 }
5038 mddev->major_version = info->major_version;
5039 mddev->minor_version = info->minor_version;
5040 mddev->patch_version = info->patch_version;
5041 mddev->persistent = !info->not_persistent;
5042 return 0;
5043 }
5044 mddev->major_version = MD_MAJOR_VERSION;
5045 mddev->minor_version = MD_MINOR_VERSION;
5046 mddev->patch_version = MD_PATCHLEVEL_VERSION;
5047 mddev->ctime = get_seconds();
5048
5049 mddev->level = info->level;
5050 mddev->clevel[0] = 0;
5051 mddev->dev_sectors = 2 * (sector_t)info->size;
5052 mddev->raid_disks = info->raid_disks;
5053
5054
5055
5056 if (info->state & (1<<MD_SB_CLEAN))
5057 mddev->recovery_cp = MaxSector;
5058 else
5059 mddev->recovery_cp = 0;
5060 mddev->persistent = ! info->not_persistent;
5061 mddev->external = 0;
5062
5063 mddev->layout = info->layout;
5064 mddev->chunk_sectors = info->chunk_size >> 9;
5065
5066 mddev->max_disks = MD_SB_DISKS;
5067
5068 if (mddev->persistent)
5069 mddev->flags = 0;
5070 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5071
5072 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
5073 mddev->bitmap_offset = 0;
5074
5075 mddev->reshape_position = MaxSector;
5076
5077
5078
5079
5080 get_random_bytes(mddev->uuid, 16);
5081
5082 mddev->new_level = mddev->level;
5083 mddev->new_chunk_sectors = mddev->chunk_sectors;
5084 mddev->new_layout = mddev->layout;
5085 mddev->delta_disks = 0;
5086
5087 return 0;
5088}
5089
5090void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
5091{
5092 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
5093
5094 if (mddev->external_size)
5095 return;
5096
5097 mddev->array_sectors = array_sectors;
5098}
5099EXPORT_SYMBOL(md_set_array_sectors);
5100
5101static int update_size(mddev_t *mddev, sector_t num_sectors)
5102{
5103 mdk_rdev_t *rdev;
5104 int rv;
5105 int fit = (num_sectors == 0);
5106
5107 if (mddev->pers->resize == NULL)
5108 return -EINVAL;
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119 if (mddev->sync_thread)
5120 return -EBUSY;
5121 if (mddev->bitmap)
5122
5123
5124
5125 return -EBUSY;
5126 list_for_each_entry(rdev, &mddev->disks, same_set) {
5127 sector_t avail = rdev->sectors;
5128
5129 if (fit && (num_sectors == 0 || num_sectors > avail))
5130 num_sectors = avail;
5131 if (avail < num_sectors)
5132 return -ENOSPC;
5133 }
5134 rv = mddev->pers->resize(mddev, num_sectors);
5135 if (!rv)
5136 revalidate_disk(mddev->gendisk);
5137 return rv;
5138}
5139
5140static int update_raid_disks(mddev_t *mddev, int raid_disks)
5141{
5142 int rv;
5143
5144 if (mddev->pers->check_reshape == NULL)
5145 return -EINVAL;
5146 if (raid_disks <= 0 ||
5147 raid_disks >= mddev->max_disks)
5148 return -EINVAL;
5149 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
5150 return -EBUSY;
5151 mddev->delta_disks = raid_disks - mddev->raid_disks;
5152
5153 rv = mddev->pers->check_reshape(mddev);
5154 return rv;
5155}
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
/*
 * update_array_info() - apply a SET_ARRAY_INFO request to an array that
 * already has a personality.
 * @mddev: the array (mddev->pers is dereferenced, so it must be set).
 * @info:  the description supplied by the caller.
 *
 * Exactly one of per-device size, raid_disks, layout, or the presence of
 * an internal bitmap may differ from the current state per call; any other
 * difference, or more than one change at once, yields -EINVAL.  No change
 * at all returns 0 without doing anything.
 */
static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
{
	int rv = 0;
	int cnt = 0;	/* number of changeable attributes that differ */
	int state = 0;

	/* Build the state word we expect; only the bitmap bit is tracked. */
	if (mddev->bitmap && mddev->bitmap_offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);

	/*
	 * These attributes cannot be changed here.  patch_version and
	 * layout are deliberately not compared.
	 */
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||

	    mddev->ctime         != info->ctime         ||
	    mddev->level         != info->level         ||

	    !mddev->persistent	 != info->not_persistent||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* mask 0xfffffe00 skips the low 9 bits, incl. the bitmap bit */
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;

	/* Count what is being changed; only one change per call is allowed. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		cnt++;
	if (mddev->raid_disks != info->raid_disks)
		cnt++;
	if (mddev->layout != info->layout)
		cnt++;
	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
		cnt++;
	if (cnt == 0)
		return 0;
	if (cnt > 1)
		return -EINVAL;

	if (mddev->layout != info->layout) {
		/*
		 * Layout change: record the requested layout in new_layout and
		 * let the personality judge it; undo the request on failure.
		 * This path returns without calling md_update_sb().
		 */
		if (mddev->pers->check_reshape == NULL)
			return -EINVAL;
		else {
			mddev->new_layout = info->layout;
			rv = mddev->pers->check_reshape(mddev);
			if (rv)
				mddev->new_layout = mddev->layout;
			return rv;
		}
	}
	/* info->size is halved dev_sectors, so convert back to sectors. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		rv = update_size(mddev, (sector_t)info->size * 2);

	if (mddev->raid_disks != info->raid_disks)
		rv = update_raid_disks(mddev, info->raid_disks);

	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
		/* Adding/removing a bitmap needs the array to be quiesced. */
		if (mddev->pers->quiesce == NULL)
			return -EINVAL;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			/* add the bitmap at the default offset */
			if (mddev->bitmap)
				return -EEXIST;
			if (mddev->default_bitmap_offset == 0)
				return -EINVAL;
			mddev->bitmap_offset = mddev->default_bitmap_offset;
			mddev->pers->quiesce(mddev, 1);
			rv = bitmap_create(mddev);
			if (rv)
				bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		} else {
			/* remove the bitmap; file-backed bitmaps are refused */
			if (!mddev->bitmap)
				return -ENOENT;
			if (mddev->bitmap->file)
				return -EINVAL;
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
			mddev->bitmap_offset = 0;
		}
	}
	/* Reached for size, raid_disks and bitmap changes, even if rv != 0. */
	md_update_sb(mddev, 1);
	return rv;
}
5255
5256static int set_disk_faulty(mddev_t *mddev, dev_t dev)
5257{
5258 mdk_rdev_t *rdev;
5259
5260 if (mddev->pers == NULL)
5261 return -ENODEV;
5262
5263 rdev = find_rdev(mddev, dev);
5264 if (!rdev)
5265 return -ENODEV;
5266
5267 md_error(mddev, rdev);
5268 return 0;
5269}
5270
5271
5272
5273
5274
5275
5276
5277static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
5278{
5279 mddev_t *mddev = bdev->bd_disk->private_data;
5280
5281 geo->heads = 2;
5282 geo->sectors = 4;
5283 geo->cylinders = get_capacity(mddev->gendisk) / 8;
5284 return 0;
5285}
5286
/*
 * md_ioctl() - ioctl entry point for md block devices.
 * @bdev: block device the ioctl was issued on.
 * @mode: open mode (not used in this function).
 * @cmd:  ioctl command.
 * @arg:  command argument; either a user pointer or a plain value.
 *
 * Requires CAP_SYS_ADMIN (-EACCES otherwise).  Commands are dispatched in
 * stages: driver-wide commands that need no array, SET_ARRAY_INFO, commands
 * allowed on a read-only array, and finally commands that need the array to
 * be writable.  Returns 0 or a negative errno.
 */
static int md_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	int err = 0;
	void __user *argp = (void __user *)arg;
	mddev_t *mddev = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	/*
	 * Stage 1: commands handled without looking at (or locking) any
	 * particular array.
	 */
	switch (cmd)
	{
		case RAID_VERSION:
			err = get_version(argp);
			goto done;

		case PRINT_RAID_DEBUG:
			err = 0;
			md_print_devices();
			goto done;

#ifndef MODULE
		case RAID_AUTORUN:
			err = 0;
			autostart_arrays(arg);
			goto done;
#endif
		default:;
	}

	/*
	 * Everything below operates on the array attached to this disk and
	 * runs with the array locked.
	 */
	mddev = bdev->bd_disk->private_data;

	if (!mddev) {
		BUG();
		goto abort;
	}

	err = mddev_lock(mddev);
	if (err) {
		printk(KERN_INFO
			"md: ioctl lock interrupted, reason %d, cmd %d\n",
			err, cmd);
		goto abort;
	}

	/* Stage 2: SET_ARRAY_INFO, valid with or without a personality. */
	switch (cmd)
	{
		case SET_ARRAY_INFO:
			{
				mdu_array_info_t info;
				/* a NULL argument stands for an all-zero info */
				if (!arg)
					memset(&info, 0, sizeof(info));
				else if (copy_from_user(&info, argp, sizeof(info))) {
					err = -EFAULT;
					goto abort_unlock;
				}
				/* personality present: update in place */
				if (mddev->pers) {
					err = update_array_info(mddev, &info);
					if (err) {
						printk(KERN_WARNING "md: couldn't update"
						       " array info. %d\n", err);
						goto abort_unlock;
					}
					goto done_unlock;
				}
				/* otherwise only a pristine array may be set up */
				if (!list_empty(&mddev->disks)) {
					printk(KERN_WARNING
					       "md: array %s already has disks!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				if (mddev->raid_disks) {
					printk(KERN_WARNING
					       "md: array %s already initialised!\n",
					       mdname(mddev));
					err = -EBUSY;
					goto abort_unlock;
				}
				err = set_array_info(mddev, &info);
				if (err) {
					printk(KERN_WARNING "md: couldn't set"
					       " array info. %d\n", err);
					goto abort_unlock;
				}
			}
			goto done_unlock;

		default:;
	}

	/*
	 * Past this point an array that has neither raid_disks nor external
	 * metadata only accepts the handful of commands listed here.
	 */
	if ((!mddev->raid_disks && !mddev->external)
	    && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
	    && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
	    && cmd != GET_BITMAP_FILE) {
		err = -ENODEV;
		goto abort_unlock;
	}

	/* Stage 3: commands dispatched before the read-only check below. */
	switch (cmd)
	{
		case GET_ARRAY_INFO:
			err = get_array_info(mddev, argp);
			goto done_unlock;

		case GET_BITMAP_FILE:
			err = get_bitmap_file(mddev, argp);
			goto done_unlock;

		case GET_DISK_INFO:
			err = get_disk_info(mddev, argp);
			goto done_unlock;

		case RESTART_ARRAY_RW:
			err = restart_array(mddev);
			goto done_unlock;

		case STOP_ARRAY:
			err = do_md_stop(mddev, 0, 1);
			goto done_unlock;

		case STOP_ARRAY_RO:
			err = do_md_stop(mddev, 1, 1);
			goto done_unlock;

	}

	/*
	 * Stage 4 needs a writable array.  For md ioctls on an array with a
	 * personality: ro == 2 is switched to read-write here (and the md
	 * thread is asked to check for recovery); any other non-zero ro
	 * fails with -EROFS.
	 */
	if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
		if (mddev->ro == 2) {
			mddev->ro = 0;
			sysfs_notify_dirent(mddev->sysfs_state);
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			md_wakeup_thread(mddev->thread);
		} else {
			err = -EROFS;
			goto abort_unlock;
		}
	}

	switch (cmd)
	{
		case ADD_NEW_DISK:
		{
			mdu_disk_info_t info;
			if (copy_from_user(&info, argp, sizeof(info)))
				err = -EFAULT;
			else
				err = add_new_disk(mddev, &info);
			goto done_unlock;
		}

		case HOT_REMOVE_DISK:
			err = hot_remove_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case HOT_ADD_DISK:
			err = hot_add_disk(mddev, new_decode_dev(arg));
			goto done_unlock;

		case SET_DISK_FAULTY:
			err = set_disk_faulty(mddev, new_decode_dev(arg));
			goto done_unlock;

		case RUN_ARRAY:
			err = do_md_run(mddev);
			goto done_unlock;

		case SET_BITMAP_FILE:
			err = set_bitmap_file(mddev, (int)arg);
			goto done_unlock;

		default:
			err = -EINVAL;
			goto abort_unlock;
	}

	/* Common exit for every path that holds the array lock. */
done_unlock:
abort_unlock:
	/* an UNTIL_IOCTL hold ends with any ioctl that is not -EINVAL */
	if (mddev->hold_active == UNTIL_IOCTL &&
	    err != -EINVAL)
		mddev->hold_active = 0;
	mddev_unlock(mddev);

	return err;
	/* Exit for the stage-1 commands: a failure there is logged as a bug. */
done:
	if (err)
		MD_BUG();
abort:
	return err;
}
5500
5501static int md_open(struct block_device *bdev, fmode_t mode)
5502{
5503
5504
5505
5506
5507 mddev_t *mddev = mddev_find(bdev->bd_dev);
5508 int err;
5509
5510 if (mddev->gendisk != bdev->bd_disk) {
5511
5512
5513
5514 mddev_put(mddev);
5515
5516 flush_scheduled_work();
5517
5518 return -ERESTARTSYS;
5519 }
5520 BUG_ON(mddev != bdev->bd_disk->private_data);
5521
5522 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5523 goto out;
5524
5525 err = 0;
5526 atomic_inc(&mddev->openers);
5527 mutex_unlock(&mddev->open_mutex);
5528
5529 check_disk_change(bdev);
5530 out:
5531 return err;
5532}
5533
5534static int md_release(struct gendisk *disk, fmode_t mode)
5535{
5536 mddev_t *mddev = disk->private_data;
5537
5538 BUG_ON(!mddev);
5539 atomic_dec(&mddev->openers);
5540 mddev_put(mddev);
5541
5542 return 0;
5543}
5544
5545static int md_media_changed(struct gendisk *disk)
5546{
5547 mddev_t *mddev = disk->private_data;
5548
5549 return mddev->changed;
5550}
5551
5552static int md_revalidate(struct gendisk *disk)
5553{
5554 mddev_t *mddev = disk->private_data;
5555
5556 mddev->changed = 0;
5557 return 0;
5558}
/*
 * Block-device operations for md devices; each entry points at the
 * handler of the same purpose defined in this file.
 */
static struct block_device_operations md_fops =
{
	.owner		= THIS_MODULE,
	.open		= md_open,
	.release	= md_release,
	.ioctl		= md_ioctl,
	.getgeo		= md_getgeo,
	.media_changed	= md_media_changed,
	.revalidate_disk= md_revalidate,
};
5569
/*
 * md_thread() - main loop of an md kernel thread.
 * @arg: the mdk_thread_t describing this thread.
 *
 * Repeatedly sleeps until woken (THREAD_WAKEUP set), asked to stop, or
 * thread->timeout expires, then calls thread->run(thread->mddev).
 * Returns 0 once kthread_should_stop() is true at the top of the loop.
 */
static int md_thread(void * arg)
{
	mdk_thread_t *thread = arg;

	/* SIGKILL is the one signal this thread lets through. */
	allow_signal(SIGKILL);
	while (!kthread_should_stop()) {

		/*
		 * The wait below is interruptible, so discard any pending
		 * signal first; presumably so the wait is not cut short by a
		 * signal left over from an earlier iteration.
		 */
		if (signal_pending(current))
			flush_signals(current);

		wait_event_interruptible_timeout
			(thread->wqueue,
			 test_bit(THREAD_WAKEUP, &thread->flags)
			 || kthread_should_stop(),
			 thread->timeout);

		/* Consume the wakeup before running so a new one is not lost. */
		clear_bit(THREAD_WAKEUP, &thread->flags);

		/* Note: run() is also called after a timeout or a stop request. */
		thread->run(thread->mddev);
	}

	return 0;
}
5610
5611void md_wakeup_thread(mdk_thread_t *thread)
5612{
5613 if (thread) {
5614 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
5615 set_bit(THREAD_WAKEUP, &thread->flags);
5616 wake_up(&thread->wqueue);
5617 }
5618}
5619
5620mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
5621 const char *name)
5622{
5623 mdk_thread_t *thread;
5624
5625 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
5626 if (!thread)
5627 return NULL;
5628
5629 init_waitqueue_head(&thread->wqueue);
5630
5631 thread->run = run;
5632 thread->mddev = mddev;
5633 thread->timeout = MAX_SCHEDULE_TIMEOUT;
5634 thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
5635 if (IS_ERR(thread->tsk)) {
5636 kfree(thread);
5637 return NULL;
5638 }
5639 return thread;
5640}
5641
5642void md_unregister_thread(mdk_thread_t *thread)
5643{
5644 if (!thread)
5645 return;
5646 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
5647
5648 kthread_stop(thread->tsk);
5649 kfree(thread);
5650}
5651
5652void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
5653{
5654 if (!mddev) {
5655 MD_BUG();
5656 return;
5657 }
5658
5659 if (!rdev || test_bit(Faulty, &rdev->flags))
5660 return;
5661
5662 if (mddev->external)
5663 set_bit(Blocked, &rdev->flags);
5664
5665
5666
5667
5668
5669
5670
5671 if (!mddev->pers)
5672 return;
5673 if (!mddev->pers->error_handler)
5674 return;
5675 mddev->pers->error_handler(mddev,rdev);
5676 if (mddev->degraded)
5677 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5678 set_bit(StateChanged, &rdev->flags);
5679 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5680 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5681 md_wakeup_thread(mddev->thread);
5682 md_new_event_inintr(mddev);
5683}
5684
5685
5686
5687static void status_unused(struct seq_file *seq)
5688{
5689 int i = 0;
5690 mdk_rdev_t *rdev;
5691
5692 seq_printf(seq, "unused devices: ");
5693
5694 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
5695 char b[BDEVNAME_SIZE];
5696 i++;
5697 seq_printf(seq, "%s ",
5698 bdevname(rdev->bdev,b));
5699 }
5700 if (!i)
5701 seq_printf(seq, "<none>");
5702
5703 seq_printf(seq, "\n");
5704}
5705
5706
/*
 * status_resync() - print the progress line for a running
 * resync/recovery/check/reshape: a 20-character bar, the percentage done,
 * the position, an estimated finish time and the current speed.
 */
static void status_resync(struct seq_file *seq, mddev_t * mddev)
{
	sector_t max_sectors, resync, res;
	unsigned long dt, db;
	sector_t rt;
	int scale;
	unsigned int per_milli;

	/* Position reached, less the sectors submitted but not yet finished. */
	resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);

	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
		max_sectors = mddev->resync_max_sectors;
	else
		max_sectors = mddev->dev_sectors;

	/*
	 * A zero total would make the progress arithmetic meaningless.
	 */
	if (!max_sectors) {
		MD_BUG();
		return;
	}

	/*
	 * Scale both values down (by at least 2^10) before computing
	 * thousandths, so that the divisor handed to sector_div() fits in
	 * 32 bits.  The extra scaling loop only matters when sector_t is
	 * wider than unsigned long.
	 */
	scale = 10;
	if (sizeof(sector_t) > sizeof(unsigned long)) {
		while ( max_sectors/2 > (1ULL<<(scale+32)))
			scale++;
	}
	res = (resync>>scale)*1000;
	sector_div(res, (u32)((max_sectors>>scale)+1));

	/* per_milli: progress in thousandths of the total. */
	per_milli = res;
	{
		/* x '=' characters, one '>', then y '.' characters */
		int i, x = per_milli/50, y = 20-x;
		seq_printf(seq, "[");
		for (i = 0; i < x; i++)
			seq_printf(seq, "=");
		seq_printf(seq, ">");
		for (i = 0; i < y; i++)
			seq_printf(seq, ".");
		seq_printf(seq, "] ");
	}
	/* positions are printed halved (sectors / 2) */
	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
		    "reshape" :
		    (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
		     "check" :
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		      "resync" : "recovery"))),
		   per_milli/10, per_milli % 10,
		   (unsigned long long) resync/2,
		   (unsigned long long) max_sectors/2);

	/*
	 * Estimate the remaining time from the most recent speed sample:
	 *   dt = seconds since the mark (at least 1),
	 *   db = sectors completed since the mark,
	 *   rt = remaining / (db/32 + 1) * dt / 32   (seconds).
	 * Dividing db by 32 first and shifting rt right by 5 afterwards
	 * keeps the sector_div() divisor small; NOTE(review): presumably
	 * to stay within its 32-bit divisor - confirm.
	 */
	dt = ((jiffies - mddev->resync_mark) / HZ);
	if (!dt) dt++;
	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
		- mddev->resync_mark_cnt;

	rt = max_sectors - resync;    /* sectors still to be processed */
	sector_div(rt, db/32+1);
	rt *= dt;
	rt >>= 5;

	/* minutes and tenths of a minute */
	seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
		   ((unsigned long)rt % 60)/6);

	/* db is in sectors, so db/2 per second is printed as K/sec */
	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
5793
5794static void *md_seq_start(struct seq_file *seq, loff_t *pos)
5795{
5796 struct list_head *tmp;
5797 loff_t l = *pos;
5798 mddev_t *mddev;
5799
5800 if (l >= 0x10000)
5801 return NULL;
5802 if (!l--)
5803
5804 return (void*)1;
5805
5806 spin_lock(&all_mddevs_lock);
5807 list_for_each(tmp,&all_mddevs)
5808 if (!l--) {
5809 mddev = list_entry(tmp, mddev_t, all_mddevs);
5810 mddev_get(mddev);
5811 spin_unlock(&all_mddevs_lock);
5812 return mddev;
5813 }
5814 spin_unlock(&all_mddevs_lock);
5815 if (!l--)
5816 return (void*)2;
5817 return NULL;
5818}
5819
5820static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
5821{
5822 struct list_head *tmp;
5823 mddev_t *next_mddev, *mddev = v;
5824
5825 ++*pos;
5826 if (v == (void*)2)
5827 return NULL;
5828
5829 spin_lock(&all_mddevs_lock);
5830 if (v == (void*)1)
5831 tmp = all_mddevs.next;
5832 else
5833 tmp = mddev->all_mddevs.next;
5834 if (tmp != &all_mddevs)
5835 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
5836 else {
5837 next_mddev = (void*)2;
5838 *pos = 0x10000;
5839 }
5840 spin_unlock(&all_mddevs_lock);
5841
5842 if (v != (void*)1)
5843 mddev_put(mddev);
5844 return next_mddev;
5845
5846}
5847
5848static void md_seq_stop(struct seq_file *seq, void *v)
5849{
5850 mddev_t *mddev = v;
5851
5852 if (mddev && v != (void*)1 && v != (void*)2)
5853 mddev_put(mddev);
5854}
5855
/*
 * Per-open state of the status file: the value of md_event_count last
 * seen by this reader.  mdstat_poll() compares it with the live counter.
 */
struct mdstat_info {
	int event;
};
5859
/*
 * md_seq_show() - seq_file ->show: print one item of the status file.
 * @v: (void *)1 for the header line, (void *)2 for the trailer line,
 *     otherwise the array to describe.
 *
 * Returns 0, or -EINTR if the array lock could not be taken.
 */
static int md_seq_show(struct seq_file *seq, void *v)
{
	mddev_t *mddev = v;
	sector_t sectors;
	mdk_rdev_t *rdev;
	struct mdstat_info *mi = seq->private;
	struct bitmap *bitmap;

	/* Header: list the registered personalities. */
	if (v == (void*)1) {
		struct mdk_personality *pers;
		seq_printf(seq, "Personalities : ");
		spin_lock(&pers_lock);
		list_for_each_entry(pers, &pers_list, list)
			seq_printf(seq, "[%s] ", pers->name);

		spin_unlock(&pers_lock);
		seq_printf(seq, "\n");
		/* remember the event count this reader has now seen */
		mi->event = atomic_read(&md_event_count);
		return 0;
	}
	/* Trailer: list the unused devices. */
	if (v == (void*)2) {
		status_unused(seq);
		return 0;
	}

	if (mddev_lock(mddev) < 0)
		return -EINTR;

	/* Arrays with no personality, no raid_disks and no disks print nothing. */
	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
		seq_printf(seq, "%s : %sactive", mdname(mddev),
						mddev->pers ? "" : "in");
		if (mddev->pers) {
			if (mddev->ro==1)
				seq_printf(seq, " (read-only)");
			if (mddev->ro==2)
				seq_printf(seq, " (auto-read-only)");
			seq_printf(seq, " %s", mddev->pers->name);
		}

		/*
		 * List members with their flags; sum the sizes of the
		 * non-faulty ones for use when there is no personality.
		 */
		sectors = 0;
		list_for_each_entry(rdev, &mddev->disks, same_set) {
			char b[BDEVNAME_SIZE];
			seq_printf(seq, " %s[%d]",
				bdevname(rdev->bdev,b), rdev->desc_nr);
			if (test_bit(WriteMostly, &rdev->flags))
				seq_printf(seq, "(W)");
			if (test_bit(Faulty, &rdev->flags)) {
				seq_printf(seq, "(F)");
				continue;
			} else if (rdev->raid_disk < 0)
				seq_printf(seq, "(S)"); /* not in a raid slot */
			sectors += rdev->sectors;
		}

		/* Size line: array size if running, else the sum from above. */
		if (!list_empty(&mddev->disks)) {
			if (mddev->pers)
				seq_printf(seq, "\n      %llu blocks",
					   (unsigned long long)
					   mddev->array_sectors / 2);
			else
				seq_printf(seq, "\n      %llu blocks",
					   (unsigned long long)sectors / 2);
		}
		/* Metadata: version 0.90 is treated as the default, not printed. */
		if (mddev->persistent) {
			if (mddev->major_version != 0 ||
			    mddev->minor_version != 90) {
				seq_printf(seq," super %d.%d",
					   mddev->major_version,
					   mddev->minor_version);
			}
		} else if (mddev->external)
			seq_printf(seq, " super external:%s",
				   mddev->metadata_type);
		else
			seq_printf(seq, " super non-persistent");

		if (mddev->pers) {
			/* personality-specific status, then sync progress */
			mddev->pers->status(seq, mddev);
	 		seq_printf(seq, "\n      ");
			if (mddev->pers->sync_request) {
				if (mddev->curr_resync > 2) {
					status_resync(seq, mddev);
					seq_printf(seq, "\n      ");
				} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
					seq_printf(seq, "\tresync=DELAYED\n      ");
				else if (mddev->recovery_cp < MaxSector)
					seq_printf(seq, "\tresync=PENDING\n      ");
			}
		} else
			seq_printf(seq, "\n       ");

		/* Bitmap summary, read under the bitmap's own lock. */
		if ((bitmap = mddev->bitmap)) {
			unsigned long chunk_kb;
			unsigned long flags;
			spin_lock_irqsave(&bitmap->lock, flags);
			/* chunk size in KB, or in bytes when below 1KB */
			chunk_kb = bitmap->chunksize >> 10;
			seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
				"%lu%s chunk",
				bitmap->pages - bitmap->missing_pages,
				bitmap->pages,
				(bitmap->pages - bitmap->missing_pages)
					<< (PAGE_SHIFT - 10),
				chunk_kb ? chunk_kb : bitmap->chunksize,
				chunk_kb ? "KB" : "B");
			if (bitmap->file) {
				seq_printf(seq, ", file: ");
				seq_path(seq, &bitmap->file->f_path, " \t\n");
			}

			seq_printf(seq, "\n");
			spin_unlock_irqrestore(&bitmap->lock, flags);
		}

		seq_printf(seq, "\n");
	}
	mddev_unlock(mddev);

	return 0;
}
5979
/* seq_file iterator for the status file; installed by md_seq_open(). */
static const struct seq_operations md_seq_ops = {
	.start  = md_seq_start,
	.next   = md_seq_next,
	.stop   = md_seq_stop,
	.show   = md_seq_show,
};
5986
5987static int md_seq_open(struct inode *inode, struct file *file)
5988{
5989 int error;
5990 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
5991 if (mi == NULL)
5992 return -ENOMEM;
5993
5994 error = seq_open(file, &md_seq_ops);
5995 if (error)
5996 kfree(mi);
5997 else {
5998 struct seq_file *p = file->private_data;
5999 p->private = mi;
6000 mi->event = atomic_read(&md_event_count);
6001 }
6002 return error;
6003}
6004
6005static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6006{
6007 struct seq_file *m = filp->private_data;
6008 struct mdstat_info *mi = m->private;
6009 int mask;
6010
6011 poll_wait(filp, &md_event_waiters, wait);
6012
6013
6014 mask = POLLIN | POLLRDNORM;
6015
6016 if (mi->event != atomic_read(&md_event_count))
6017 mask |= POLLERR | POLLPRI;
6018 return mask;
6019}
6020
/*
 * File operations for the status file: a seq_file whose private data
 * (struct mdstat_info) is freed by seq_release_private(), plus poll
 * support via mdstat_poll().
 */
static const struct file_operations md_seq_fops = {
	.owner		= THIS_MODULE,
	.open           = md_seq_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release	= seq_release_private,
	.poll		= mdstat_poll,
};
6029
6030int register_md_personality(struct mdk_personality *p)
6031{
6032 spin_lock(&pers_lock);
6033 list_add_tail(&p->list, &pers_list);
6034 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
6035 spin_unlock(&pers_lock);
6036 return 0;
6037}
6038
6039int unregister_md_personality(struct mdk_personality *p)
6040{
6041 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
6042 spin_lock(&pers_lock);
6043 list_del_init(&p->list);
6044 spin_unlock(&pers_lock);
6045 return 0;
6046}
6047
/*
 * is_mddev_idle() - decide whether the member disks look idle apart from
 * sync I/O.
 * @mddev: array whose members are examined (under rcu_read_lock()).
 * @init:  non-zero to just (re)record the current counters; every member
 *         then counts as busy, so 0 is returned if there is any member.
 *
 * Returns 1 if no member showed more than 64 sectors of extra activity
 * since its counter was last recorded, else 0.
 */
static int is_mddev_idle(mddev_t *mddev, int init)
{
	mdk_rdev_t * rdev;
	int idle;
	int curr_events;

	idle = 1;
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev) {
		/* statistics are taken from the whole disk (part0) */
		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
		/*
		 * Sectors read plus sectors written on the disk, minus the
		 * disk's sync_io counter (presumably the I/O issued by resync
		 * itself - confirm where sync_io is incremented).
		 */
		curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
			      (int)part_stat_read(&disk->part0, sectors[1]) -
			      atomic_read(&disk->sync_io);
		/*
		 * The two sides of the subtraction above are updated at
		 * different times, so small differences are expected; only a
		 * growth of more than 64 sectors since the recorded value is
		 * treated as foreign activity.  The comparison is done on the
		 * signed difference rather than on the raw counters.
		 */
		if (init || curr_events - rdev->last_events > 64) {
			rdev->last_events = curr_events;
			idle = 0;
		}
	}
	rcu_read_unlock();
	return idle;
}
6091
6092void md_done_sync(mddev_t *mddev, int blocks, int ok)
6093{
6094
6095 atomic_sub(blocks, &mddev->recovery_active);
6096 wake_up(&mddev->recovery_wait);
6097 if (!ok) {
6098 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6099 md_wakeup_thread(mddev->thread);
6100
6101 }
6102}
6103
6104
6105
6106
6107
6108
6109
/*
 * md_write_start() - called before a write bio is processed.
 * @mddev: array being written to; must not be read-only (ro == 1).
 * @bi:    the bio; anything that is not a WRITE returns immediately.
 *
 * Counts the write in writes_pending, switches an ro == 2 array to
 * read-write, marks an in_sync array as no longer in sync, and then blocks
 * until neither MD_CHANGE_CLEAN nor MD_CHANGE_PENDING is set in
 * mddev->flags.
 */
void md_write_start(mddev_t *mddev, struct bio *bi)
{
	int did_change = 0;
	if (bio_data_dir(bi) != WRITE)
		return;

	BUG_ON(mddev->ro == 1);
	if (mddev->ro == 2) {
		/* first write to an ro == 2 array: make it read-write */
		mddev->ro = 0;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		md_wakeup_thread(mddev->sync_thread);
		did_change = 1;
	}
	atomic_inc(&mddev->writes_pending);
	if (mddev->safemode == 1)
		mddev->safemode = 0;
	/* unlocked test first; re-tested under write_lock before changing */
	if (mddev->in_sync) {
		spin_lock_irq(&mddev->write_lock);
		if (mddev->in_sync) {
			mddev->in_sync = 0;
			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			md_wakeup_thread(mddev->thread);
			did_change = 1;
		}
		spin_unlock_irq(&mddev->write_lock);
	}
	if (did_change)
		sysfs_notify_dirent(mddev->sysfs_state);
	/* hold the write back until the flag bits have been cleared */
	wait_event(mddev->sb_wait,
		   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
		   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
6144
6145void md_write_end(mddev_t *mddev)
6146{
6147 if (atomic_dec_and_test(&mddev->writes_pending)) {
6148 if (mddev->safemode == 2)
6149 md_wakeup_thread(mddev->thread);
6150 else if (mddev->safemode_delay)
6151 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
6152 }
6153}
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164int md_allow_write(mddev_t *mddev)
6165{
6166 if (!mddev->pers)
6167 return 0;
6168 if (mddev->ro)
6169 return 0;
6170 if (!mddev->pers->sync_request)
6171 return 0;
6172
6173 spin_lock_irq(&mddev->write_lock);
6174 if (mddev->in_sync) {
6175 mddev->in_sync = 0;
6176 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6177 if (mddev->safemode_delay &&
6178 mddev->safemode == 0)
6179 mddev->safemode = 1;
6180 spin_unlock_irq(&mddev->write_lock);
6181 md_update_sb(mddev, 0);
6182 sysfs_notify_dirent(mddev->sysfs_state);
6183 } else
6184 spin_unlock_irq(&mddev->write_lock);
6185
6186 if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
6187 return -EAGAIN;
6188 else
6189 return 0;
6190}
6191EXPORT_SYMBOL_GPL(md_allow_write);
6192
/* Number of speed samples kept, and the interval between two samples. */
#define SYNC_MARKS	10
#define	SYNC_MARK_STEP	(3*HZ)
/*
 * md_do_sync() - body of the sync thread: run one resync, data-check,
 * requested-resync, reshape or recovery pass over the array.
 *
 * The pass is driven by repeatedly calling mddev->pers->sync_request()
 * and is throttled between speed_min() and speed_max().  On exit
 * curr_resync and the resync_min/resync_max window are reset,
 * MD_RECOVERY_DONE is set and the md thread is woken.
 */
void md_do_sync(mddev_t *mddev)
{
	mddev_t *mddev2;
	unsigned int currspeed = 0,
		 window;
	sector_t max_sectors,j, io_sectors;
	unsigned long mark[SYNC_MARKS];
	sector_t mark_cnt[SYNC_MARKS];
	int last_mark,m;
	struct list_head *tmp;
	sector_t last_check;
	int skipped = 0;
	mdk_rdev_t *rdev;
	char *desc;

	/* Nothing to do if a pass already completed or the array is not rw. */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
		return;
	if (mddev->ro)
		return;

	/* Pick the name used for this pass in log messages. */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
			desc = "data-check";
		else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			desc = "requested-resync";
		else
			desc = "resync";
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		desc = "reshape";
	else
		desc = "recovery";

	/*
	 * Arbitration with other arrays that share physical units (unless
	 * parallel_resync is set).  curr_resync doubles as the state:
	 *   2 - we want to sync and will wait for any conflicting array
	 *       whose curr_resync is >= ours;
	 *   1 - we have yielded to a conflicting array.
	 * When two conflicting arrays are both at 2, the one with the lower
	 * mddev address drops to 1 and wakes the other.  The loop ends once
	 * a full scan finishes with curr_resync still at 2.
	 */
	do {
		mddev->curr_resync = 2;

	try_again:
		if (kthread_should_stop()) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto skip;
		}
		for_each_mddev(mddev2, tmp) {
			if (mddev2 == mddev)
				continue;
			if (!mddev->parallel_resync
			&&  mddev2->curr_resync
			&&  match_mddev_units(mddev, mddev2)) {
				DEFINE_WAIT(wq);
				if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* yield, using the address as tie-break */
					mddev->curr_resync = 1;
					wake_up(&resync_wait);
				}
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/*
					 * Already yielded; no need to wait for
					 * this one now, the outer loop retries.
					 */
					continue;
				/*
				 * Sleep interruptibly on resync_wait until the
				 * other array is no longer ahead of us.
				 */
				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
				if (!kthread_should_stop() &&
				    mddev2->curr_resync >= mddev->curr_resync) {
					printk(KERN_INFO "md: delaying %s of %s"
					       " until %s has finished (they"
					       " share one or more physical units)\n",
					       desc, mdname(mddev), mdname(mddev2));
					/* leaving the iteration: drop its reference */
					mddev_put(mddev2);
					if (signal_pending(current))
						flush_signals(current);
					schedule();
					finish_wait(&resync_wait, &wq);
					goto try_again;
				}
				finish_wait(&resync_wait, &wq);
			}
		}
	} while (mddev->curr_resync < 2);

	/* Work out where to start (j) and where to stop (max_sectors). */
	j = 0;
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/*
		 * resync covers resync_max_sectors and starts at resync_min
		 * when requested, else at the checkpoint if there is no bitmap
		 */
		max_sectors = mddev->resync_max_sectors;
		mddev->resync_mismatches = 0;

		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			j = mddev->resync_min;
		else if (!mddev->bitmap)
			j = mddev->recovery_cp;

	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->dev_sectors;
	else {
		/* recovery starts at the lowest recovery_offset of any
		 * non-faulty, not-in-sync member that holds a raid slot */
		max_sectors = mddev->dev_sectors;
		j = MaxSector;
		list_for_each_entry(rdev, &mddev->disks, same_set)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(Faulty, &rdev->flags) &&
			    !test_bit(In_sync, &rdev->flags) &&
			    rdev->recovery_offset < j)
				j = rdev->recovery_offset;
	}

	printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
	printk(KERN_INFO "md: minimum _guaranteed_  speed:"
		" %d KB/sec/disk.\n", speed_min(mddev));
	printk(KERN_INFO "md: using maximum available idle IO bandwidth "
	       "(but not more than %d KB/sec) for %s.\n",
	       speed_max(mddev), desc);

	is_mddev_idle(mddev, 1); /* record the starting disk counters */

	/* Initialise every speed sample to "now, nothing done". */
	io_sectors = 0;
	for (m = 0; m < SYNC_MARKS; m++) {
		mark[m] = jiffies;
		mark_cnt[m] = io_sectors;
	}
	last_mark = 0;
	mddev->resync_mark = mark[last_mark];
	mddev->resync_mark_cnt = mark_cnt[last_mark];

	/*
	 * window: number of sectors issued between two throttling checks.
	 */
	window = 32*(PAGE_SIZE/512);
	printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
		window/2,(unsigned long long) max_sectors/2);

	atomic_set(&mddev->recovery_active, 0);
	last_check = 0;

	if (j>2) {
		printk(KERN_INFO
		       "md: resuming %s of %s from checkpoint.\n",
		       desc, mdname(mddev));
		mddev->curr_resync = j;
	}

	while (j < max_sectors) {
		sector_t sectors;

		skipped = 0;

		/*
		 * Except during reshape: once progress is more than 1/16 of
		 * the total ahead of curr_resync_completed, or more than half
		 * way from it to resync_max, drain the outstanding I/O and
		 * publish the new completed position.
		 */
		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    ((mddev->curr_resync > mddev->curr_resync_completed &&
		      (mddev->curr_resync - mddev->curr_resync_completed)
		      > (max_sectors >> 4)) ||
		     (j - mddev->curr_resync_completed)*2
		     >= mddev->resync_max - mddev->curr_resync_completed
			    )) {
			/* time to update curr_resync_completed */
			blk_unplug(mddev->queue);
			wait_event(mddev->recovery_wait,
				   atomic_read(&mddev->recovery_active) == 0);
			mddev->curr_resync_completed =
				mddev->curr_resync;
			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
		}

		while (j >= mddev->resync_max && !kthread_should_stop()) {
			/*
			 * We have reached resync_max: wait until it is raised
			 * or we are told to stop.  The wait is interruptible,
			 * so pending signals are flushed before each attempt.
			 */
			flush_signals(current); /* just in case */
			wait_event_interruptible(mddev->recovery_wait,
						 mddev->resync_max > j
						 || kthread_should_stop());
		}

		if (kthread_should_stop())
			goto interrupted;

		/* last argument: go faster while below the minimum speed */
		sectors = mddev->pers->sync_request(mddev, j, &skipped,
						  currspeed < speed_min(mddev));
		if (sectors == 0) {
			/* the personality made no progress: give up */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto out;
		}

		if (!skipped) { /* actual IO requested */
			io_sectors += sectors;
			atomic_add(sectors, &mddev->recovery_active);
		}

		j += sectors;
		/* values 0..1 of curr_resync are reserved for the arbitration */
		if (j>1) mddev->curr_resync = j;
		mddev->curr_mark_cnt = io_sectors;
		if (last_check == 0)
			/*
			 * No throttling check has happened yet, i.e. we are
			 * at the very start of the pass: announce it.
			 */
			md_new_event(mddev);

		/* only throttle once per window, and not at the very end */
		if (last_check + window > io_sectors || j == max_sectors)
			continue;

		last_check = io_sectors;

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			break;

	repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks: publish the oldest sample, overwrite it */
			int next = (last_mark+1) % SYNC_MARKS;

			mddev->resync_mark = mark[next];
			mddev->resync_mark_cnt = mark_cnt[next];
			mark[next] = jiffies;
			mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
			last_mark = next;
		}


		if (kthread_should_stop())
			goto interrupted;


		/*
		 * Throttling: we fall through to the next request when we
		 * are at or below the guaranteed minimum speed, or when we
		 * are within the maximum and the member disks look idle.
		 * Otherwise sleep half a second and re-evaluate.
		 */
		blk_unplug(mddev->queue);
		cond_resched();

		/* KB/sec since the published mark; the +1s avoid zero */
		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
			/((jiffies-mddev->resync_mark)/HZ +1) +1;

		if (currspeed > speed_min(mddev)) {
			if ((currspeed > speed_max(mddev)) ||
					!is_mddev_idle(mddev, 0)) {
				msleep(500);
				goto repeat;
			}
		}
	}
	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
	/*
	 * Normal completion, early break and the error paths all meet here.
	 */
 out:
	blk_unplug(mddev->queue);

	/* wait for every outstanding sync I/O to complete */
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/* final call at max_sectors: tell the personality we are finished */
	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);

	/* Record how far we got, unless this was only a check. */
	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
	    mddev->curr_resync > 2) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				/* interrupted: checkpoint, never moving back */
				if (mddev->curr_resync >= mddev->recovery_cp) {
					printk(KERN_INFO
					       "md: checkpointing %s of %s.\n",
					       desc, mdname(mddev));
					mddev->recovery_cp = mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
		} else {
			/* recovery/reshape: advance each rebuilding member */
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
			list_for_each_entry(rdev, &mddev->disks, same_set)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < mddev->curr_resync)
					rdev->recovery_offset = mddev->curr_resync;
		}
	}
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

 skip:
	/* Reset the sync state, release waiting arrays, report completion. */
	mddev->curr_resync = 0;
	mddev->curr_resync_completed = 0;
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
	wake_up(&resync_wait);
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * We were asked to stop (kthread_should_stop()): flag the pass as
	 * interrupted and take the common exit.
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);
6517
6518
/*
 * remove_and_add_spares() - detach members that can no longer take part
 * in the array and try to attach spare devices in their place.
 *
 * Returns the number of devices that hold a raid slot but are not In_sync
 * (and not Blocked), including those added by this call.  The count is
 * only taken when the array is degraded, writable and recovery is not
 * disabled; otherwise 0 is returned.
 */
static int remove_and_add_spares(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	int spares = 0;

	mddev->curr_resync_completed = 0;

	/*
	 * Pass 1: ask the personality to drop every slotted device that is
	 * Faulty or not In_sync, provided it is not Blocked and has no
	 * pending requests.  On success the "rd%d" sysfs link is removed
	 * and the device loses its slot.
	 */
	list_for_each_entry(rdev, &mddev->disks, same_set)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    (test_bit(Faulty, &rdev->flags) ||
		     ! test_bit(In_sync, &rdev->flags)) &&
		    atomic_read(&rdev->nr_pending)==0) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev->raid_disk)==0) {
				char nm[20];
				sprintf(nm,"rd%d", rdev->raid_disk);
				sysfs_remove_link(&mddev->kobj, nm);
				rdev->raid_disk = -1;
			}
		}

	/*
	 * Pass 2: count devices still being rebuilt and offer every
	 * slot-less, non-faulty device to the personality.
	 */
	if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
		list_for_each_entry(rdev, &mddev->disks, same_set) {
			if (rdev->raid_disk >= 0 &&
			    !test_bit(In_sync, &rdev->flags) &&
			    !test_bit(Blocked, &rdev->flags))
				spares++;
			if (rdev->raid_disk < 0
			    && !test_bit(Faulty, &rdev->flags)) {
				/* a newly added device is rebuilt from sector 0 */
				rdev->recovery_offset = 0;
				if (mddev->pers->
				    hot_add_disk(mddev, rdev) == 0) {
					char nm[20];
					sprintf(nm, "rd%d", rdev->raid_disk);
					if (sysfs_create_link(&mddev->kobj,
							      &rdev->kobj, nm))
						printk(KERN_WARNING
						       "md: cannot register "
						       "%s for %s\n",
						       nm, mdname(mddev));
					spares++;
					md_new_event(mddev);
				} else
					/* first refusal ends the whole scan */
					break;
			}
		}
	}
	return spares;
}
6569
6570
6571
6572
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583
6584
6585
6586
6587
6588
6589
6590
6591void md_check_recovery(mddev_t *mddev)
6592{
6593 mdk_rdev_t *rdev;
6594
6595
6596 if (mddev->bitmap)
6597 bitmap_daemon_work(mddev->bitmap);
6598
6599 if (mddev->ro)
6600 return;
6601
6602 if (signal_pending(current)) {
6603 if (mddev->pers->sync_request && !mddev->external) {
6604 printk(KERN_INFO "md: %s in immediate safe mode\n",
6605 mdname(mddev));
6606 mddev->safemode = 2;
6607 }
6608 flush_signals(current);
6609 }
6610
6611 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
6612 return;
6613 if ( ! (
6614 (mddev->flags && !mddev->external) ||
6615 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
6616 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
6617 (mddev->external == 0 && mddev->safemode == 1) ||
6618 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
6619 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
6620 ))
6621 return;
6622
6623 if (mddev_trylock(mddev)) {
6624 int spares = 0;
6625
6626 if (mddev->ro) {
6627
6628
6629
6630 remove_and_add_spares(mddev);
6631 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6632 goto unlock;
6633 }
6634
6635 if (!mddev->external) {
6636 int did_change = 0;
6637 spin_lock_irq(&mddev->write_lock);
6638 if (mddev->safemode &&
6639 !atomic_read(&mddev->writes_pending) &&
6640 !mddev->in_sync &&
6641 mddev->recovery_cp == MaxSector) {
6642 mddev->in_sync = 1;
6643 did_change = 1;
6644 if (mddev->persistent)
6645 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6646 }
6647 if (mddev->safemode == 1)
6648 mddev->safemode = 0;
6649 spin_unlock_irq(&mddev->write_lock);
6650 if (did_change)
6651 sysfs_notify_dirent(mddev->sysfs_state);
6652 }
6653
6654 if (mddev->flags)
6655 md_update_sb(mddev, 0);
6656
6657 list_for_each_entry(rdev, &mddev->disks, same_set)
6658 if (test_and_clear_bit(StateChanged, &rdev->flags))
6659 sysfs_notify_dirent(rdev->sysfs_state);
6660
6661
6662 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
6663 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
6664
6665 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6666 goto unlock;
6667 }
6668 if (mddev->sync_thread) {
6669
6670 md_unregister_thread(mddev->sync_thread);
6671 mddev->sync_thread = NULL;
6672 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
6673 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
6674
6675
6676 if (mddev->pers->spare_active(mddev))
6677 sysfs_notify(&mddev->kobj, NULL,
6678 "degraded");
6679 }
6680 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
6681 mddev->pers->finish_reshape)
6682 mddev->pers->finish_reshape(mddev);
6683 md_update_sb(mddev, 1);
6684
6685
6686
6687
6688 if (!mddev->degraded)
6689 list_for_each_entry(rdev, &mddev->disks, same_set)
6690 rdev->saved_raid_disk = -1;
6691
6692 mddev->recovery = 0;
6693
6694 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6695 sysfs_notify_dirent(mddev->sysfs_action);
6696 md_new_event(mddev);
6697 goto unlock;
6698 }
6699
6700
6701
6702 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6703 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6704
6705
6706
6707 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
6708 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
6709
6710 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
6711 goto unlock;
6712
6713
6714
6715
6716
6717
6718
6719 if (mddev->reshape_position != MaxSector) {
6720 if (mddev->pers->check_reshape == NULL ||
6721 mddev->pers->check_reshape(mddev) != 0)
6722
6723 goto unlock;
6724 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
6725 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6726 } else if ((spares = remove_and_add_spares(mddev))) {
6727 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
6728 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
6729 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
6730 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6731 } else if (mddev->recovery_cp < MaxSector) {
6732 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
6733 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6734 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
6735
6736 goto unlock;
6737
6738 if (mddev->pers->sync_request) {
6739 if (spares && mddev->bitmap && ! mddev->bitmap->file) {
6740
6741
6742
6743
6744 bitmap_write_all(mddev->bitmap);
6745 }
6746 mddev->sync_thread = md_register_thread(md_do_sync,
6747 mddev,
6748 "%s_resync");
6749 if (!mddev->sync_thread) {
6750 printk(KERN_ERR "%s: could not start resync"
6751 " thread...\n",
6752 mdname(mddev));
6753
6754 mddev->recovery = 0;
6755 } else
6756 md_wakeup_thread(mddev->sync_thread);
6757 sysfs_notify_dirent(mddev->sysfs_action);
6758 md_new_event(mddev);
6759 }
6760 unlock:
6761 if (!mddev->sync_thread) {
6762 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6763 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
6764 &mddev->recovery))
6765 if (mddev->sysfs_action)
6766 sysfs_notify_dirent(mddev->sysfs_action);
6767 }
6768 mddev_unlock(mddev);
6769 }
6770}
6771
6772void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
6773{
6774 sysfs_notify_dirent(rdev->sysfs_state);
6775 wait_event_timeout(rdev->blocked_wait,
6776 !test_bit(Blocked, &rdev->flags),
6777 msecs_to_jiffies(5000));
6778 rdev_dec_pending(rdev, mddev);
6779}
6780EXPORT_SYMBOL(md_wait_for_blocked_rdev);
6781
6782static int md_notify_reboot(struct notifier_block *this,
6783 unsigned long code, void *x)
6784{
6785 struct list_head *tmp;
6786 mddev_t *mddev;
6787
6788 if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
6789
6790 printk(KERN_INFO "md: stopping all md devices.\n");
6791
6792 for_each_mddev(mddev, tmp)
6793 if (mddev_trylock(mddev)) {
6794
6795
6796
6797
6798 do_md_stop(mddev, 1, 100);
6799 mddev_unlock(mddev);
6800 }
6801
6802
6803
6804
6805
6806
6807 mdelay(1000*1);
6808 }
6809 return NOTIFY_DONE;
6810}
6811
6812static struct notifier_block md_notifier = {
6813 .notifier_call = md_notify_reboot,
6814 .next = NULL,
6815 .priority = INT_MAX,
6816};
6817
6818static void md_geninit(void)
6819{
6820 dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
6821
6822 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
6823}
6824
6825static int __init md_init(void)
6826{
6827 if (register_blkdev(MD_MAJOR, "md"))
6828 return -1;
6829 if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
6830 unregister_blkdev(MD_MAJOR, "md");
6831 return -1;
6832 }
6833 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
6834 md_probe, NULL, NULL);
6835 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
6836 md_probe, NULL, NULL);
6837
6838 register_reboot_notifier(&md_notifier);
6839 raid_table_header = register_sysctl_table(raid_root_table);
6840
6841 md_geninit();
6842 return 0;
6843}
6844
6845
6846#ifndef MODULE
6847
6848
6849
6850
6851
6852
6853static LIST_HEAD(all_detected_devices);
6854struct detected_devices_node {
6855 struct list_head list;
6856 dev_t dev;
6857};
6858
6859void md_autodetect_dev(dev_t dev)
6860{
6861 struct detected_devices_node *node_detected_dev;
6862
6863 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
6864 if (node_detected_dev) {
6865 node_detected_dev->dev = dev;
6866 list_add_tail(&node_detected_dev->list, &all_detected_devices);
6867 } else {
6868 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
6869 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
6870 }
6871}
6872
6873
6874static void autostart_arrays(int part)
6875{
6876 mdk_rdev_t *rdev;
6877 struct detected_devices_node *node_detected_dev;
6878 dev_t dev;
6879 int i_scanned, i_passed;
6880
6881 i_scanned = 0;
6882 i_passed = 0;
6883
6884 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
6885
6886 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
6887 i_scanned++;
6888 node_detected_dev = list_entry(all_detected_devices.next,
6889 struct detected_devices_node, list);
6890 list_del(&node_detected_dev->list);
6891 dev = node_detected_dev->dev;
6892 kfree(node_detected_dev);
6893 rdev = md_import_device(dev,0, 90);
6894 if (IS_ERR(rdev))
6895 continue;
6896
6897 if (test_bit(Faulty, &rdev->flags)) {
6898 MD_BUG();
6899 continue;
6900 }
6901 set_bit(AutoDetected, &rdev->flags);
6902 list_add(&rdev->same_set, &pending_raid_disks);
6903 i_passed++;
6904 }
6905
6906 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
6907 i_scanned, i_passed);
6908
6909 autorun_devices(part);
6910}
6911
6912#endif
6913
6914static __exit void md_exit(void)
6915{
6916 mddev_t *mddev;
6917 struct list_head *tmp;
6918
6919 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
6920 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
6921
6922 unregister_blkdev(MD_MAJOR,"md");
6923 unregister_blkdev(mdp_major, "mdp");
6924 unregister_reboot_notifier(&md_notifier);
6925 unregister_sysctl_table(raid_table_header);
6926 remove_proc_entry("mdstat", NULL);
6927 for_each_mddev(mddev, tmp) {
6928 export_array(mddev);
6929 mddev->hold_active = 0;
6930 }
6931}
6932
6933subsys_initcall(md_init);
6934module_exit(md_exit)
6935
6936static int get_ro(char *buffer, struct kernel_param *kp)
6937{
6938 return sprintf(buffer, "%d", start_readonly);
6939}
6940static int set_ro(const char *val, struct kernel_param *kp)
6941{
6942 char *e;
6943 int num = simple_strtoul(val, &e, 10);
6944 if (*val && (*e == '\0' || *e == '\n')) {
6945 start_readonly = num;
6946 return 0;
6947 }
6948 return -EINVAL;
6949}
6950
6951module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
6952module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
6953
6954module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
6955
6956EXPORT_SYMBOL(register_md_personality);
6957EXPORT_SYMBOL(unregister_md_personality);
6958EXPORT_SYMBOL(md_error);
6959EXPORT_SYMBOL(md_done_sync);
6960EXPORT_SYMBOL(md_write_start);
6961EXPORT_SYMBOL(md_write_end);
6962EXPORT_SYMBOL(md_register_thread);
6963EXPORT_SYMBOL(md_unregister_thread);
6964EXPORT_SYMBOL(md_wakeup_thread);
6965EXPORT_SYMBOL(md_check_recovery);
6966MODULE_LICENSE("GPL");
6967MODULE_ALIAS("md");
6968MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
6969