1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
/*
 * Forward declaration of autostart_arrays(); it is only declared when
 * MODULE is not defined.  The definition lies outside this section.
 */
#ifndef MODULE
static void autostart_arrays(int part);
#endif
60
61
62
63
64
65
/*
 * pers_list is walked by find_pers() to look up a personality.
 * NOTE(review): pers_lock presumably guards pers_list; it is also taken
 * around md_wakeup_thread() in mddev_unlock() - confirm the full contract.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

/* Forward declaration; called from MD_BUG() below. */
static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
/* md_wq runs the flush work items; md_misc_wq runs delayed mddev deletion. */
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

/*
 * MD_BUG(): log the source file and line, then call md_print_devices().
 * Any arguments are ignored.  The expansion is a bare brace block (not
 * do { } while (0)), so "if (x) MD_BUG(); else ..." will not parse.
 */
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
76
77
78
79
80
81
/*
 * Default limit on corrected read errors.
 * NOTE(review): the users of this constant are outside this section -
 * confirm the exact semantics (per device? per time window?) there.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
83
84
85
86
87
88
89
90
91
92
93
94
95
/*
 * System-wide fallback sync speed limits, used by speed_min()/speed_max()
 * when an array has no per-array value set.  Writable through the
 * "speed_limit_min"/"speed_limit_max" sysctl entries defined below.
 * NOTE(review): units are not visible here (presumably KB/sec) - confirm.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
98static inline int speed_min(struct mddev *mddev)
99{
100 return mddev->sync_speed_min ?
101 mddev->sync_speed_min : sysctl_speed_limit_min;
102}
103
104static inline int speed_max(struct mddev *mddev)
105{
106 return mddev->sync_speed_max ?
107 mddev->sync_speed_max : sysctl_speed_limit_max;
108}
109
/* Handle for the registered sysctl tree; registration is outside this section. */
static struct ctl_table_header *raid_table_header;

/*
 * Leaf entries: two integer knobs backed by sysctl_speed_limit_min and
 * sysctl_speed_limit_max, world-readable and writable by the owner.
 */
static ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/* "raid" directory holding the leaf entries above. */
static ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

/* Top-level "dev" directory, so the knobs appear as dev/raid/speed_limit_*. */
static ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{  }
};
149
/* Forward declaration; the block device operations are defined outside this section. */
static const struct block_device_operations md_fops;

/*
 * NOTE(review): presumably makes newly started arrays read-only when
 * non-zero; its users are outside this section - confirm.
 */
static int start_readonly;
153
154
155
156
157
158static void mddev_bio_destructor(struct bio *bio)
159{
160 struct mddev *mddev, **mddevp;
161
162 mddevp = (void*)bio;
163 mddev = mddevp[-1];
164
165 bio_free(bio, mddev->bio_set);
166}
167
168struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
169 struct mddev *mddev)
170{
171 struct bio *b;
172 struct mddev **mddevp;
173
174 if (!mddev || !mddev->bio_set)
175 return bio_alloc(gfp_mask, nr_iovecs);
176
177 b = bio_alloc_bioset(gfp_mask, nr_iovecs,
178 mddev->bio_set);
179 if (!b)
180 return NULL;
181 mddevp = (void*)b;
182 mddevp[-1] = mddev;
183 b->bi_destructor = mddev_bio_destructor;
184 return b;
185}
186EXPORT_SYMBOL_GPL(bio_alloc_mddev);
187
188struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
189 struct mddev *mddev)
190{
191 struct bio *b;
192 struct mddev **mddevp;
193
194 if (!mddev || !mddev->bio_set)
195 return bio_clone(bio, gfp_mask);
196
197 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
198 mddev->bio_set);
199 if (!b)
200 return NULL;
201 mddevp = (void*)b;
202 mddevp[-1] = mddev;
203 b->bi_destructor = mddev_bio_destructor;
204 __bio_clone(b, bio);
205 if (bio_integrity(bio)) {
206 int ret;
207
208 ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
209
210 if (ret < 0) {
211 bio_put(b);
212 return NULL;
213 }
214 }
215
216 return b;
217}
218EXPORT_SYMBOL_GPL(bio_clone_mddev);
219
220void md_trim_bio(struct bio *bio, int offset, int size)
221{
222
223
224
225
226 int i;
227 struct bio_vec *bvec;
228 int sofar = 0;
229
230 size <<= 9;
231 if (offset == 0 && size == bio->bi_size)
232 return;
233
234 bio->bi_sector += offset;
235 bio->bi_size = size;
236 offset <<= 9;
237 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
238
239 while (bio->bi_idx < bio->bi_vcnt &&
240 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
241
242 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
243 bio->bi_idx++;
244 }
245 if (bio->bi_idx < bio->bi_vcnt) {
246 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
247 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
248 }
249
250 if (bio->bi_idx) {
251 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
252 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
253 bio->bi_vcnt -= bio->bi_idx;
254 bio->bi_idx = 0;
255 }
256
257 bio_for_each_segment(bvec, bio, i) {
258 if (sofar + bvec->bv_len > size)
259 bvec->bv_len = size - sofar;
260 if (bvec->bv_len == 0) {
261 bio->bi_vcnt = i;
262 break;
263 }
264 sofar += bvec->bv_len;
265 }
266}
267EXPORT_SYMBOL_GPL(md_trim_bio);
268
269
270
271
272
273
274
275
276
277
278
279static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
280static atomic_t md_event_count;
281void md_new_event(struct mddev *mddev)
282{
283 atomic_inc(&md_event_count);
284 wake_up(&md_event_waiters);
285}
286EXPORT_SYMBOL_GPL(md_new_event);
287
288
289
290
291static void md_new_event_inintr(struct mddev *mddev)
292{
293 atomic_inc(&md_event_count);
294 wake_up(&md_event_waiters);
295}
296
297
298
299
300
/*
 * Global list of every mddev, linked through mddev->all_mddevs.
 * all_mddevs_lock is held around every list walk/insert/removal visible
 * in this section (mddev_find, mddev_put, for_each_mddev).
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
303
304
305
306
307
308
309
310
311
/*
 * for_each_mddev(_mddev, _tmp): iterate over all_mddevs.
 *
 * _mddev is the struct mddev * cursor, _tmp a struct list_head * scratch.
 * all_mddevs_lock is held only while list pointers are read; the loop
 * body runs with the lock dropped.  Before each iteration a reference is
 * taken on the entry about to be visited (mddev_get) and the reference on
 * the previously visited entry is released (mddev_put).
 *
 * Because that release only happens when the loop advances, a caller
 * that leaves the loop early (break/return/goto) still holds a reference
 * on the current _mddev and must mddev_put() it itself.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock); 				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
326
327
328
329
330
331
332
333
334
335static void md_make_request(struct request_queue *q, struct bio *bio)
336{
337 const int rw = bio_data_dir(bio);
338 struct mddev *mddev = q->queuedata;
339 int cpu;
340 unsigned int sectors;
341
342 if (mddev == NULL || mddev->pers == NULL
343 || !mddev->ready) {
344 bio_io_error(bio);
345 return;
346 }
347 smp_rmb();
348 rcu_read_lock();
349 if (mddev->suspended) {
350 DEFINE_WAIT(__wait);
351 for (;;) {
352 prepare_to_wait(&mddev->sb_wait, &__wait,
353 TASK_UNINTERRUPTIBLE);
354 if (!mddev->suspended)
355 break;
356 rcu_read_unlock();
357 schedule();
358 rcu_read_lock();
359 }
360 finish_wait(&mddev->sb_wait, &__wait);
361 }
362 atomic_inc(&mddev->active_io);
363 rcu_read_unlock();
364
365
366
367
368
369 sectors = bio_sectors(bio);
370 mddev->pers->make_request(mddev, bio);
371
372 cpu = part_stat_lock();
373 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
374 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
375 part_stat_unlock();
376
377 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
378 wake_up(&mddev->sb_wait);
379}
380
381
382
383
384
385
386
387void mddev_suspend(struct mddev *mddev)
388{
389 BUG_ON(mddev->suspended);
390 mddev->suspended = 1;
391 synchronize_rcu();
392 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
393 mddev->pers->quiesce(mddev, 1);
394
395 del_timer_sync(&mddev->safemode_timer);
396}
397EXPORT_SYMBOL_GPL(mddev_suspend);
398
399void mddev_resume(struct mddev *mddev)
400{
401 mddev->suspended = 0;
402 wake_up(&mddev->sb_wait);
403 mddev->pers->quiesce(mddev, 0);
404
405 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
406 md_wakeup_thread(mddev->thread);
407 md_wakeup_thread(mddev->sync_thread);
408}
409EXPORT_SYMBOL_GPL(mddev_resume);
410
411int mddev_congested(struct mddev *mddev, int bits)
412{
413 return mddev->suspended;
414}
415EXPORT_SYMBOL(mddev_congested);
416
417
418
419
420
421static void md_end_flush(struct bio *bio, int err)
422{
423 struct md_rdev *rdev = bio->bi_private;
424 struct mddev *mddev = rdev->mddev;
425
426 rdev_dec_pending(rdev, mddev);
427
428 if (atomic_dec_and_test(&mddev->flush_pending)) {
429
430 queue_work(md_wq, &mddev->flush_work);
431 }
432 bio_put(bio);
433}
434
435static void md_submit_flush_data(struct work_struct *ws);
436
437static void submit_flushes(struct work_struct *ws)
438{
439 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
440 struct md_rdev *rdev;
441
442 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
443 atomic_set(&mddev->flush_pending, 1);
444 rcu_read_lock();
445 rdev_for_each_rcu(rdev, mddev)
446 if (rdev->raid_disk >= 0 &&
447 !test_bit(Faulty, &rdev->flags)) {
448
449
450
451
452 struct bio *bi;
453 atomic_inc(&rdev->nr_pending);
454 atomic_inc(&rdev->nr_pending);
455 rcu_read_unlock();
456 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
457 bi->bi_end_io = md_end_flush;
458 bi->bi_private = rdev;
459 bi->bi_bdev = rdev->bdev;
460 atomic_inc(&mddev->flush_pending);
461 submit_bio(WRITE_FLUSH, bi);
462 rcu_read_lock();
463 rdev_dec_pending(rdev, mddev);
464 }
465 rcu_read_unlock();
466 if (atomic_dec_and_test(&mddev->flush_pending))
467 queue_work(md_wq, &mddev->flush_work);
468}
469
470static void md_submit_flush_data(struct work_struct *ws)
471{
472 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
473 struct bio *bio = mddev->flush_bio;
474
475 if (bio->bi_size == 0)
476
477 bio_endio(bio, 0);
478 else {
479 bio->bi_rw &= ~REQ_FLUSH;
480 mddev->pers->make_request(mddev, bio);
481 }
482
483 mddev->flush_bio = NULL;
484 wake_up(&mddev->sb_wait);
485}
486
487void md_flush_request(struct mddev *mddev, struct bio *bio)
488{
489 spin_lock_irq(&mddev->write_lock);
490 wait_event_lock_irq(mddev->sb_wait,
491 !mddev->flush_bio,
492 mddev->write_lock, );
493 mddev->flush_bio = bio;
494 spin_unlock_irq(&mddev->write_lock);
495
496 INIT_WORK(&mddev->flush_work, submit_flushes);
497 queue_work(md_wq, &mddev->flush_work);
498}
499EXPORT_SYMBOL(md_flush_request);
500
501void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
502{
503 struct mddev *mddev = cb->data;
504 md_wakeup_thread(mddev->thread);
505 kfree(cb);
506}
507EXPORT_SYMBOL(md_unplug);
508
509static inline struct mddev *mddev_get(struct mddev *mddev)
510{
511 atomic_inc(&mddev->active);
512 return mddev;
513}
514
515static void mddev_delayed_delete(struct work_struct *ws);
516
517static void mddev_put(struct mddev *mddev)
518{
519 struct bio_set *bs = NULL;
520
521 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
522 return;
523 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
524 mddev->ctime == 0 && !mddev->hold_active) {
525
526
527 list_del_init(&mddev->all_mddevs);
528 bs = mddev->bio_set;
529 mddev->bio_set = NULL;
530 if (mddev->gendisk) {
531
532
533
534
535
536 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
537 queue_work(md_misc_wq, &mddev->del_work);
538 } else
539 kfree(mddev);
540 }
541 spin_unlock(&all_mddevs_lock);
542 if (bs)
543 bioset_free(bs);
544}
545
546void mddev_init(struct mddev *mddev)
547{
548 mutex_init(&mddev->open_mutex);
549 mutex_init(&mddev->reconfig_mutex);
550 mutex_init(&mddev->bitmap_info.mutex);
551 INIT_LIST_HEAD(&mddev->disks);
552 INIT_LIST_HEAD(&mddev->all_mddevs);
553 init_timer(&mddev->safemode_timer);
554 atomic_set(&mddev->active, 1);
555 atomic_set(&mddev->openers, 0);
556 atomic_set(&mddev->active_io, 0);
557 spin_lock_init(&mddev->write_lock);
558 atomic_set(&mddev->flush_pending, 0);
559 init_waitqueue_head(&mddev->sb_wait);
560 init_waitqueue_head(&mddev->recovery_wait);
561 mddev->reshape_position = MaxSector;
562 mddev->reshape_backwards = 0;
563 mddev->resync_min = 0;
564 mddev->resync_max = MaxSector;
565 mddev->level = LEVEL_NONE;
566}
567EXPORT_SYMBOL_GPL(mddev_init);
568
569static struct mddev * mddev_find(dev_t unit)
570{
571 struct mddev *mddev, *new = NULL;
572
573 if (unit && MAJOR(unit) != MD_MAJOR)
574 unit &= ~((1<<MdpMinorShift)-1);
575
576 retry:
577 spin_lock(&all_mddevs_lock);
578
579 if (unit) {
580 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
581 if (mddev->unit == unit) {
582 mddev_get(mddev);
583 spin_unlock(&all_mddevs_lock);
584 kfree(new);
585 return mddev;
586 }
587
588 if (new) {
589 list_add(&new->all_mddevs, &all_mddevs);
590 spin_unlock(&all_mddevs_lock);
591 new->hold_active = UNTIL_IOCTL;
592 return new;
593 }
594 } else if (new) {
595
596 static int next_minor = 512;
597 int start = next_minor;
598 int is_free = 0;
599 int dev = 0;
600 while (!is_free) {
601 dev = MKDEV(MD_MAJOR, next_minor);
602 next_minor++;
603 if (next_minor > MINORMASK)
604 next_minor = 0;
605 if (next_minor == start) {
606
607 spin_unlock(&all_mddevs_lock);
608 kfree(new);
609 return NULL;
610 }
611
612 is_free = 1;
613 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
614 if (mddev->unit == dev) {
615 is_free = 0;
616 break;
617 }
618 }
619 new->unit = dev;
620 new->md_minor = MINOR(dev);
621 new->hold_active = UNTIL_STOP;
622 list_add(&new->all_mddevs, &all_mddevs);
623 spin_unlock(&all_mddevs_lock);
624 return new;
625 }
626 spin_unlock(&all_mddevs_lock);
627
628 new = kzalloc(sizeof(*new), GFP_KERNEL);
629 if (!new)
630 return NULL;
631
632 new->unit = unit;
633 if (MAJOR(unit) == MD_MAJOR)
634 new->md_minor = MINOR(unit);
635 else
636 new->md_minor = MINOR(unit) >> MdpMinorShift;
637
638 mddev_init(new);
639
640 goto retry;
641}
642
643static inline int mddev_lock(struct mddev * mddev)
644{
645 return mutex_lock_interruptible(&mddev->reconfig_mutex);
646}
647
648static inline int mddev_is_locked(struct mddev *mddev)
649{
650 return mutex_is_locked(&mddev->reconfig_mutex);
651}
652
653static inline int mddev_trylock(struct mddev * mddev)
654{
655 return mutex_trylock(&mddev->reconfig_mutex);
656}
657
658static struct attribute_group md_redundancy_group;
659
660static void mddev_unlock(struct mddev * mddev)
661{
662 if (mddev->to_remove) {
663
664
665
666
667
668
669
670
671
672
673
674
675 struct attribute_group *to_remove = mddev->to_remove;
676 mddev->to_remove = NULL;
677 mddev->sysfs_active = 1;
678 mutex_unlock(&mddev->reconfig_mutex);
679
680 if (mddev->kobj.sd) {
681 if (to_remove != &md_redundancy_group)
682 sysfs_remove_group(&mddev->kobj, to_remove);
683 if (mddev->pers == NULL ||
684 mddev->pers->sync_request == NULL) {
685 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
686 if (mddev->sysfs_action)
687 sysfs_put(mddev->sysfs_action);
688 mddev->sysfs_action = NULL;
689 }
690 }
691 mddev->sysfs_active = 0;
692 } else
693 mutex_unlock(&mddev->reconfig_mutex);
694
695
696
697
698 spin_lock(&pers_lock);
699 md_wakeup_thread(mddev->thread);
700 spin_unlock(&pers_lock);
701}
702
703static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
704{
705 struct md_rdev *rdev;
706
707 rdev_for_each(rdev, mddev)
708 if (rdev->desc_nr == nr)
709 return rdev;
710
711 return NULL;
712}
713
714static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
715{
716 struct md_rdev *rdev;
717
718 rdev_for_each(rdev, mddev)
719 if (rdev->bdev->bd_dev == dev)
720 return rdev;
721
722 return NULL;
723}
724
725static struct md_personality *find_pers(int level, char *clevel)
726{
727 struct md_personality *pers;
728 list_for_each_entry(pers, &pers_list, list) {
729 if (level != LEVEL_NONE && pers->level == level)
730 return pers;
731 if (strcmp(pers->name, clevel)==0)
732 return pers;
733 }
734 return NULL;
735}
736
737
738static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
739{
740 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
741 return MD_NEW_SIZE_SECTORS(num_sectors);
742}
743
744static int alloc_disk_sb(struct md_rdev * rdev)
745{
746 if (rdev->sb_page)
747 MD_BUG();
748
749 rdev->sb_page = alloc_page(GFP_KERNEL);
750 if (!rdev->sb_page) {
751 printk(KERN_ALERT "md: out of memory.\n");
752 return -ENOMEM;
753 }
754
755 return 0;
756}
757
758void md_rdev_clear(struct md_rdev *rdev)
759{
760 if (rdev->sb_page) {
761 put_page(rdev->sb_page);
762 rdev->sb_loaded = 0;
763 rdev->sb_page = NULL;
764 rdev->sb_start = 0;
765 rdev->sectors = 0;
766 }
767 if (rdev->bb_page) {
768 put_page(rdev->bb_page);
769 rdev->bb_page = NULL;
770 }
771 kfree(rdev->badblocks.page);
772 rdev->badblocks.page = NULL;
773}
774EXPORT_SYMBOL_GPL(md_rdev_clear);
775
776static void super_written(struct bio *bio, int error)
777{
778 struct md_rdev *rdev = bio->bi_private;
779 struct mddev *mddev = rdev->mddev;
780
781 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
782 printk("md: super_written gets error=%d, uptodate=%d\n",
783 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
784 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
785 md_error(mddev, rdev);
786 }
787
788 if (atomic_dec_and_test(&mddev->pending_writes))
789 wake_up(&mddev->sb_wait);
790 bio_put(bio);
791}
792
793void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
794 sector_t sector, int size, struct page *page)
795{
796
797
798
799
800
801
802 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
803
804 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
805 bio->bi_sector = sector;
806 bio_add_page(bio, page, size, 0);
807 bio->bi_private = rdev;
808 bio->bi_end_io = super_written;
809
810 atomic_inc(&mddev->pending_writes);
811 submit_bio(WRITE_FLUSH_FUA, bio);
812}
813
814void md_super_wait(struct mddev *mddev)
815{
816
817 DEFINE_WAIT(wq);
818 for(;;) {
819 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
820 if (atomic_read(&mddev->pending_writes)==0)
821 break;
822 schedule();
823 }
824 finish_wait(&mddev->sb_wait, &wq);
825}
826
827static void bi_complete(struct bio *bio, int error)
828{
829 complete((struct completion*)bio->bi_private);
830}
831
832int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
833 struct page *page, int rw, bool metadata_op)
834{
835 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
836 struct completion event;
837 int ret;
838
839 rw |= REQ_SYNC;
840
841 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
842 rdev->meta_bdev : rdev->bdev;
843 if (metadata_op)
844 bio->bi_sector = sector + rdev->sb_start;
845 else if (rdev->mddev->reshape_position != MaxSector &&
846 (rdev->mddev->reshape_backwards ==
847 (sector >= rdev->mddev->reshape_position)))
848 bio->bi_sector = sector + rdev->new_data_offset;
849 else
850 bio->bi_sector = sector + rdev->data_offset;
851 bio_add_page(bio, page, size, 0);
852 init_completion(&event);
853 bio->bi_private = &event;
854 bio->bi_end_io = bi_complete;
855 submit_bio(rw, bio);
856 wait_for_completion(&event);
857
858 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
859 bio_put(bio);
860 return ret;
861}
862EXPORT_SYMBOL_GPL(sync_page_io);
863
864static int read_disk_sb(struct md_rdev * rdev, int size)
865{
866 char b[BDEVNAME_SIZE];
867 if (!rdev->sb_page) {
868 MD_BUG();
869 return -EINVAL;
870 }
871 if (rdev->sb_loaded)
872 return 0;
873
874
875 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
876 goto fail;
877 rdev->sb_loaded = 1;
878 return 0;
879
880fail:
881 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
882 bdevname(rdev->bdev,b));
883 return -EINVAL;
884}
885
886static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
887{
888 return sb1->set_uuid0 == sb2->set_uuid0 &&
889 sb1->set_uuid1 == sb2->set_uuid1 &&
890 sb1->set_uuid2 == sb2->set_uuid2 &&
891 sb1->set_uuid3 == sb2->set_uuid3;
892}
893
894static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
895{
896 int ret;
897 mdp_super_t *tmp1, *tmp2;
898
899 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
900 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
901
902 if (!tmp1 || !tmp2) {
903 ret = 0;
904 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
905 goto abort;
906 }
907
908 *tmp1 = *sb1;
909 *tmp2 = *sb2;
910
911
912
913
914 tmp1->nr_disks = 0;
915 tmp2->nr_disks = 0;
916
917 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
918abort:
919 kfree(tmp1);
920 kfree(tmp2);
921 return ret;
922}
923
924
/*
 * Fold a 32-bit checksum down to 16 bits by adding the high half to the
 * low half.  This is done twice so that a carry out of the first
 * addition is folded back in as well.
 */
static u32 md_csum_fold(u32 csum)
{
	int pass;

	for (pass = 0; pass < 2; pass++)
		csum = (csum >> 16) + (csum & 0xffff);

	return csum;
}
930
931static unsigned int calc_sb_csum(mdp_super_t * sb)
932{
933 u64 newcsum = 0;
934 u32 *sb32 = (u32*)sb;
935 int i;
936 unsigned int disk_csum, csum;
937
938 disk_csum = sb->sb_csum;
939 sb->sb_csum = 0;
940
941 for (i = 0; i < MD_SB_BYTES/4 ; i++)
942 newcsum += sb32[i];
943 csum = (newcsum & 0xffffffff) + (newcsum>>32);
944
945
946#ifdef CONFIG_ALPHA
947
948
949
950
951
952
953
954
955 sb->sb_csum = md_csum_fold(disk_csum);
956#else
957 sb->sb_csum = disk_csum;
958#endif
959 return csum;
960}
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
/*
 * Operations table for one on-disk superblock format.
 *
 * The super_90_* functions in this file have signatures matching these
 * hooks.  NOTE(review): the table instances and the exact contract for
 * each hook are outside this section; the notes below describe what the
 * 0.90 implementations visible here do.
 */
struct super_type  {
	char		    *name;
	struct module	    *owner;
	/*
	 * Read and sanity-check the superblock on rdev, optionally
	 * comparing it against refdev.  super_90_load returns 1 when
	 * there is no refdev or rdev's event count is higher, 0
	 * otherwise, and a negative errno on failure.
	 */
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	/*
	 * Apply rdev's loaded superblock to mddev/rdev state.
	 * super_90_validate returns 0, or -EINVAL for a stale device
	 * during assembly.
	 */
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	/* Regenerate rdev's in-memory superblock from mddev state. */
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	/*
	 * Change the usable size of rdev.  super_90_rdev_size_change
	 * returns the resulting size in sectors, or 0 if refused.
	 */
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	/* Non-zero if new_offset is an acceptable data offset for rdev. */
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017int md_check_no_bitmap(struct mddev *mddev)
1018{
1019 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1020 return 0;
1021 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1022 mdname(mddev), mddev->pers->name);
1023 return 1;
1024}
1025EXPORT_SYMBOL(md_check_no_bitmap);
1026
1027
1028
1029
1030static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1031{
1032 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1033 mdp_super_t *sb;
1034 int ret;
1035
1036
1037
1038
1039
1040
1041
1042 rdev->sb_start = calc_dev_sboffset(rdev);
1043
1044 ret = read_disk_sb(rdev, MD_SB_BYTES);
1045 if (ret) return ret;
1046
1047 ret = -EINVAL;
1048
1049 bdevname(rdev->bdev, b);
1050 sb = page_address(rdev->sb_page);
1051
1052 if (sb->md_magic != MD_SB_MAGIC) {
1053 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1054 b);
1055 goto abort;
1056 }
1057
1058 if (sb->major_version != 0 ||
1059 sb->minor_version < 90 ||
1060 sb->minor_version > 91) {
1061 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1062 sb->major_version, sb->minor_version,
1063 b);
1064 goto abort;
1065 }
1066
1067 if (sb->raid_disks <= 0)
1068 goto abort;
1069
1070 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1071 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1072 b);
1073 goto abort;
1074 }
1075
1076 rdev->preferred_minor = sb->md_minor;
1077 rdev->data_offset = 0;
1078 rdev->new_data_offset = 0;
1079 rdev->sb_size = MD_SB_BYTES;
1080 rdev->badblocks.shift = -1;
1081
1082 if (sb->level == LEVEL_MULTIPATH)
1083 rdev->desc_nr = -1;
1084 else
1085 rdev->desc_nr = sb->this_disk.number;
1086
1087 if (!refdev) {
1088 ret = 1;
1089 } else {
1090 __u64 ev1, ev2;
1091 mdp_super_t *refsb = page_address(refdev->sb_page);
1092 if (!uuid_equal(refsb, sb)) {
1093 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1094 b, bdevname(refdev->bdev,b2));
1095 goto abort;
1096 }
1097 if (!sb_equal(refsb, sb)) {
1098 printk(KERN_WARNING "md: %s has same UUID"
1099 " but different superblock to %s\n",
1100 b, bdevname(refdev->bdev, b2));
1101 goto abort;
1102 }
1103 ev1 = md_event(sb);
1104 ev2 = md_event(refsb);
1105 if (ev1 > ev2)
1106 ret = 1;
1107 else
1108 ret = 0;
1109 }
1110 rdev->sectors = rdev->sb_start;
1111
1112
1113
1114
1115 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1116 rdev->sectors = (2ULL << 32) - 2;
1117
1118 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1119
1120 ret = -EINVAL;
1121
1122 abort:
1123 return ret;
1124}
1125
1126
1127
1128
1129static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1130{
1131 mdp_disk_t *desc;
1132 mdp_super_t *sb = page_address(rdev->sb_page);
1133 __u64 ev1 = md_event(sb);
1134
1135 rdev->raid_disk = -1;
1136 clear_bit(Faulty, &rdev->flags);
1137 clear_bit(In_sync, &rdev->flags);
1138 clear_bit(WriteMostly, &rdev->flags);
1139
1140 if (mddev->raid_disks == 0) {
1141 mddev->major_version = 0;
1142 mddev->minor_version = sb->minor_version;
1143 mddev->patch_version = sb->patch_version;
1144 mddev->external = 0;
1145 mddev->chunk_sectors = sb->chunk_size >> 9;
1146 mddev->ctime = sb->ctime;
1147 mddev->utime = sb->utime;
1148 mddev->level = sb->level;
1149 mddev->clevel[0] = 0;
1150 mddev->layout = sb->layout;
1151 mddev->raid_disks = sb->raid_disks;
1152 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1153 mddev->events = ev1;
1154 mddev->bitmap_info.offset = 0;
1155 mddev->bitmap_info.space = 0;
1156
1157 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1158 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1159 mddev->reshape_backwards = 0;
1160
1161 if (mddev->minor_version >= 91) {
1162 mddev->reshape_position = sb->reshape_position;
1163 mddev->delta_disks = sb->delta_disks;
1164 mddev->new_level = sb->new_level;
1165 mddev->new_layout = sb->new_layout;
1166 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1167 if (mddev->delta_disks < 0)
1168 mddev->reshape_backwards = 1;
1169 } else {
1170 mddev->reshape_position = MaxSector;
1171 mddev->delta_disks = 0;
1172 mddev->new_level = mddev->level;
1173 mddev->new_layout = mddev->layout;
1174 mddev->new_chunk_sectors = mddev->chunk_sectors;
1175 }
1176
1177 if (sb->state & (1<<MD_SB_CLEAN))
1178 mddev->recovery_cp = MaxSector;
1179 else {
1180 if (sb->events_hi == sb->cp_events_hi &&
1181 sb->events_lo == sb->cp_events_lo) {
1182 mddev->recovery_cp = sb->recovery_cp;
1183 } else
1184 mddev->recovery_cp = 0;
1185 }
1186
1187 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1188 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1189 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1190 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1191
1192 mddev->max_disks = MD_SB_DISKS;
1193
1194 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1195 mddev->bitmap_info.file == NULL) {
1196 mddev->bitmap_info.offset =
1197 mddev->bitmap_info.default_offset;
1198 mddev->bitmap_info.space =
1199 mddev->bitmap_info.space;
1200 }
1201
1202 } else if (mddev->pers == NULL) {
1203
1204
1205 ++ev1;
1206 if (sb->disks[rdev->desc_nr].state & (
1207 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1208 if (ev1 < mddev->events)
1209 return -EINVAL;
1210 } else if (mddev->bitmap) {
1211
1212
1213
1214 if (ev1 < mddev->bitmap->events_cleared)
1215 return 0;
1216 } else {
1217 if (ev1 < mddev->events)
1218
1219 return 0;
1220 }
1221
1222 if (mddev->level != LEVEL_MULTIPATH) {
1223 desc = sb->disks + rdev->desc_nr;
1224
1225 if (desc->state & (1<<MD_DISK_FAULTY))
1226 set_bit(Faulty, &rdev->flags);
1227 else if (desc->state & (1<<MD_DISK_SYNC)
1228) {
1229 set_bit(In_sync, &rdev->flags);
1230 rdev->raid_disk = desc->raid_disk;
1231 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1232
1233
1234
1235 if (mddev->minor_version >= 91) {
1236 rdev->recovery_offset = 0;
1237 rdev->raid_disk = desc->raid_disk;
1238 }
1239 }
1240 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1241 set_bit(WriteMostly, &rdev->flags);
1242 } else
1243 set_bit(In_sync, &rdev->flags);
1244 return 0;
1245}
1246
1247
1248
1249
1250static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1251{
1252 mdp_super_t *sb;
1253 struct md_rdev *rdev2;
1254 int next_spare = mddev->raid_disks;
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267 int i;
1268 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1269
1270 rdev->sb_size = MD_SB_BYTES;
1271
1272 sb = page_address(rdev->sb_page);
1273
1274 memset(sb, 0, sizeof(*sb));
1275
1276 sb->md_magic = MD_SB_MAGIC;
1277 sb->major_version = mddev->major_version;
1278 sb->patch_version = mddev->patch_version;
1279 sb->gvalid_words = 0;
1280 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1281 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1282 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1283 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1284
1285 sb->ctime = mddev->ctime;
1286 sb->level = mddev->level;
1287 sb->size = mddev->dev_sectors / 2;
1288 sb->raid_disks = mddev->raid_disks;
1289 sb->md_minor = mddev->md_minor;
1290 sb->not_persistent = 0;
1291 sb->utime = mddev->utime;
1292 sb->state = 0;
1293 sb->events_hi = (mddev->events>>32);
1294 sb->events_lo = (u32)mddev->events;
1295
1296 if (mddev->reshape_position == MaxSector)
1297 sb->minor_version = 90;
1298 else {
1299 sb->minor_version = 91;
1300 sb->reshape_position = mddev->reshape_position;
1301 sb->new_level = mddev->new_level;
1302 sb->delta_disks = mddev->delta_disks;
1303 sb->new_layout = mddev->new_layout;
1304 sb->new_chunk = mddev->new_chunk_sectors << 9;
1305 }
1306 mddev->minor_version = sb->minor_version;
1307 if (mddev->in_sync)
1308 {
1309 sb->recovery_cp = mddev->recovery_cp;
1310 sb->cp_events_hi = (mddev->events>>32);
1311 sb->cp_events_lo = (u32)mddev->events;
1312 if (mddev->recovery_cp == MaxSector)
1313 sb->state = (1<< MD_SB_CLEAN);
1314 } else
1315 sb->recovery_cp = 0;
1316
1317 sb->layout = mddev->layout;
1318 sb->chunk_size = mddev->chunk_sectors << 9;
1319
1320 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1321 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1322
1323 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1324 rdev_for_each(rdev2, mddev) {
1325 mdp_disk_t *d;
1326 int desc_nr;
1327 int is_active = test_bit(In_sync, &rdev2->flags);
1328
1329 if (rdev2->raid_disk >= 0 &&
1330 sb->minor_version >= 91)
1331
1332
1333
1334
1335 is_active = 1;
1336 if (rdev2->raid_disk < 0 ||
1337 test_bit(Faulty, &rdev2->flags))
1338 is_active = 0;
1339 if (is_active)
1340 desc_nr = rdev2->raid_disk;
1341 else
1342 desc_nr = next_spare++;
1343 rdev2->desc_nr = desc_nr;
1344 d = &sb->disks[rdev2->desc_nr];
1345 nr_disks++;
1346 d->number = rdev2->desc_nr;
1347 d->major = MAJOR(rdev2->bdev->bd_dev);
1348 d->minor = MINOR(rdev2->bdev->bd_dev);
1349 if (is_active)
1350 d->raid_disk = rdev2->raid_disk;
1351 else
1352 d->raid_disk = rdev2->desc_nr;
1353 if (test_bit(Faulty, &rdev2->flags))
1354 d->state = (1<<MD_DISK_FAULTY);
1355 else if (is_active) {
1356 d->state = (1<<MD_DISK_ACTIVE);
1357 if (test_bit(In_sync, &rdev2->flags))
1358 d->state |= (1<<MD_DISK_SYNC);
1359 active++;
1360 working++;
1361 } else {
1362 d->state = 0;
1363 spare++;
1364 working++;
1365 }
1366 if (test_bit(WriteMostly, &rdev2->flags))
1367 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1368 }
1369
1370 for (i=0 ; i < mddev->raid_disks ; i++) {
1371 mdp_disk_t *d = &sb->disks[i];
1372 if (d->state == 0 && d->number == 0) {
1373 d->number = i;
1374 d->raid_disk = i;
1375 d->state = (1<<MD_DISK_REMOVED);
1376 d->state |= (1<<MD_DISK_FAULTY);
1377 failed++;
1378 }
1379 }
1380 sb->nr_disks = nr_disks;
1381 sb->active_disks = active;
1382 sb->working_disks = working;
1383 sb->failed_disks = failed;
1384 sb->spare_disks = spare;
1385
1386 sb->this_disk = sb->disks[rdev->desc_nr];
1387 sb->sb_csum = calc_sb_csum(sb);
1388}
1389
1390
1391
1392
1393static unsigned long long
1394super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1395{
1396 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1397 return 0;
1398 if (rdev->mddev->bitmap_info.offset)
1399 return 0;
1400 rdev->sb_start = calc_dev_sboffset(rdev);
1401 if (!num_sectors || num_sectors > rdev->sb_start)
1402 num_sectors = rdev->sb_start;
1403
1404
1405
1406 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1407 num_sectors = (2ULL << 32) - 2;
1408 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1409 rdev->sb_page);
1410 md_super_wait(rdev->mddev);
1411 return num_sectors;
1412}
1413
/*
 * 0.90 metadata only accepts a data offset of zero: returns 1 when
 * @new_offset is 0 and 0 otherwise.  @rdev is not examined.
 */
static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	if (new_offset != 0)
		return 0;
	return 1;
}
1420
1421
1422
1423
1424
1425static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1426{
1427 __le32 disk_csum;
1428 u32 csum;
1429 unsigned long long newcsum;
1430 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1431 __le32 *isuper = (__le32*)sb;
1432 int i;
1433
1434 disk_csum = sb->sb_csum;
1435 sb->sb_csum = 0;
1436 newcsum = 0;
1437 for (i=0; size>=4; size -= 4 )
1438 newcsum += le32_to_cpu(*isuper++);
1439
1440 if (size == 2)
1441 newcsum += le16_to_cpu(*(__le16*) isuper);
1442
1443 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1444 sb->sb_csum = disk_csum;
1445 return cpu_to_le32(csum);
1446}
1447
1448static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1449 int acknowledged);
1450static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1451{
1452 struct mdp_superblock_1 *sb;
1453 int ret;
1454 sector_t sb_start;
1455 sector_t sectors;
1456 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1457 int bmask;
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467 switch(minor_version) {
1468 case 0:
1469 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1470 sb_start -= 8*2;
1471 sb_start &= ~(sector_t)(4*2-1);
1472 break;
1473 case 1:
1474 sb_start = 0;
1475 break;
1476 case 2:
1477 sb_start = 8;
1478 break;
1479 default:
1480 return -EINVAL;
1481 }
1482 rdev->sb_start = sb_start;
1483
1484
1485
1486
1487 ret = read_disk_sb(rdev, 4096);
1488 if (ret) return ret;
1489
1490
1491 sb = page_address(rdev->sb_page);
1492
1493 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1494 sb->major_version != cpu_to_le32(1) ||
1495 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1496 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1497 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1498 return -EINVAL;
1499
1500 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1501 printk("md: invalid superblock checksum on %s\n",
1502 bdevname(rdev->bdev,b));
1503 return -EINVAL;
1504 }
1505 if (le64_to_cpu(sb->data_size) < 10) {
1506 printk("md: data_size too small on %s\n",
1507 bdevname(rdev->bdev,b));
1508 return -EINVAL;
1509 }
1510 if (sb->pad0 ||
1511 sb->pad3[0] ||
1512 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1513
1514 return -EINVAL;
1515
1516 rdev->preferred_minor = 0xffff;
1517 rdev->data_offset = le64_to_cpu(sb->data_offset);
1518 rdev->new_data_offset = rdev->data_offset;
1519 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1520 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1521 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1522 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1523
1524 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1525 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1526 if (rdev->sb_size & bmask)
1527 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1528
1529 if (minor_version
1530 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1531 return -EINVAL;
1532 if (minor_version
1533 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1534 return -EINVAL;
1535
1536 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1537 rdev->desc_nr = -1;
1538 else
1539 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1540
1541 if (!rdev->bb_page) {
1542 rdev->bb_page = alloc_page(GFP_KERNEL);
1543 if (!rdev->bb_page)
1544 return -ENOMEM;
1545 }
1546 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1547 rdev->badblocks.count == 0) {
1548
1549
1550
1551 s32 offset;
1552 sector_t bb_sector;
1553 u64 *bbp;
1554 int i;
1555 int sectors = le16_to_cpu(sb->bblog_size);
1556 if (sectors > (PAGE_SIZE / 512))
1557 return -EINVAL;
1558 offset = le32_to_cpu(sb->bblog_offset);
1559 if (offset == 0)
1560 return -EINVAL;
1561 bb_sector = (long long)offset;
1562 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1563 rdev->bb_page, READ, true))
1564 return -EIO;
1565 bbp = (u64 *)page_address(rdev->bb_page);
1566 rdev->badblocks.shift = sb->bblog_shift;
1567 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1568 u64 bb = le64_to_cpu(*bbp);
1569 int count = bb & (0x3ff);
1570 u64 sector = bb >> 10;
1571 sector <<= sb->bblog_shift;
1572 count <<= sb->bblog_shift;
1573 if (bb + 1 == 0)
1574 break;
1575 if (md_set_badblocks(&rdev->badblocks,
1576 sector, count, 1) == 0)
1577 return -EINVAL;
1578 }
1579 } else if (sb->bblog_offset == 0)
1580 rdev->badblocks.shift = -1;
1581
1582 if (!refdev) {
1583 ret = 1;
1584 } else {
1585 __u64 ev1, ev2;
1586 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1587
1588 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1589 sb->level != refsb->level ||
1590 sb->layout != refsb->layout ||
1591 sb->chunksize != refsb->chunksize) {
1592 printk(KERN_WARNING "md: %s has strangely different"
1593 " superblock to %s\n",
1594 bdevname(rdev->bdev,b),
1595 bdevname(refdev->bdev,b2));
1596 return -EINVAL;
1597 }
1598 ev1 = le64_to_cpu(sb->events);
1599 ev2 = le64_to_cpu(refsb->events);
1600
1601 if (ev1 > ev2)
1602 ret = 1;
1603 else
1604 ret = 0;
1605 }
1606 if (minor_version) {
1607 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1608 sectors -= rdev->data_offset;
1609 } else
1610 sectors = rdev->sb_start;
1611 if (sectors < le64_to_cpu(sb->data_size))
1612 return -EINVAL;
1613 rdev->sectors = le64_to_cpu(sb->data_size);
1614 return ret;
1615}
1616
1617static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1618{
1619 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1620 __u64 ev1 = le64_to_cpu(sb->events);
1621
1622 rdev->raid_disk = -1;
1623 clear_bit(Faulty, &rdev->flags);
1624 clear_bit(In_sync, &rdev->flags);
1625 clear_bit(WriteMostly, &rdev->flags);
1626
1627 if (mddev->raid_disks == 0) {
1628 mddev->major_version = 1;
1629 mddev->patch_version = 0;
1630 mddev->external = 0;
1631 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1632 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1633 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1634 mddev->level = le32_to_cpu(sb->level);
1635 mddev->clevel[0] = 0;
1636 mddev->layout = le32_to_cpu(sb->layout);
1637 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1638 mddev->dev_sectors = le64_to_cpu(sb->size);
1639 mddev->events = ev1;
1640 mddev->bitmap_info.offset = 0;
1641 mddev->bitmap_info.space = 0;
1642
1643
1644
1645 mddev->bitmap_info.default_offset = 1024 >> 9;
1646 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1647 mddev->reshape_backwards = 0;
1648
1649 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1650 memcpy(mddev->uuid, sb->set_uuid, 16);
1651
1652 mddev->max_disks = (4096-256)/2;
1653
1654 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1655 mddev->bitmap_info.file == NULL) {
1656 mddev->bitmap_info.offset =
1657 (__s32)le32_to_cpu(sb->bitmap_offset);
1658
1659
1660
1661
1662
1663 if (mddev->minor_version > 0)
1664 mddev->bitmap_info.space = 0;
1665 else if (mddev->bitmap_info.offset > 0)
1666 mddev->bitmap_info.space =
1667 8 - mddev->bitmap_info.offset;
1668 else
1669 mddev->bitmap_info.space =
1670 -mddev->bitmap_info.offset;
1671 }
1672
1673 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1674 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1675 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1676 mddev->new_level = le32_to_cpu(sb->new_level);
1677 mddev->new_layout = le32_to_cpu(sb->new_layout);
1678 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1679 if (mddev->delta_disks < 0 ||
1680 (mddev->delta_disks == 0 &&
1681 (le32_to_cpu(sb->feature_map)
1682 & MD_FEATURE_RESHAPE_BACKWARDS)))
1683 mddev->reshape_backwards = 1;
1684 } else {
1685 mddev->reshape_position = MaxSector;
1686 mddev->delta_disks = 0;
1687 mddev->new_level = mddev->level;
1688 mddev->new_layout = mddev->layout;
1689 mddev->new_chunk_sectors = mddev->chunk_sectors;
1690 }
1691
1692 } else if (mddev->pers == NULL) {
1693
1694
1695 ++ev1;
1696 if (rdev->desc_nr >= 0 &&
1697 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1698 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1699 if (ev1 < mddev->events)
1700 return -EINVAL;
1701 } else if (mddev->bitmap) {
1702
1703
1704
1705 if (ev1 < mddev->bitmap->events_cleared)
1706 return 0;
1707 } else {
1708 if (ev1 < mddev->events)
1709
1710 return 0;
1711 }
1712 if (mddev->level != LEVEL_MULTIPATH) {
1713 int role;
1714 if (rdev->desc_nr < 0 ||
1715 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1716 role = 0xffff;
1717 rdev->desc_nr = -1;
1718 } else
1719 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1720 switch(role) {
1721 case 0xffff:
1722 break;
1723 case 0xfffe:
1724 set_bit(Faulty, &rdev->flags);
1725 break;
1726 default:
1727 if ((le32_to_cpu(sb->feature_map) &
1728 MD_FEATURE_RECOVERY_OFFSET))
1729 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1730 else
1731 set_bit(In_sync, &rdev->flags);
1732 rdev->raid_disk = role;
1733 break;
1734 }
1735 if (sb->devflags & WriteMostly1)
1736 set_bit(WriteMostly, &rdev->flags);
1737 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1738 set_bit(Replacement, &rdev->flags);
1739 } else
1740 set_bit(In_sync, &rdev->flags);
1741
1742 return 0;
1743}
1744
1745static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1746{
1747 struct mdp_superblock_1 *sb;
1748 struct md_rdev *rdev2;
1749 int max_dev, i;
1750
1751
1752 sb = page_address(rdev->sb_page);
1753
1754 sb->feature_map = 0;
1755 sb->pad0 = 0;
1756 sb->recovery_offset = cpu_to_le64(0);
1757 memset(sb->pad3, 0, sizeof(sb->pad3));
1758
1759 sb->utime = cpu_to_le64((__u64)mddev->utime);
1760 sb->events = cpu_to_le64(mddev->events);
1761 if (mddev->in_sync)
1762 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1763 else
1764 sb->resync_offset = cpu_to_le64(0);
1765
1766 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1767
1768 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1769 sb->size = cpu_to_le64(mddev->dev_sectors);
1770 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1771 sb->level = cpu_to_le32(mddev->level);
1772 sb->layout = cpu_to_le32(mddev->layout);
1773
1774 if (test_bit(WriteMostly, &rdev->flags))
1775 sb->devflags |= WriteMostly1;
1776 else
1777 sb->devflags &= ~WriteMostly1;
1778 sb->data_offset = cpu_to_le64(rdev->data_offset);
1779 sb->data_size = cpu_to_le64(rdev->sectors);
1780
1781 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1782 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1783 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1784 }
1785
1786 if (rdev->raid_disk >= 0 &&
1787 !test_bit(In_sync, &rdev->flags)) {
1788 sb->feature_map |=
1789 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1790 sb->recovery_offset =
1791 cpu_to_le64(rdev->recovery_offset);
1792 }
1793 if (test_bit(Replacement, &rdev->flags))
1794 sb->feature_map |=
1795 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1796
1797 if (mddev->reshape_position != MaxSector) {
1798 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1799 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1800 sb->new_layout = cpu_to_le32(mddev->new_layout);
1801 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1802 sb->new_level = cpu_to_le32(mddev->new_level);
1803 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1804 if (mddev->delta_disks == 0 &&
1805 mddev->reshape_backwards)
1806 sb->feature_map
1807 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1808 if (rdev->new_data_offset != rdev->data_offset) {
1809 sb->feature_map
1810 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1811 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1812 - rdev->data_offset));
1813 }
1814 }
1815
1816 if (rdev->badblocks.count == 0)
1817 ;
1818 else if (sb->bblog_offset == 0)
1819
1820 md_error(mddev, rdev);
1821 else {
1822 struct badblocks *bb = &rdev->badblocks;
1823 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1824 u64 *p = bb->page;
1825 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1826 if (bb->changed) {
1827 unsigned seq;
1828
1829retry:
1830 seq = read_seqbegin(&bb->lock);
1831
1832 memset(bbp, 0xff, PAGE_SIZE);
1833
1834 for (i = 0 ; i < bb->count ; i++) {
1835 u64 internal_bb = *p++;
1836 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1837 | BB_LEN(internal_bb));
1838 *bbp++ = cpu_to_le64(store_bb);
1839 }
1840 bb->changed = 0;
1841 if (read_seqretry(&bb->lock, seq))
1842 goto retry;
1843
1844 bb->sector = (rdev->sb_start +
1845 (int)le32_to_cpu(sb->bblog_offset));
1846 bb->size = le16_to_cpu(sb->bblog_size);
1847 }
1848 }
1849
1850 max_dev = 0;
1851 rdev_for_each(rdev2, mddev)
1852 if (rdev2->desc_nr+1 > max_dev)
1853 max_dev = rdev2->desc_nr+1;
1854
1855 if (max_dev > le32_to_cpu(sb->max_dev)) {
1856 int bmask;
1857 sb->max_dev = cpu_to_le32(max_dev);
1858 rdev->sb_size = max_dev * 2 + 256;
1859 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1860 if (rdev->sb_size & bmask)
1861 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1862 } else
1863 max_dev = le32_to_cpu(sb->max_dev);
1864
1865 for (i=0; i<max_dev;i++)
1866 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1867
1868 rdev_for_each(rdev2, mddev) {
1869 i = rdev2->desc_nr;
1870 if (test_bit(Faulty, &rdev2->flags))
1871 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1872 else if (test_bit(In_sync, &rdev2->flags))
1873 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1874 else if (rdev2->raid_disk >= 0)
1875 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1876 else
1877 sb->dev_roles[i] = cpu_to_le16(0xffff);
1878 }
1879
1880 sb->sb_csum = calc_sb_1_csum(sb);
1881}
1882
1883static unsigned long long
1884super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1885{
1886 struct mdp_superblock_1 *sb;
1887 sector_t max_sectors;
1888 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1889 return 0;
1890 if (rdev->data_offset != rdev->new_data_offset)
1891 return 0;
1892 if (rdev->sb_start < rdev->data_offset) {
1893
1894 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1895 max_sectors -= rdev->data_offset;
1896 if (!num_sectors || num_sectors > max_sectors)
1897 num_sectors = max_sectors;
1898 } else if (rdev->mddev->bitmap_info.offset) {
1899
1900 return 0;
1901 } else {
1902
1903 sector_t sb_start;
1904 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1905 sb_start &= ~(sector_t)(4*2 - 1);
1906 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1907 if (!num_sectors || num_sectors > max_sectors)
1908 num_sectors = max_sectors;
1909 rdev->sb_start = sb_start;
1910 }
1911 sb = page_address(rdev->sb_page);
1912 sb->data_size = cpu_to_le64(num_sectors);
1913 sb->super_offset = rdev->sb_start;
1914 sb->sb_csum = calc_sb_1_csum(sb);
1915 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1916 rdev->sb_page);
1917 md_super_wait(rdev->mddev);
1918 return num_sectors;
1919
1920}
1921
1922static int
1923super_1_allow_new_offset(struct md_rdev *rdev,
1924 unsigned long long new_offset)
1925{
1926
1927 struct bitmap *bitmap;
1928 if (new_offset >= rdev->data_offset)
1929 return 1;
1930
1931
1932
1933 if (rdev->mddev->minor_version == 0)
1934 return 1;
1935
1936
1937
1938
1939
1940
1941
1942 if (rdev->sb_start + (32+4)*2 > new_offset)
1943 return 0;
1944 bitmap = rdev->mddev->bitmap;
1945 if (bitmap && !rdev->mddev->bitmap_info.file &&
1946 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1947 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1948 return 0;
1949 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1950 return 0;
1951
1952 return 1;
1953}
1954
1955static struct super_type super_types[] = {
1956 [0] = {
1957 .name = "0.90.0",
1958 .owner = THIS_MODULE,
1959 .load_super = super_90_load,
1960 .validate_super = super_90_validate,
1961 .sync_super = super_90_sync,
1962 .rdev_size_change = super_90_rdev_size_change,
1963 .allow_new_offset = super_90_allow_new_offset,
1964 },
1965 [1] = {
1966 .name = "md-1",
1967 .owner = THIS_MODULE,
1968 .load_super = super_1_load,
1969 .validate_super = super_1_validate,
1970 .sync_super = super_1_sync,
1971 .rdev_size_change = super_1_rdev_size_change,
1972 .allow_new_offset = super_1_allow_new_offset,
1973 },
1974};
1975
1976static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1977{
1978 if (mddev->sync_super) {
1979 mddev->sync_super(mddev, rdev);
1980 return;
1981 }
1982
1983 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1984
1985 super_types[mddev->major_version].sync_super(mddev, rdev);
1986}
1987
1988static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1989{
1990 struct md_rdev *rdev, *rdev2;
1991
1992 rcu_read_lock();
1993 rdev_for_each_rcu(rdev, mddev1)
1994 rdev_for_each_rcu(rdev2, mddev2)
1995 if (rdev->bdev->bd_contains ==
1996 rdev2->bdev->bd_contains) {
1997 rcu_read_unlock();
1998 return 1;
1999 }
2000 rcu_read_unlock();
2001 return 0;
2002}
2003
2004static LIST_HEAD(pending_raid_disks);
2005
2006
2007
2008
2009
2010
2011
2012
2013int md_integrity_register(struct mddev *mddev)
2014{
2015 struct md_rdev *rdev, *reference = NULL;
2016
2017 if (list_empty(&mddev->disks))
2018 return 0;
2019 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2020 return 0;
2021 rdev_for_each(rdev, mddev) {
2022
2023 if (test_bit(Faulty, &rdev->flags))
2024 continue;
2025 if (rdev->raid_disk < 0)
2026 continue;
2027 if (!reference) {
2028
2029 reference = rdev;
2030 continue;
2031 }
2032
2033 if (blk_integrity_compare(reference->bdev->bd_disk,
2034 rdev->bdev->bd_disk) < 0)
2035 return -EINVAL;
2036 }
2037 if (!reference || !bdev_get_integrity(reference->bdev))
2038 return 0;
2039
2040
2041
2042
2043 if (blk_integrity_register(mddev->gendisk,
2044 bdev_get_integrity(reference->bdev)) != 0) {
2045 printk(KERN_ERR "md: failed to register integrity for %s\n",
2046 mdname(mddev));
2047 return -EINVAL;
2048 }
2049 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2050 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2051 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2052 mdname(mddev));
2053 return -EINVAL;
2054 }
2055 return 0;
2056}
2057EXPORT_SYMBOL(md_integrity_register);
2058
2059
2060void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2061{
2062 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
2063 struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
2064
2065 if (!bi_mddev)
2066 return;
2067 if (rdev->raid_disk < 0)
2068 return;
2069 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2070 rdev->bdev->bd_disk) >= 0)
2071 return;
2072 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2073 blk_integrity_unregister(mddev->gendisk);
2074}
2075EXPORT_SYMBOL(md_integrity_add_rdev);
2076
2077static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2078{
2079 char b[BDEVNAME_SIZE];
2080 struct kobject *ko;
2081 char *s;
2082 int err;
2083
2084 if (rdev->mddev) {
2085 MD_BUG();
2086 return -EINVAL;
2087 }
2088
2089
2090 if (find_rdev(mddev, rdev->bdev->bd_dev))
2091 return -EEXIST;
2092
2093
2094 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2095 rdev->sectors < mddev->dev_sectors)) {
2096 if (mddev->pers) {
2097
2098
2099
2100
2101 if (mddev->level > 0)
2102 return -ENOSPC;
2103 } else
2104 mddev->dev_sectors = rdev->sectors;
2105 }
2106
2107
2108
2109
2110
2111 if (rdev->desc_nr < 0) {
2112 int choice = 0;
2113 if (mddev->pers) choice = mddev->raid_disks;
2114 while (find_rdev_nr(mddev, choice))
2115 choice++;
2116 rdev->desc_nr = choice;
2117 } else {
2118 if (find_rdev_nr(mddev, rdev->desc_nr))
2119 return -EBUSY;
2120 }
2121 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2122 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2123 mdname(mddev), mddev->max_disks);
2124 return -EBUSY;
2125 }
2126 bdevname(rdev->bdev,b);
2127 while ( (s=strchr(b, '/')) != NULL)
2128 *s = '!';
2129
2130 rdev->mddev = mddev;
2131 printk(KERN_INFO "md: bind<%s>\n", b);
2132
2133 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2134 goto fail;
2135
2136 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2137 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2138 ;
2139 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2140
2141 list_add_rcu(&rdev->same_set, &mddev->disks);
2142 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2143
2144
2145 mddev->recovery_disabled++;
2146
2147 return 0;
2148
2149 fail:
2150 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2151 b, mdname(mddev));
2152 return err;
2153}
2154
2155static void md_delayed_delete(struct work_struct *ws)
2156{
2157 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2158 kobject_del(&rdev->kobj);
2159 kobject_put(&rdev->kobj);
2160}
2161
2162static void unbind_rdev_from_array(struct md_rdev * rdev)
2163{
2164 char b[BDEVNAME_SIZE];
2165 if (!rdev->mddev) {
2166 MD_BUG();
2167 return;
2168 }
2169 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2170 list_del_rcu(&rdev->same_set);
2171 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2172 rdev->mddev = NULL;
2173 sysfs_remove_link(&rdev->kobj, "block");
2174 sysfs_put(rdev->sysfs_state);
2175 rdev->sysfs_state = NULL;
2176 rdev->badblocks.count = 0;
2177
2178
2179
2180
2181 synchronize_rcu();
2182 INIT_WORK(&rdev->del_work, md_delayed_delete);
2183 kobject_get(&rdev->kobj);
2184 queue_work(md_misc_wq, &rdev->del_work);
2185}
2186
2187
2188
2189
2190
2191
2192static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2193{
2194 int err = 0;
2195 struct block_device *bdev;
2196 char b[BDEVNAME_SIZE];
2197
2198 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2199 shared ? (struct md_rdev *)lock_rdev : rdev);
2200 if (IS_ERR(bdev)) {
2201 printk(KERN_ERR "md: could not open %s.\n",
2202 __bdevname(dev, b));
2203 return PTR_ERR(bdev);
2204 }
2205 rdev->bdev = bdev;
2206 return err;
2207}
2208
2209static void unlock_rdev(struct md_rdev *rdev)
2210{
2211 struct block_device *bdev = rdev->bdev;
2212 rdev->bdev = NULL;
2213 if (!bdev)
2214 MD_BUG();
2215 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2216}
2217
2218void md_autodetect_dev(dev_t dev);
2219
2220static void export_rdev(struct md_rdev * rdev)
2221{
2222 char b[BDEVNAME_SIZE];
2223 printk(KERN_INFO "md: export_rdev(%s)\n",
2224 bdevname(rdev->bdev,b));
2225 if (rdev->mddev)
2226 MD_BUG();
2227 md_rdev_clear(rdev);
2228#ifndef MODULE
2229 if (test_bit(AutoDetected, &rdev->flags))
2230 md_autodetect_dev(rdev->bdev->bd_dev);
2231#endif
2232 unlock_rdev(rdev);
2233 kobject_put(&rdev->kobj);
2234}
2235
/* Remove @rdev from its array and release it: unbind, then export. */
static void kick_rdev_from_array(struct md_rdev * rdev)
{
	/* Order matters: export_rdev() expects rdev->mddev to be cleared. */
	unbind_rdev_from_array(rdev);

	export_rdev(rdev);
}
2241
2242static void export_array(struct mddev *mddev)
2243{
2244 struct md_rdev *rdev, *tmp;
2245
2246 rdev_for_each_safe(rdev, tmp, mddev) {
2247 if (!rdev->mddev) {
2248 MD_BUG();
2249 continue;
2250 }
2251 kick_rdev_from_array(rdev);
2252 }
2253 if (!list_empty(&mddev->disks))
2254 MD_BUG();
2255 mddev->raid_disks = 0;
2256 mddev->major_version = 0;
2257}
2258
2259static void print_desc(mdp_disk_t *desc)
2260{
2261 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2262 desc->major,desc->minor,desc->raid_disk,desc->state);
2263}
2264
2265static void print_sb_90(mdp_super_t *sb)
2266{
2267 int i;
2268
2269 printk(KERN_INFO
2270 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2271 sb->major_version, sb->minor_version, sb->patch_version,
2272 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2273 sb->ctime);
2274 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2275 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2276 sb->md_minor, sb->layout, sb->chunk_size);
2277 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2278 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2279 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2280 sb->failed_disks, sb->spare_disks,
2281 sb->sb_csum, (unsigned long)sb->events_lo);
2282
2283 printk(KERN_INFO);
2284 for (i = 0; i < MD_SB_DISKS; i++) {
2285 mdp_disk_t *desc;
2286
2287 desc = sb->disks + i;
2288 if (desc->number || desc->major || desc->minor ||
2289 desc->raid_disk || (desc->state && (desc->state != 4))) {
2290 printk(" D %2d: ", i);
2291 print_desc(desc);
2292 }
2293 }
2294 printk(KERN_INFO "md: THIS: ");
2295 print_desc(&sb->this_disk);
2296}
2297
2298static void print_sb_1(struct mdp_superblock_1 *sb)
2299{
2300 __u8 *uuid;
2301
2302 uuid = sb->set_uuid;
2303 printk(KERN_INFO
2304 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2305 "md: Name: \"%s\" CT:%llu\n",
2306 le32_to_cpu(sb->major_version),
2307 le32_to_cpu(sb->feature_map),
2308 uuid,
2309 sb->set_name,
2310 (unsigned long long)le64_to_cpu(sb->ctime)
2311 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2312
2313 uuid = sb->device_uuid;
2314 printk(KERN_INFO
2315 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2316 " RO:%llu\n"
2317 "md: Dev:%08x UUID: %pU\n"
2318 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2319 "md: (MaxDev:%u) \n",
2320 le32_to_cpu(sb->level),
2321 (unsigned long long)le64_to_cpu(sb->size),
2322 le32_to_cpu(sb->raid_disks),
2323 le32_to_cpu(sb->layout),
2324 le32_to_cpu(sb->chunksize),
2325 (unsigned long long)le64_to_cpu(sb->data_offset),
2326 (unsigned long long)le64_to_cpu(sb->data_size),
2327 (unsigned long long)le64_to_cpu(sb->super_offset),
2328 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2329 le32_to_cpu(sb->dev_number),
2330 uuid,
2331 sb->devflags,
2332 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2333 (unsigned long long)le64_to_cpu(sb->events),
2334 (unsigned long long)le64_to_cpu(sb->resync_offset),
2335 le32_to_cpu(sb->sb_csum),
2336 le32_to_cpu(sb->max_dev)
2337 );
2338}
2339
2340static void print_rdev(struct md_rdev *rdev, int major_version)
2341{
2342 char b[BDEVNAME_SIZE];
2343 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2344 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2345 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2346 rdev->desc_nr);
2347 if (rdev->sb_loaded) {
2348 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2349 switch (major_version) {
2350 case 0:
2351 print_sb_90(page_address(rdev->sb_page));
2352 break;
2353 case 1:
2354 print_sb_1(page_address(rdev->sb_page));
2355 break;
2356 }
2357 } else
2358 printk(KERN_INFO "md: no rdev superblock!\n");
2359}
2360
2361static void md_print_devices(void)
2362{
2363 struct list_head *tmp;
2364 struct md_rdev *rdev;
2365 struct mddev *mddev;
2366 char b[BDEVNAME_SIZE];
2367
2368 printk("\n");
2369 printk("md: **********************************\n");
2370 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2371 printk("md: **********************************\n");
2372 for_each_mddev(mddev, tmp) {
2373
2374 if (mddev->bitmap)
2375 bitmap_print_sb(mddev->bitmap);
2376 else
2377 printk("%s: ", mdname(mddev));
2378 rdev_for_each(rdev, mddev)
2379 printk("<%s>", bdevname(rdev->bdev,b));
2380 printk("\n");
2381
2382 rdev_for_each(rdev, mddev)
2383 print_rdev(rdev, mddev->major_version);
2384 }
2385 printk("md: **********************************\n");
2386 printk("\n");
2387}
2388
2389
2390static void sync_sbs(struct mddev * mddev, int nospares)
2391{
2392
2393
2394
2395
2396
2397
2398 struct md_rdev *rdev;
2399 rdev_for_each(rdev, mddev) {
2400 if (rdev->sb_events == mddev->events ||
2401 (nospares &&
2402 rdev->raid_disk < 0 &&
2403 rdev->sb_events+1 == mddev->events)) {
2404
2405 rdev->sb_loaded = 2;
2406 } else {
2407 sync_super(mddev, rdev);
2408 rdev->sb_loaded = 1;
2409 }
2410 }
2411}
2412
2413static void md_update_sb(struct mddev * mddev, int force_change)
2414{
2415 struct md_rdev *rdev;
2416 int sync_req;
2417 int nospares = 0;
2418 int any_badblocks_changed = 0;
2419
2420repeat:
2421
2422 rdev_for_each(rdev, mddev) {
2423 if (rdev->raid_disk >= 0 &&
2424 mddev->delta_disks >= 0 &&
2425 !test_bit(In_sync, &rdev->flags) &&
2426 mddev->curr_resync_completed > rdev->recovery_offset)
2427 rdev->recovery_offset = mddev->curr_resync_completed;
2428
2429 }
2430 if (!mddev->persistent) {
2431 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2432 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2433 if (!mddev->external) {
2434 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2435 rdev_for_each(rdev, mddev) {
2436 if (rdev->badblocks.changed) {
2437 rdev->badblocks.changed = 0;
2438 md_ack_all_badblocks(&rdev->badblocks);
2439 md_error(mddev, rdev);
2440 }
2441 clear_bit(Blocked, &rdev->flags);
2442 clear_bit(BlockedBadBlocks, &rdev->flags);
2443 wake_up(&rdev->blocked_wait);
2444 }
2445 }
2446 wake_up(&mddev->sb_wait);
2447 return;
2448 }
2449
2450 spin_lock_irq(&mddev->write_lock);
2451
2452 mddev->utime = get_seconds();
2453
2454 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2455 force_change = 1;
2456 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2457
2458
2459
2460
2461 nospares = 1;
2462 if (force_change)
2463 nospares = 0;
2464 if (mddev->degraded)
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474 nospares = 0;
2475
2476 sync_req = mddev->in_sync;
2477
2478
2479
2480 if (nospares
2481 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2482 && mddev->can_decrease_events
2483 && mddev->events != 1) {
2484 mddev->events--;
2485 mddev->can_decrease_events = 0;
2486 } else {
2487
2488 mddev->events ++;
2489 mddev->can_decrease_events = nospares;
2490 }
2491
2492 if (!mddev->events) {
2493
2494
2495
2496
2497
2498 MD_BUG();
2499 mddev->events --;
2500 }
2501
2502 rdev_for_each(rdev, mddev) {
2503 if (rdev->badblocks.changed)
2504 any_badblocks_changed++;
2505 if (test_bit(Faulty, &rdev->flags))
2506 set_bit(FaultRecorded, &rdev->flags);
2507 }
2508
2509 sync_sbs(mddev, nospares);
2510 spin_unlock_irq(&mddev->write_lock);
2511
2512 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2513 mdname(mddev), mddev->in_sync);
2514
2515 bitmap_update_sb(mddev->bitmap);
2516 rdev_for_each(rdev, mddev) {
2517 char b[BDEVNAME_SIZE];
2518
2519 if (rdev->sb_loaded != 1)
2520 continue;
2521
2522 if (!test_bit(Faulty, &rdev->flags) &&
2523 rdev->saved_raid_disk == -1) {
2524 md_super_write(mddev,rdev,
2525 rdev->sb_start, rdev->sb_size,
2526 rdev->sb_page);
2527 pr_debug("md: (write) %s's sb offset: %llu\n",
2528 bdevname(rdev->bdev, b),
2529 (unsigned long long)rdev->sb_start);
2530 rdev->sb_events = mddev->events;
2531 if (rdev->badblocks.size) {
2532 md_super_write(mddev, rdev,
2533 rdev->badblocks.sector,
2534 rdev->badblocks.size << 9,
2535 rdev->bb_page);
2536 rdev->badblocks.size = 0;
2537 }
2538
2539 } else if (test_bit(Faulty, &rdev->flags))
2540 pr_debug("md: %s (skipping faulty)\n",
2541 bdevname(rdev->bdev, b));
2542 else
2543 pr_debug("(skipping incremental s/r ");
2544
2545 if (mddev->level == LEVEL_MULTIPATH)
2546
2547 break;
2548 }
2549 md_super_wait(mddev);
2550
2551
2552 spin_lock_irq(&mddev->write_lock);
2553 if (mddev->in_sync != sync_req ||
2554 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2555
2556 spin_unlock_irq(&mddev->write_lock);
2557 goto repeat;
2558 }
2559 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2560 spin_unlock_irq(&mddev->write_lock);
2561 wake_up(&mddev->sb_wait);
2562 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2563 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2564
2565 rdev_for_each(rdev, mddev) {
2566 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2567 clear_bit(Blocked, &rdev->flags);
2568
2569 if (any_badblocks_changed)
2570 md_ack_all_badblocks(&rdev->badblocks);
2571 clear_bit(BlockedBadBlocks, &rdev->flags);
2572 wake_up(&rdev->blocked_wait);
2573 }
2574}
2575
2576
2577
2578
/*
 * Compare user-supplied @cmd with keyword @str.  @cmd matches when it
 * equals @str exactly, or equals @str followed by a single '\n'.
 * Returns 1 on a match, 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* Walk the common prefix. */
	for (; *cmd != '\0' && *str != '\0' && *cmd == *str; cmd++, str++)
		;

	/* Tolerate one trailing newline on the command. */
	if (*cmd == '\n')
		cmd++;

	/* Both strings must now be fully consumed. */
	return (*str == '\0' && *cmd == '\0') ? 1 : 0;
}
2595
/*
 * Descriptor for one per-device (md_rdev) attribute: the generic attribute
 * plus handlers that receive the md_rdev directly.  rdev_attr_show() and
 * rdev_attr_store() return -EIO when the matching handler is NULL.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	/* format the value into the supplied buffer; returns byte count or -errno */
	ssize_t (*show)(struct md_rdev *, char *);
	/* parse the supplied buffer of the given length; returns bytes consumed or -errno */
	ssize_t (*store)(struct md_rdev *, const char *, size_t);
};
2601
2602static ssize_t
2603state_show(struct md_rdev *rdev, char *page)
2604{
2605 char *sep = "";
2606 size_t len = 0;
2607
2608 if (test_bit(Faulty, &rdev->flags) ||
2609 rdev->badblocks.unacked_exist) {
2610 len+= sprintf(page+len, "%sfaulty",sep);
2611 sep = ",";
2612 }
2613 if (test_bit(In_sync, &rdev->flags)) {
2614 len += sprintf(page+len, "%sin_sync",sep);
2615 sep = ",";
2616 }
2617 if (test_bit(WriteMostly, &rdev->flags)) {
2618 len += sprintf(page+len, "%swrite_mostly",sep);
2619 sep = ",";
2620 }
2621 if (test_bit(Blocked, &rdev->flags) ||
2622 (rdev->badblocks.unacked_exist
2623 && !test_bit(Faulty, &rdev->flags))) {
2624 len += sprintf(page+len, "%sblocked", sep);
2625 sep = ",";
2626 }
2627 if (!test_bit(Faulty, &rdev->flags) &&
2628 !test_bit(In_sync, &rdev->flags)) {
2629 len += sprintf(page+len, "%sspare", sep);
2630 sep = ",";
2631 }
2632 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2633 len += sprintf(page+len, "%swrite_error", sep);
2634 sep = ",";
2635 }
2636 if (test_bit(WantReplacement, &rdev->flags)) {
2637 len += sprintf(page+len, "%swant_replacement", sep);
2638 sep = ",";
2639 }
2640 if (test_bit(Replacement, &rdev->flags)) {
2641 len += sprintf(page+len, "%sreplacement", sep);
2642 sep = ",";
2643 }
2644
2645 return len+sprintf(page+len, "\n");
2646}
2647
/*
 * Handle a write to the per-device "state" attribute.
 *
 * Commands recognised (each compared with cmd_match()):
 *  faulty            - call md_error() on the device; array must be running
 *  remove            - kick the device from the array, unless it holds a slot
 *  writemostly       - set WriteMostly
 *  -writemostly      - clear WriteMostly
 *  blocked           - set Blocked
 *  -blocked          - clear Blocked and BlockedBadBlocks and wake waiters;
 *                      md_error() is called first if unacknowledged bad
 *                      blocks exist on a non-Faulty device
 *  insync            - set In_sync, only accepted when raid_disk == -1
 *  write_error       - set WriteErrorSeen
 *  -write_error      - clear WriteErrorSeen
 *  want_replacement  - set WantReplacement (if in a slot and not itself a
 *                      Replacement) and request recovery
 *  -want_replacement - clear WantReplacement
 *  replacement       - set Replacement; refused while the array is running
 *  -replacement      - clear Replacement; refused while the array is running
 *
 * Returns @len on success, -EINVAL for an unrecognised command (or one whose
 * precondition in the dispatch chain was not met), -EBUSY as noted below.
 */
static ssize_t
state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	int err = -EINVAL;
	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		/* md_error() may decline to fail the device; report that as busy */
		if (test_bit(Faulty, &rdev->flags))
			err = 0;
		else
			err = -EBUSY;
	} else if (cmd_match(buf, "remove")) {
		/* a device that still occupies a slot cannot be removed */
		if (rdev->raid_disk >= 0)
			err = -EBUSY;
		else {
			/* grab mddev first: rdev is detached by kick_rdev_from_array() */
			struct mddev *mddev = rdev->mddev;
			kick_rdev_from_array(rdev);
			if (mddev->pers)
				md_update_sb(mddev, 1);
			md_new_event(mddev);
			err = 0;
		}
	} else if (cmd_match(buf, "writemostly")) {
		set_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-writemostly")) {
		clear_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "blocked")) {
		set_bit(Blocked, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-blocked")) {
		if (!test_bit(Faulty, &rdev->flags) &&
		    rdev->badblocks.unacked_exist) {
			/*
			 * Unblocking while bad blocks are still
			 * unacknowledged: fail the device instead.
			 */
			md_error(rdev->mddev, rdev);
		}
		clear_bit(Blocked, &rdev->flags);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);

		err = 0;
	} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
		set_bit(In_sync, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "write_error")) {
		set_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-write_error")) {
		clear_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "want_replacement")) {
		/*
		 * The flag is only set for a device that is in a slot and is
		 * not itself a replacement; recovery is requested and the
		 * write succeeds either way.
		 */
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Replacement, &rdev->flags))
			set_bit(WantReplacement, &rdev->flags);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
		err = 0;
	} else if (cmd_match(buf, "-want_replacement")) {
		/* clearing the flag is unconditional */
		err = 0;
		clear_bit(WantReplacement, &rdev->flags);
	} else if (cmd_match(buf, "replacement")) {
		/* the Replacement flag may only be changed while the array is not running */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			set_bit(Replacement, &rdev->flags);
			err = 0;
		}
	} else if (cmd_match(buf, "-replacement")) {
		/* same restriction as "replacement" */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			clear_bit(Replacement, &rdev->flags);
			err = 0;
		}
	}
	/* notify pollers of the state attribute on any successful change */
	if (!err)
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2756
2757static ssize_t
2758errors_show(struct md_rdev *rdev, char *page)
2759{
2760 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2761}
2762
2763static ssize_t
2764errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2765{
2766 char *e;
2767 unsigned long n = simple_strtoul(buf, &e, 10);
2768 if (*buf && (*e == 0 || *e == '\n')) {
2769 atomic_set(&rdev->corrected_errors, n);
2770 return len;
2771 }
2772 return -EINVAL;
2773}
2774static struct rdev_sysfs_entry rdev_errors =
2775__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2776
2777static ssize_t
2778slot_show(struct md_rdev *rdev, char *page)
2779{
2780 if (rdev->raid_disk < 0)
2781 return sprintf(page, "none\n");
2782 else
2783 return sprintf(page, "%d\n", rdev->raid_disk);
2784}
2785
/*
 * Handle a write to the per-device "slot" attribute.
 *
 * Accepts a decimal slot number, or a string starting with "none" to select
 * slot -1.  Three cases follow:
 *  - array running, slot -1: hot-remove the device via the personality;
 *  - array running, other slot: hot-add the device via the personality;
 *  - array not running: just record the slot and reset the device flags.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
slot_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	char *e;
	int err;
	int slot = simple_strtoul(buf, &e, 10);
	/* only the first four bytes are compared, so "none..." also matches */
	if (strncmp(buf, "none", 4)==0)
		slot = -1;
	else if (e==buf || (*e && *e!= '\n'))
		return -EINVAL;
	if (rdev->mddev->pers && slot == -1) {
		/*
		 * Running array, device being set to "none": it must
		 * currently hold a slot and the personality must be able
		 * (and willing) to hot-remove it.
		 */
		if (rdev->raid_disk == -1)
			return -EEXIST;
		/* personality has no hot-remove support */
		if (rdev->mddev->pers->hot_remove_disk == NULL)
			return -EINVAL;
		err = rdev->mddev->pers->
			hot_remove_disk(rdev->mddev, rdev);
		if (err)
			return err;
		sysfs_unlink_rdev(rdev->mddev, rdev);
		rdev->raid_disk = -1;
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
	} else if (rdev->mddev->pers) {
		/*
		 * Running array, device being given a slot: only allowed
		 * for a device that currently has none.
		 */
		if (rdev->raid_disk != -1)
			return -EBUSY;

		if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
			return -EBUSY;

		if (rdev->mddev->pers->hot_add_disk == NULL)
			return -EINVAL;

		/* slot must fit in either the current or the pre-reshape disk count */
		if (slot >= rdev->mddev->raid_disks &&
		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
			return -ENOSPC;

		rdev->raid_disk = slot;
		/* remember the slot of a device that claimed to be in sync */
		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = slot;
		else
			rdev->saved_raid_disk = -1;
		clear_bit(In_sync, &rdev->flags);
		err = rdev->mddev->pers->
			hot_add_disk(rdev->mddev, rdev);
		if (err) {
			/* undo the slot assignment on failure */
			rdev->raid_disk = -1;
			return err;
		} else
			sysfs_notify_dirent_safe(rdev->sysfs_state);
		/* a failure to create the sysfs link is deliberately ignored */
		if (sysfs_link_rdev(rdev->mddev, rdev))
			;

	} else {
		/* array not running: validate the range and record the slot */
		if (slot >= rdev->mddev->raid_disks &&
		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
			return -ENOSPC;
		rdev->raid_disk = slot;
		/* reset the flags: not faulty, not write-mostly, in sync */
		clear_bit(Faulty, &rdev->flags);
		clear_bit(WriteMostly, &rdev->flags);
		set_bit(In_sync, &rdev->flags);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	}
	return len;
}


static struct rdev_sysfs_entry rdev_slot =
__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2868
2869static ssize_t
2870offset_show(struct md_rdev *rdev, char *page)
2871{
2872 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2873}
2874
2875static ssize_t
2876offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2877{
2878 unsigned long long offset;
2879 if (strict_strtoull(buf, 10, &offset) < 0)
2880 return -EINVAL;
2881 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2882 return -EBUSY;
2883 if (rdev->sectors && rdev->mddev->external)
2884
2885
2886 return -EBUSY;
2887 rdev->data_offset = offset;
2888 rdev->new_data_offset = offset;
2889 return len;
2890}
2891
2892static struct rdev_sysfs_entry rdev_offset =
2893__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2894
2895static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2896{
2897 return sprintf(page, "%llu\n",
2898 (unsigned long long)rdev->new_data_offset);
2899}
2900
/*
 * Handle a write to the per-device "new_offset" attribute.
 *
 * Validates the requested offset against the device size, the current
 * reshape direction and (for a running array with persistent metadata) the
 * superblock handler, then records it and updates mddev->reshape_backwards
 * to match the direction of the change.
 *
 * Returns @len on success; -EINVAL, -EBUSY or -E2BIG on failure.
 */
static ssize_t new_offset_store(struct md_rdev *rdev,
				const char *buf, size_t len)
{
	unsigned long long new_offset;
	struct mddev *mddev = rdev->mddev;

	if (strict_strtoull(buf, 10, &new_offset) < 0)
		return -EINVAL;

	/* not while a sync thread exists */
	if (mddev->sync_thread)
		return -EBUSY;
	if (new_offset == rdev->data_offset)
		/* resetting to the current offset needs no size check */
		;
	else if (new_offset > rdev->data_offset) {
		/* moving the data up must not push it past the end of the device */
		if (new_offset - rdev->data_offset
		    + mddev->dev_sectors > rdev->sectors)
				return -E2BIG;
	}

	/* a lower offset is rejected while reshape_backwards is set */
	if (new_offset < rdev->data_offset &&
	    mddev->reshape_backwards)
		return -EINVAL;

	/* a higher offset is rejected while reshape_backwards is clear */
	if (new_offset > rdev->data_offset &&
	    !mddev->reshape_backwards)
		return -EINVAL;

	/* let the metadata handler veto the offset on a running array */
	if (mddev->pers && mddev->persistent &&
	    !super_types[mddev->major_version]
	    .allow_new_offset(rdev, new_offset))
		return -E2BIG;
	rdev->new_data_offset = new_offset;
	/* reshape direction follows the direction of the offset change */
	if (new_offset > rdev->data_offset)
		mddev->reshape_backwards = 1;
	else if (new_offset < rdev->data_offset)
		mddev->reshape_backwards = 0;

	return len;
}
static struct rdev_sysfs_entry rdev_new_offset =
__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2951
2952static ssize_t
2953rdev_size_show(struct md_rdev *rdev, char *page)
2954{
2955 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2956}
2957
2958static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2959{
2960
2961 if (s1+l1 <= s2)
2962 return 0;
2963 if (s2+l2 <= s1)
2964 return 0;
2965 return 1;
2966}
2967
2968static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2969{
2970 unsigned long long blocks;
2971 sector_t new;
2972
2973 if (strict_strtoull(buf, 10, &blocks) < 0)
2974 return -EINVAL;
2975
2976 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2977 return -EINVAL;
2978
2979 new = blocks * 2;
2980 if (new != blocks * 2)
2981 return -EINVAL;
2982
2983 *sectors = new;
2984 return 0;
2985}
2986
2987static ssize_t
2988rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2989{
2990 struct mddev *my_mddev = rdev->mddev;
2991 sector_t oldsectors = rdev->sectors;
2992 sector_t sectors;
2993
2994 if (strict_blocks_to_sectors(buf, §ors) < 0)
2995 return -EINVAL;
2996 if (rdev->data_offset != rdev->new_data_offset)
2997 return -EINVAL;
2998 if (my_mddev->pers && rdev->raid_disk >= 0) {
2999 if (my_mddev->persistent) {
3000 sectors = super_types[my_mddev->major_version].
3001 rdev_size_change(rdev, sectors);
3002 if (!sectors)
3003 return -EBUSY;
3004 } else if (!sectors)
3005 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3006 rdev->data_offset;
3007 }
3008 if (sectors < my_mddev->dev_sectors)
3009 return -EINVAL;
3010
3011 rdev->sectors = sectors;
3012 if (sectors > oldsectors && my_mddev->external) {
3013
3014
3015
3016
3017
3018 struct mddev *mddev;
3019 int overlap = 0;
3020 struct list_head *tmp;
3021
3022 mddev_unlock(my_mddev);
3023 for_each_mddev(mddev, tmp) {
3024 struct md_rdev *rdev2;
3025
3026 mddev_lock(mddev);
3027 rdev_for_each(rdev2, mddev)
3028 if (rdev->bdev == rdev2->bdev &&
3029 rdev != rdev2 &&
3030 overlaps(rdev->data_offset, rdev->sectors,
3031 rdev2->data_offset,
3032 rdev2->sectors)) {
3033 overlap = 1;
3034 break;
3035 }
3036 mddev_unlock(mddev);
3037 if (overlap) {
3038 mddev_put(mddev);
3039 break;
3040 }
3041 }
3042 mddev_lock(my_mddev);
3043 if (overlap) {
3044
3045
3046
3047
3048
3049
3050 rdev->sectors = oldsectors;
3051 return -EBUSY;
3052 }
3053 }
3054 return len;
3055}
3056
3057static struct rdev_sysfs_entry rdev_size =
3058__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3059
3060
3061static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3062{
3063 unsigned long long recovery_start = rdev->recovery_offset;
3064
3065 if (test_bit(In_sync, &rdev->flags) ||
3066 recovery_start == MaxSector)
3067 return sprintf(page, "none\n");
3068
3069 return sprintf(page, "%llu\n", recovery_start);
3070}
3071
3072static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3073{
3074 unsigned long long recovery_start;
3075
3076 if (cmd_match(buf, "none"))
3077 recovery_start = MaxSector;
3078 else if (strict_strtoull(buf, 10, &recovery_start))
3079 return -EINVAL;
3080
3081 if (rdev->mddev->pers &&
3082 rdev->raid_disk >= 0)
3083 return -EBUSY;
3084
3085 rdev->recovery_offset = recovery_start;
3086 if (recovery_start == MaxSector)
3087 set_bit(In_sync, &rdev->flags);
3088 else
3089 clear_bit(In_sync, &rdev->flags);
3090 return len;
3091}
3092
3093static struct rdev_sysfs_entry rdev_recovery_start =
3094__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3095
3096
3097static ssize_t
3098badblocks_show(struct badblocks *bb, char *page, int unack);
3099static ssize_t
3100badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3101
3102static ssize_t bb_show(struct md_rdev *rdev, char *page)
3103{
3104 return badblocks_show(&rdev->badblocks, page, 0);
3105}
3106static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3107{
3108 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3109
3110 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3111 wake_up(&rdev->blocked_wait);
3112 return rv;
3113}
3114static struct rdev_sysfs_entry rdev_bad_blocks =
3115__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3116
3117
3118static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3119{
3120 return badblocks_show(&rdev->badblocks, page, 1);
3121}
3122static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3123{
3124 return badblocks_store(&rdev->badblocks, page, len, 1);
3125}
3126static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3127__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3128
/*
 * NULL-terminated list of the per-device attributes; installed as the
 * default_attrs of rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_new_offset.attr,
	&rdev_size.attr,
	&rdev_recovery_start.attr,
	&rdev_bad_blocks.attr,
	&rdev_unack_bad_blocks.attr,
	NULL,
};
3141static ssize_t
3142rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3143{
3144 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3145 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3146 struct mddev *mddev = rdev->mddev;
3147 ssize_t rv;
3148
3149 if (!entry->show)
3150 return -EIO;
3151
3152 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3153 if (!rv) {
3154 if (rdev->mddev == NULL)
3155 rv = -EBUSY;
3156 else
3157 rv = entry->show(rdev, page);
3158 mddev_unlock(mddev);
3159 }
3160 return rv;
3161}
3162
3163static ssize_t
3164rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3165 const char *page, size_t length)
3166{
3167 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3168 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3169 ssize_t rv;
3170 struct mddev *mddev = rdev->mddev;
3171
3172 if (!entry->store)
3173 return -EIO;
3174 if (!capable(CAP_SYS_ADMIN))
3175 return -EACCES;
3176 rv = mddev ? mddev_lock(mddev): -EBUSY;
3177 if (!rv) {
3178 if (rdev->mddev == NULL)
3179 rv = -EBUSY;
3180 else
3181 rv = entry->store(rdev, page, length);
3182 mddev_unlock(mddev);
3183 }
3184 return rv;
3185}
3186
3187static void rdev_free(struct kobject *ko)
3188{
3189 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3190 kfree(rdev);
3191}
/* Route sysfs reads/writes on an rdev kobject to the generic dispatchers. */
static const struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/*
 * kobject type for md_rdev: freed by rdev_free(), exposes the attributes in
 * rdev_default_attrs through rdev_sysfs_ops.
 */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
3201
3202int md_rdev_init(struct md_rdev *rdev)
3203{
3204 rdev->desc_nr = -1;
3205 rdev->saved_raid_disk = -1;
3206 rdev->raid_disk = -1;
3207 rdev->flags = 0;
3208 rdev->data_offset = 0;
3209 rdev->new_data_offset = 0;
3210 rdev->sb_events = 0;
3211 rdev->last_read_error.tv_sec = 0;
3212 rdev->last_read_error.tv_nsec = 0;
3213 rdev->sb_loaded = 0;
3214 rdev->bb_page = NULL;
3215 atomic_set(&rdev->nr_pending, 0);
3216 atomic_set(&rdev->read_errors, 0);
3217 atomic_set(&rdev->corrected_errors, 0);
3218
3219 INIT_LIST_HEAD(&rdev->same_set);
3220 init_waitqueue_head(&rdev->blocked_wait);
3221
3222
3223
3224
3225
3226 rdev->badblocks.count = 0;
3227 rdev->badblocks.shift = 0;
3228 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3229 seqlock_init(&rdev->badblocks.lock);
3230 if (rdev->badblocks.page == NULL)
3231 return -ENOMEM;
3232
3233 return 0;
3234}
3235EXPORT_SYMBOL_GPL(md_rdev_init);
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
/*
 * Allocate and set up an md_rdev for the block device @newdev.
 *
 * @super_format selects how the device is treated:
 *   >= 0  load a superblock of that format (with @super_minor) from the device
 *   -1    no superblock is loaded; badblocks.shift is set to -1
 *   -2    no superblock is loaded; lock_rdev() is called with its third
 *         argument true (NOTE(review): presumably a "shared" claim - confirm
 *         against lock_rdev())
 *
 * Returns the new rdev, or an ERR_PTR() on failure.
 */
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
	char b[BDEVNAME_SIZE];
	int err;
	struct md_rdev *rdev;
	sector_t size;

	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
	if (!rdev) {
		printk(KERN_ERR "md: could not alloc mem for new device!\n");
		return ERR_PTR(-ENOMEM);
	}

	err = md_rdev_init(rdev);
	if (err)
		goto abort_free;
	err = alloc_disk_sb(rdev);
	if (err)
		goto abort_free;

	err = lock_rdev(rdev, newdev, super_format == -2);
	if (err)
		goto abort_free;

	kobject_init(&rdev->kobj, &rdev_ktype);

	/* a device whose size is below one block (BLOCK_SIZE_BITS) is rejected */
	size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
	if (!size) {
		printk(KERN_WARNING 
			"md: %s has zero or unknown size, marking faulty!\n",
			bdevname(rdev->bdev,b));
		err = -EINVAL;
		goto abort_free;
	}

	if (super_format >= 0) {
		err = super_types[super_format].
			load_super(rdev, NULL, super_minor);
		/* -EINVAL gets its own message: the superblock was read but is not valid */
		if (err == -EINVAL) {
			printk(KERN_WARNING
				"md: %s does not have a valid v%d.%d "
			       "superblock, not importing!\n",
				bdevname(rdev->bdev,b),
			       super_format, super_minor);
			goto abort_free;
		}
		if (err < 0) {
			printk(KERN_WARNING 
				"md: could not read %s's sb, not importing!\n",
				bdevname(rdev->bdev,b));
			goto abort_free;
		}
	}
	if (super_format == -1)
		/* NOTE(review): shift of -1 presumably disables bad-block tracking - confirm */
		rdev->badblocks.shift = -1;

	return rdev;

abort_free:
	/* release the block device only if lock_rdev() got as far as setting it */
	if (rdev->bdev)
		unlock_rdev(rdev);
	md_rdev_clear(rdev);
	kfree(rdev);
	return ERR_PTR(err);
}
3312
3313
3314
3315
3316
3317
/*
 * Load and validate the superblocks of all devices in @mddev.
 *
 * First pass: load each device's superblock, tracking the one that
 * load_super() reports (return value 1) as the new "freshest"; devices
 * whose load fails are kicked from the array.  The array is then validated
 * against the freshest device, and a second pass validates every other
 * device against the array, kicking those that fail or exceed max_disks.
 */
static void analyze_sbs(struct mddev * mddev)
{
	int i;
	struct md_rdev *rdev, *freshest, *tmp;
	char b[BDEVNAME_SIZE];

	freshest = NULL;
	rdev_for_each_safe(rdev, tmp, mddev)
		switch (super_types[mddev->major_version].
			load_super(rdev, freshest, mddev->minor_version)) {
		case 1:
			/* this device supersedes the previous freshest one */
			freshest = rdev;
			break;
		case 0:
			break;
		default:
			printk( KERN_ERR \
				"md: fatal superblock inconsistency in %s"
				" -- removing from array\n", 
				bdevname(rdev->bdev,b));
			kick_rdev_from_array(rdev);
		}


	/* take the array-wide settings from the freshest superblock */
	super_types[mddev->major_version].
		validate_super(mddev, freshest);

	i = 0;
	rdev_for_each_safe(rdev, tmp, mddev) {
		/* enforce the device limit, when one is set (max_disks != 0) */
		if (mddev->max_disks &&
		    (rdev->desc_nr >= mddev->max_disks ||
		     i > mddev->max_disks)) {
			printk(KERN_WARNING
			       "md: %s: %s: only %d devices permitted\n",
			       mdname(mddev), bdevname(rdev->bdev, b),
			       mddev->max_disks);
			kick_rdev_from_array(rdev);
			continue;
		}
		/* the freshest device was already validated above */
		if (rdev != freshest)
			if (super_types[mddev->major_version].
			    validate_super(mddev, rdev)) {
				printk(KERN_WARNING "md: kicking non-fresh %s"
					" from array!\n",
					bdevname(rdev->bdev,b));
				kick_rdev_from_array(rdev);
				continue;
			}
		if (mddev->level == LEVEL_MULTIPATH) {
			/* multipath: number the devices sequentially and mark them in sync */
			rdev->desc_nr = i++;
			rdev->raid_disk = rdev->desc_nr;
			set_bit(In_sync, &rdev->flags);
		} else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
			/* slot lies beyond the array (allowing for a shrinking reshape): drop it */
			rdev->raid_disk = -1;
			clear_bit(In_sync, &rdev->flags);
		}
	}
}
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
/*
 * Parse a non-negative decimal number with an optional fractional part and
 * return it multiplied by 10^@scale.
 *
 * At most @scale fractional digits are used; further digits are read and
 * discarded.  A single trailing newline is accepted.  No overflow check is
 * performed.
 *
 * Returns 0 and stores the scaled value in *@res on success, -EINVAL if any
 * unexpected character follows the number.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long acc = 0;
	long frac_digits = -1;	/* stays negative until a '.' has been seen */

	for (;; cp++) {
		if (*cp == '.' && frac_digits < 0) {
			frac_digits = 0;
			continue;
		}
		if (!isdigit(*cp))
			break;
		/* Digits past the requested precision are skipped. */
		if (frac_digits < scale) {
			unsigned int digit = *cp - '0';

			acc = acc * 10 + digit;
			if (frac_digits >= 0)
				frac_digits++;
		}
	}

	if (*cp == '\n')
		cp++;
	if (*cp != '\0')
		return -EINVAL;

	/* Pad with zeros for fractional digits that were not supplied. */
	if (frac_digits < 0)
		frac_digits = 0;
	for (; frac_digits < scale; frac_digits++)
		acc *= 10;

	*res = acc;
	return 0;
}
3416
3417
3418static void md_safemode_timeout(unsigned long data);
3419
3420static ssize_t
3421safe_delay_show(struct mddev *mddev, char *page)
3422{
3423 int msec = (mddev->safemode_delay*1000)/HZ;
3424 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3425}
3426static ssize_t
3427safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3428{
3429 unsigned long msec;
3430
3431 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3432 return -EINVAL;
3433 if (msec == 0)
3434 mddev->safemode_delay = 0;
3435 else {
3436 unsigned long old_delay = mddev->safemode_delay;
3437 mddev->safemode_delay = (msec*HZ)/1000;
3438 if (mddev->safemode_delay == 0)
3439 mddev->safemode_delay = 1;
3440 if (mddev->safemode_delay < old_delay)
3441 md_safemode_timeout((unsigned long)mddev);
3442 }
3443 return len;
3444}
3445static struct md_sysfs_entry md_safe_delay =
3446__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3447
3448static ssize_t
3449level_show(struct mddev *mddev, char *page)
3450{
3451 struct md_personality *p = mddev->pers;
3452 if (p)
3453 return sprintf(page, "%s\n", p->name);
3454 else if (mddev->clevel[0])
3455 return sprintf(page, "%s\n", mddev->clevel);
3456 else if (mddev->level != LEVEL_NONE)
3457 return sprintf(page, "%d\n", mddev->level);
3458 else
3459 return 0;
3460}
3461
/*
 * Handle a write to the array "level" attribute.
 *
 * On an array that is not running the string is simply recorded in
 * mddev->clevel.  On a running array this performs an online personality
 * change: the requested personality is looked up (loading its module if
 * needed), asked to take over the array, and then swapped in for the
 * current one while the array is suspended.
 *
 * Returns the original @len on success; -ENOSPC, -EBUSY, -EINVAL or the
 * error from the personality's takeover() on failure.
 */
static ssize_t
level_store(struct mddev *mddev, const char *buf, size_t len)
{
	char clevel[16];
	ssize_t rv = len;
	struct md_personality *pers;
	long level;
	void *priv;
	struct md_rdev *rdev;

	if (mddev->pers == NULL) {
		/* array not running: just remember the requested level string */
		if (len == 0)
			return 0;
		if (len >= sizeof(mddev->clevel))
			return -ENOSPC;
		strncpy(mddev->clevel, buf, len);
		/* strip one trailing newline, then terminate */
		if (mddev->clevel[len-1] == '\n')
			len--;
		mddev->clevel[len] = 0;
		mddev->level = LEVEL_NONE;
		return rv;
	}

	/*
	 * Array is running: a level change is refused while a sync thread
	 * exists, a reshape is recorded, or sysfs_active is set.
	 */
	if (mddev->sync_thread ||
	    mddev->reshape_position != MaxSector ||
	    mddev->sysfs_active)
		return -EBUSY;

	/* the current personality must provide a quiesce method */
	if (!mddev->pers->quiesce) {
		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
		       mdname(mddev), mddev->pers->name);
		return -EINVAL;
	}

	/* take a NUL-terminated local copy of the level string, minus a trailing newline */
	if (len == 0 || len >= sizeof(clevel))
		return -EINVAL;
	strncpy(clevel, buf, len);
	if (clevel[len-1] == '\n')
		len--;
	clevel[len] = 0;
	/* a non-numeric string leaves level as LEVEL_NONE; lookup then goes by name */
	if (strict_strtol(clevel, 10, &level))
		level = LEVEL_NONE;

	/* try both module alias forms for the requested personality */
	if (request_module("md-%s", clevel) != 0)
		request_module("md-level-%s", clevel);
	spin_lock(&pers_lock);
	pers = find_pers(level, clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
		return -EINVAL;
	}
	spin_unlock(&pers_lock);

	if (pers == mddev->pers) {
		/* already at the requested level: nothing to do */
		module_put(pers->owner);
		return rv;
	}
	if (!pers->takeover) {
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
		       mdname(mddev), clevel);
		return -EINVAL;
	}

	/* start from the current slot assignment; takeover() may change new_raid_disk */
	rdev_for_each(rdev, mddev)
		rdev->new_raid_disk = rdev->raid_disk;

	/*
	 * Ask the new personality whether it can accept the array.  On
	 * failure the new_* fields and disk counts are reset to the
	 * current values.
	 */
	priv = pers->takeover(mddev);
	if (IS_ERR(priv)) {
		mddev->new_level = mddev->level;
		mddev->new_layout = mddev->layout;
		mddev->new_chunk_sectors = mddev->chunk_sectors;
		mddev->raid_disks -= mddev->delta_disks;
		mddev->delta_disks = 0;
		mddev->reshape_backwards = 0;
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s would not accept array\n",
		       mdname(mddev), clevel);
		return PTR_ERR(priv);
	}

	/* the takeover was accepted: suspend the array and stop the old personality */
	mddev_suspend(mddev);
	mddev->pers->stop(mddev);
	
	if (mddev->pers->sync_request == NULL &&
	    pers->sync_request != NULL) {
		/* old personality had no sync_request, new one has: add the redundancy attributes */
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			printk(KERN_WARNING
			       "md: cannot register extra attributes for %s\n",
			       mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
	}		
	if (mddev->pers->sync_request != NULL &&
	    pers->sync_request == NULL) {
		/* the reverse case: schedule removal of the redundancy attributes */
		if (mddev->to_remove == NULL)
			mddev->to_remove = &md_redundancy_group;
	}

	if (mddev->pers->sync_request == NULL &&
	    mddev->external) {
		/*
		 * Old personality had no sync_request and the metadata is
		 * external: mark the array not in sync and disable safemode.
		 */
		mddev->in_sync = 0;
		mddev->safemode_delay = 0;
		mddev->safemode = 0;
	}

	/* first pass: remove the sysfs links of devices whose slot changes */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk >= mddev->raid_disks)
			rdev->new_raid_disk = -1;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		sysfs_unlink_rdev(mddev, rdev);
	}
	/* second pass: apply the new slots and recreate the links */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		rdev->raid_disk = rdev->new_raid_disk;
		if (rdev->raid_disk < 0)
			clear_bit(In_sync, &rdev->flags);
		else {
			if (sysfs_link_rdev(mddev, rdev))
				printk(KERN_WARNING "md: cannot register rd%d"
				       " for %s after level change\n",
				       rdev->raid_disk, mdname(mddev));
		}
	}

	/* drop the old personality and commit the new configuration */
	module_put(mddev->pers->owner);
	mddev->pers = pers;
	mddev->private = priv;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->delta_disks = 0;
	mddev->reshape_backwards = 0;
	mddev->degraded = 0;
	if (mddev->pers->sync_request == NULL) {
		/* new personality has no sync_request: mark in sync and stop the safemode timer */
		mddev->in_sync = 1;
		del_timer_sync(&mddev->safemode_timer);
	}
	/* NOTE(review): the return value of run() is not checked here */
	pers->run(mddev);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	mddev_resume(mddev);
	sysfs_notify(&mddev->kobj, NULL, "level");
	md_new_event(mddev);
	return rv;
}

static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3641
3642
3643static ssize_t
3644layout_show(struct mddev *mddev, char *page)
3645{
3646
3647 if (mddev->reshape_position != MaxSector &&
3648 mddev->layout != mddev->new_layout)
3649 return sprintf(page, "%d (%d)\n",
3650 mddev->new_layout, mddev->layout);
3651 return sprintf(page, "%d\n", mddev->layout);
3652}
3653
3654static ssize_t
3655layout_store(struct mddev *mddev, const char *buf, size_t len)
3656{
3657 char *e;
3658 unsigned long n = simple_strtoul(buf, &e, 10);
3659
3660 if (!*buf || (*e && *e != '\n'))
3661 return -EINVAL;
3662
3663 if (mddev->pers) {
3664 int err;
3665 if (mddev->pers->check_reshape == NULL)
3666 return -EBUSY;
3667 mddev->new_layout = n;
3668 err = mddev->pers->check_reshape(mddev);
3669 if (err) {
3670 mddev->new_layout = mddev->layout;
3671 return err;
3672 }
3673 } else {
3674 mddev->new_layout = n;
3675 if (mddev->reshape_position == MaxSector)
3676 mddev->layout = n;
3677 }
3678 return len;
3679}
3680static struct md_sysfs_entry md_layout =
3681__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3682
3683
3684static ssize_t
3685raid_disks_show(struct mddev *mddev, char *page)
3686{
3687 if (mddev->raid_disks == 0)
3688 return 0;
3689 if (mddev->reshape_position != MaxSector &&
3690 mddev->delta_disks != 0)
3691 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3692 mddev->raid_disks - mddev->delta_disks);
3693 return sprintf(page, "%d\n", mddev->raid_disks);
3694}
3695
3696static int update_raid_disks(struct mddev *mddev, int raid_disks);
3697
3698static ssize_t
3699raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3700{
3701 char *e;
3702 int rv = 0;
3703 unsigned long n = simple_strtoul(buf, &e, 10);
3704
3705 if (!*buf || (*e && *e != '\n'))
3706 return -EINVAL;
3707
3708 if (mddev->pers)
3709 rv = update_raid_disks(mddev, n);
3710 else if (mddev->reshape_position != MaxSector) {
3711 struct md_rdev *rdev;
3712 int olddisks = mddev->raid_disks - mddev->delta_disks;
3713
3714 rdev_for_each(rdev, mddev) {
3715 if (olddisks < n &&
3716 rdev->data_offset < rdev->new_data_offset)
3717 return -EINVAL;
3718 if (olddisks > n &&
3719 rdev->data_offset > rdev->new_data_offset)
3720 return -EINVAL;
3721 }
3722 mddev->delta_disks = n - olddisks;
3723 mddev->raid_disks = n;
3724 mddev->reshape_backwards = (mddev->delta_disks < 0);
3725 } else
3726 mddev->raid_disks = n;
3727 return rv ? rv : len;
3728}
3729static struct md_sysfs_entry md_raid_disks =
3730__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3731
3732static ssize_t
3733chunk_size_show(struct mddev *mddev, char *page)
3734{
3735 if (mddev->reshape_position != MaxSector &&
3736 mddev->chunk_sectors != mddev->new_chunk_sectors)
3737 return sprintf(page, "%d (%d)\n",
3738 mddev->new_chunk_sectors << 9,
3739 mddev->chunk_sectors << 9);
3740 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3741}
3742
3743static ssize_t
3744chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3745{
3746 char *e;
3747 unsigned long n = simple_strtoul(buf, &e, 10);
3748
3749 if (!*buf || (*e && *e != '\n'))
3750 return -EINVAL;
3751
3752 if (mddev->pers) {
3753 int err;
3754 if (mddev->pers->check_reshape == NULL)
3755 return -EBUSY;
3756 mddev->new_chunk_sectors = n >> 9;
3757 err = mddev->pers->check_reshape(mddev);
3758 if (err) {
3759 mddev->new_chunk_sectors = mddev->chunk_sectors;
3760 return err;
3761 }
3762 } else {
3763 mddev->new_chunk_sectors = n >> 9;
3764 if (mddev->reshape_position == MaxSector)
3765 mddev->chunk_sectors = n >> 9;
3766 }
3767 return len;
3768}
3769static struct md_sysfs_entry md_chunk_size =
3770__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3771
3772static ssize_t
3773resync_start_show(struct mddev *mddev, char *page)
3774{
3775 if (mddev->recovery_cp == MaxSector)
3776 return sprintf(page, "none\n");
3777 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3778}
3779
3780static ssize_t
3781resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3782{
3783 char *e;
3784 unsigned long long n = simple_strtoull(buf, &e, 10);
3785
3786 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3787 return -EBUSY;
3788 if (cmd_match(buf, "none"))
3789 n = MaxSector;
3790 else if (!*buf || (*e && *e != '\n'))
3791 return -EINVAL;
3792
3793 mddev->recovery_cp = n;
3794 return len;
3795}
3796static struct md_sysfs_entry md_resync_start =
3797__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
/*
 * Values of the "array_state" attribute.  The enumerators are in the same
 * order as the strings in array_states[], so an enumerator indexes its own
 * name; bad_word lines up with the terminating NULL and is what
 * match_word() yields for an unrecognised string.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
3840
/*
 * Look @word up in the NULL-terminated string array @list using
 * cmd_match().  Returns the index of the first match, or the index of the
 * terminating NULL when nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
3849
3850static ssize_t
3851array_state_show(struct mddev *mddev, char *page)
3852{
3853 enum array_state st = inactive;
3854
3855 if (mddev->pers)
3856 switch(mddev->ro) {
3857 case 1:
3858 st = readonly;
3859 break;
3860 case 2:
3861 st = read_auto;
3862 break;
3863 case 0:
3864 if (mddev->in_sync)
3865 st = clean;
3866 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3867 st = write_pending;
3868 else if (mddev->safemode)
3869 st = active_idle;
3870 else
3871 st = active;
3872 }
3873 else {
3874 if (list_empty(&mddev->disks) &&
3875 mddev->raid_disks == 0 &&
3876 mddev->dev_sectors == 0)
3877 st = clear;
3878 else
3879 st = inactive;
3880 }
3881 return sprintf(page, "%s\n", array_states[st]);
3882}
3883
/* Forward declarations of helpers used by array_state_store(). */
static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
static int do_md_run(struct mddev * mddev);
static int restart_array(struct mddev *mddev);
3888
/*
 * Store handler for "array_state".
 *
 * Looks the written word up in array_states[] and attempts the
 * corresponding transition.  Returns @len on success, otherwise the
 * negative errno produced by the transition; words that are unknown or
 * cannot be requested ("suspended", "write-pending", "active-idle")
 * yield the initial -EINVAL.
 */
static ssize_t
array_state_store(struct mddev *mddev, const char *buf, size_t len)
{
	int err = -EINVAL;
	enum array_state st = match_word(buf, array_states);
	switch(st) {
	case bad_word:
		/* unrecognised word: err stays -EINVAL */
		break;
	case clear:
		/* stop the array through do_md_stop() with mode 0 */
		err = do_md_stop(mddev, 0, NULL);
		break;
	case inactive:
		/* stop a running array (mode 2); an idle one needs nothing */
		if (mddev->pers)
			err = do_md_stop(mddev, 2, NULL);
		else
			err = 0;
		break;
	case suspended:
		/* not handled here: err stays -EINVAL */
		break;
	case readonly:
		if (mddev->pers)
			err = md_set_readonly(mddev, NULL);
		else {
			/* not running yet: mark read-only first, then start */
			mddev->ro = 1;
			set_disk_ro(mddev->gendisk, 1);
			err = do_md_run(mddev);
		}
		break;
	case read_auto:
		if (mddev->pers) {
			/*
			 * Go through read-only (from rw) or restart (from ro);
			 * if ->ro is already 2 neither call runs and err keeps
			 * its initial -EINVAL.
			 */
			if (mddev->ro == 0)
				err = md_set_readonly(mddev, NULL);
			else if (mddev->ro == 1)
				err = restart_array(mddev);
			if (err == 0) {
				mddev->ro = 2;
				set_disk_ro(mddev->gendisk, 0);
			}
		} else {
			mddev->ro = 2;
			err = do_md_run(mddev);
		}
		break;
	case clean:
		if (mddev->pers) {
			/* return value of restart_array() is ignored here */
			restart_array(mddev);
			spin_lock_irq(&mddev->write_lock);
			if (atomic_read(&mddev->writes_pending) == 0) {
				/* no writes in flight: mark in_sync if not already */
				if (mddev->in_sync == 0) {
					mddev->in_sync = 1;
					if (mddev->safemode == 1)
						mddev->safemode = 0;
					set_bit(MD_CHANGE_CLEAN, &mddev->flags);
				}
				err = 0;
			} else
				err = -EBUSY;
			spin_unlock_irq(&mddev->write_lock);
		} else
			err = -EINVAL;
		break;
	case active:
		if (mddev->pers) {
			/* return value of restart_array() is ignored here */
			restart_array(mddev);
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
			wake_up(&mddev->sb_wait);
			err = 0;
		} else {
			/* not running yet: mark read-write, then start */
			mddev->ro = 0;
			set_disk_ro(mddev->gendisk, 0);
			err = do_md_run(mddev);
		}
		break;
	case write_pending:
	case active_idle:
		/* these states cannot be requested: err stays -EINVAL */
		break;
	}
	if (err)
		return err;
	else {
		/* a successful write ends an UNTIL_IOCTL hold on the array */
		if (mddev->hold_active == UNTIL_IOCTL)
			mddev->hold_active = 0;
		sysfs_notify_dirent_safe(mddev->sysfs_state);
		return len;
	}
}
/* sysfs attribute "array_state". */
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3980
3981static ssize_t
3982max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3983 return sprintf(page, "%d\n",
3984 atomic_read(&mddev->max_corr_read_errors));
3985}
3986
3987static ssize_t
3988max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3989{
3990 char *e;
3991 unsigned long n = simple_strtoul(buf, &e, 10);
3992
3993 if (*buf && (*e == 0 || *e == '\n')) {
3994 atomic_set(&mddev->max_corr_read_errors, n);
3995 return len;
3996 }
3997 return -EINVAL;
3998}
3999
4000static struct md_sysfs_entry max_corr_read_errors =
4001__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4002 max_corrected_read_errors_store);
4003
/*
 * Show handler for attributes that have nothing to report (used by the
 * write-only "new_dev" and "bitmap_set_bits" entries): always fails
 * with -EINVAL and never touches @page.
 */
static ssize_t null_show(struct mddev *mddev, char *page)
{
	return -EINVAL;
}
4009
4010static ssize_t
4011new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4012{
4013
4014
4015
4016
4017
4018
4019
4020 char *e;
4021 int major = simple_strtoul(buf, &e, 10);
4022 int minor;
4023 dev_t dev;
4024 struct md_rdev *rdev;
4025 int err;
4026
4027 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4028 return -EINVAL;
4029 minor = simple_strtoul(e+1, &e, 10);
4030 if (*e && *e != '\n')
4031 return -EINVAL;
4032 dev = MKDEV(major, minor);
4033 if (major != MAJOR(dev) ||
4034 minor != MINOR(dev))
4035 return -EOVERFLOW;
4036
4037
4038 if (mddev->persistent) {
4039 rdev = md_import_device(dev, mddev->major_version,
4040 mddev->minor_version);
4041 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4042 struct md_rdev *rdev0
4043 = list_entry(mddev->disks.next,
4044 struct md_rdev, same_set);
4045 err = super_types[mddev->major_version]
4046 .load_super(rdev, rdev0, mddev->minor_version);
4047 if (err < 0)
4048 goto out;
4049 }
4050 } else if (mddev->external)
4051 rdev = md_import_device(dev, -2, -1);
4052 else
4053 rdev = md_import_device(dev, -1, -1);
4054
4055 if (IS_ERR(rdev))
4056 return PTR_ERR(rdev);
4057 err = bind_rdev_to_array(rdev, mddev);
4058 out:
4059 if (err)
4060 export_rdev(rdev);
4061 return err ? err : len;
4062}
4063
4064static struct md_sysfs_entry md_new_device =
4065__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4066
4067static ssize_t
4068bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4069{
4070 char *end;
4071 unsigned long chunk, end_chunk;
4072
4073 if (!mddev->bitmap)
4074 goto out;
4075
4076 while (*buf) {
4077 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4078 if (buf == end) break;
4079 if (*end == '-') {
4080 buf = end + 1;
4081 end_chunk = simple_strtoul(buf, &end, 0);
4082 if (buf == end) break;
4083 }
4084 if (*end && !isspace(*end)) break;
4085 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4086 buf = skip_spaces(end);
4087 }
4088 bitmap_unplug(mddev->bitmap);
4089out:
4090 return len;
4091}
4092
4093static struct md_sysfs_entry md_bitmap =
4094__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4095
4096static ssize_t
4097size_show(struct mddev *mddev, char *page)
4098{
4099 return sprintf(page, "%llu\n",
4100 (unsigned long long)mddev->dev_sectors / 2);
4101}
4102
4103static int update_size(struct mddev *mddev, sector_t num_sectors);
4104
4105static ssize_t
4106size_store(struct mddev *mddev, const char *buf, size_t len)
4107{
4108
4109
4110
4111
4112 sector_t sectors;
4113 int err = strict_blocks_to_sectors(buf, §ors);
4114
4115 if (err < 0)
4116 return err;
4117 if (mddev->pers) {
4118 err = update_size(mddev, sectors);
4119 md_update_sb(mddev, 1);
4120 } else {
4121 if (mddev->dev_sectors == 0 ||
4122 mddev->dev_sectors > sectors)
4123 mddev->dev_sectors = sectors;
4124 else
4125 err = -ENOSPC;
4126 }
4127 return err ? err : len;
4128}
4129
4130static struct md_sysfs_entry md_size =
4131__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4132
4133
4134
4135
4136
4137
4138
4139
4140static ssize_t
4141metadata_show(struct mddev *mddev, char *page)
4142{
4143 if (mddev->persistent)
4144 return sprintf(page, "%d.%d\n",
4145 mddev->major_version, mddev->minor_version);
4146 else if (mddev->external)
4147 return sprintf(page, "external:%s\n", mddev->metadata_type);
4148 else
4149 return sprintf(page, "none\n");
4150}
4151
4152static ssize_t
4153metadata_store(struct mddev *mddev, const char *buf, size_t len)
4154{
4155 int major, minor;
4156 char *e;
4157
4158
4159
4160
4161 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4162 ;
4163 else if (!list_empty(&mddev->disks))
4164 return -EBUSY;
4165
4166 if (cmd_match(buf, "none")) {
4167 mddev->persistent = 0;
4168 mddev->external = 0;
4169 mddev->major_version = 0;
4170 mddev->minor_version = 90;
4171 return len;
4172 }
4173 if (strncmp(buf, "external:", 9) == 0) {
4174 size_t namelen = len-9;
4175 if (namelen >= sizeof(mddev->metadata_type))
4176 namelen = sizeof(mddev->metadata_type)-1;
4177 strncpy(mddev->metadata_type, buf+9, namelen);
4178 mddev->metadata_type[namelen] = 0;
4179 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4180 mddev->metadata_type[--namelen] = 0;
4181 mddev->persistent = 0;
4182 mddev->external = 1;
4183 mddev->major_version = 0;
4184 mddev->minor_version = 90;
4185 return len;
4186 }
4187 major = simple_strtoul(buf, &e, 10);
4188 if (e==buf || *e != '.')
4189 return -EINVAL;
4190 buf = e+1;
4191 minor = simple_strtoul(buf, &e, 10);
4192 if (e==buf || (*e && *e != '\n') )
4193 return -EINVAL;
4194 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4195 return -ENOENT;
4196 mddev->major_version = major;
4197 mddev->minor_version = minor;
4198 mddev->persistent = 1;
4199 mddev->external = 0;
4200 return len;
4201}
4202
4203static struct md_sysfs_entry md_metadata =
4204__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4205
4206static ssize_t
4207action_show(struct mddev *mddev, char *page)
4208{
4209 char *type = "idle";
4210 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4211 type = "frozen";
4212 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4213 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4214 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4215 type = "reshape";
4216 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4217 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4218 type = "resync";
4219 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4220 type = "check";
4221 else
4222 type = "repair";
4223 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4224 type = "recover";
4225 }
4226 return sprintf(page, "%s\n", type);
4227}
4228
4229static void reap_sync_thread(struct mddev *mddev);
4230
4231static ssize_t
4232action_store(struct mddev *mddev, const char *page, size_t len)
4233{
4234 if (!mddev->pers || !mddev->pers->sync_request)
4235 return -EINVAL;
4236
4237 if (cmd_match(page, "frozen"))
4238 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4239 else
4240 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4241
4242 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4243 if (mddev->sync_thread) {
4244 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4245 reap_sync_thread(mddev);
4246 }
4247 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4248 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4249 return -EBUSY;
4250 else if (cmd_match(page, "resync"))
4251 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4252 else if (cmd_match(page, "recover")) {
4253 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4254 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4255 } else if (cmd_match(page, "reshape")) {
4256 int err;
4257 if (mddev->pers->start_reshape == NULL)
4258 return -EINVAL;
4259 err = mddev->pers->start_reshape(mddev);
4260 if (err)
4261 return err;
4262 sysfs_notify(&mddev->kobj, NULL, "degraded");
4263 } else {
4264 if (cmd_match(page, "check"))
4265 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4266 else if (!cmd_match(page, "repair"))
4267 return -EINVAL;
4268 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4269 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4270 }
4271 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4272 md_wakeup_thread(mddev->thread);
4273 sysfs_notify_dirent_safe(mddev->sysfs_action);
4274 return len;
4275}
4276
4277static ssize_t
4278mismatch_cnt_show(struct mddev *mddev, char *page)
4279{
4280 return sprintf(page, "%llu\n",
4281 (unsigned long long) mddev->resync_mismatches);
4282}
4283
4284static struct md_sysfs_entry md_scan_mode =
4285__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4286
4287
4288static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4289
4290static ssize_t
4291sync_min_show(struct mddev *mddev, char *page)
4292{
4293 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4294 mddev->sync_speed_min ? "local": "system");
4295}
4296
4297static ssize_t
4298sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4299{
4300 int min;
4301 char *e;
4302 if (strncmp(buf, "system", 6)==0) {
4303 mddev->sync_speed_min = 0;
4304 return len;
4305 }
4306 min = simple_strtoul(buf, &e, 10);
4307 if (buf == e || (*e && *e != '\n') || min <= 0)
4308 return -EINVAL;
4309 mddev->sync_speed_min = min;
4310 return len;
4311}
4312
4313static struct md_sysfs_entry md_sync_min =
4314__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4315
4316static ssize_t
4317sync_max_show(struct mddev *mddev, char *page)
4318{
4319 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4320 mddev->sync_speed_max ? "local": "system");
4321}
4322
4323static ssize_t
4324sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4325{
4326 int max;
4327 char *e;
4328 if (strncmp(buf, "system", 6)==0) {
4329 mddev->sync_speed_max = 0;
4330 return len;
4331 }
4332 max = simple_strtoul(buf, &e, 10);
4333 if (buf == e || (*e && *e != '\n') || max <= 0)
4334 return -EINVAL;
4335 mddev->sync_speed_max = max;
4336 return len;
4337}
4338
4339static struct md_sysfs_entry md_sync_max =
4340__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4341
4342static ssize_t
4343degraded_show(struct mddev *mddev, char *page)
4344{
4345 return sprintf(page, "%d\n", mddev->degraded);
4346}
4347static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4348
4349static ssize_t
4350sync_force_parallel_show(struct mddev *mddev, char *page)
4351{
4352 return sprintf(page, "%d\n", mddev->parallel_resync);
4353}
4354
4355static ssize_t
4356sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4357{
4358 long n;
4359
4360 if (strict_strtol(buf, 10, &n))
4361 return -EINVAL;
4362
4363 if (n != 0 && n != 1)
4364 return -EINVAL;
4365
4366 mddev->parallel_resync = n;
4367
4368 if (mddev->sync_thread)
4369 wake_up(&resync_wait);
4370
4371 return len;
4372}
4373
4374
4375static struct md_sysfs_entry md_sync_force_parallel =
4376__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4377 sync_force_parallel_show, sync_force_parallel_store);
4378
4379static ssize_t
4380sync_speed_show(struct mddev *mddev, char *page)
4381{
4382 unsigned long resync, dt, db;
4383 if (mddev->curr_resync == 0)
4384 return sprintf(page, "none\n");
4385 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4386 dt = (jiffies - mddev->resync_mark) / HZ;
4387 if (!dt) dt++;
4388 db = resync - mddev->resync_mark_cnt;
4389 return sprintf(page, "%lu\n", db/dt/2);
4390}
4391
4392static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4393
4394static ssize_t
4395sync_completed_show(struct mddev *mddev, char *page)
4396{
4397 unsigned long long max_sectors, resync;
4398
4399 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4400 return sprintf(page, "none\n");
4401
4402 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4403 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4404 max_sectors = mddev->resync_max_sectors;
4405 else
4406 max_sectors = mddev->dev_sectors;
4407
4408 resync = mddev->curr_resync_completed;
4409 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4410}
4411
4412static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4413
4414static ssize_t
4415min_sync_show(struct mddev *mddev, char *page)
4416{
4417 return sprintf(page, "%llu\n",
4418 (unsigned long long)mddev->resync_min);
4419}
4420static ssize_t
4421min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4422{
4423 unsigned long long min;
4424 if (strict_strtoull(buf, 10, &min))
4425 return -EINVAL;
4426 if (min > mddev->resync_max)
4427 return -EINVAL;
4428 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4429 return -EBUSY;
4430
4431
4432 if (mddev->chunk_sectors) {
4433 sector_t temp = min;
4434 if (sector_div(temp, mddev->chunk_sectors))
4435 return -EINVAL;
4436 }
4437 mddev->resync_min = min;
4438
4439 return len;
4440}
4441
4442static struct md_sysfs_entry md_min_sync =
4443__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4444
4445static ssize_t
4446max_sync_show(struct mddev *mddev, char *page)
4447{
4448 if (mddev->resync_max == MaxSector)
4449 return sprintf(page, "max\n");
4450 else
4451 return sprintf(page, "%llu\n",
4452 (unsigned long long)mddev->resync_max);
4453}
4454static ssize_t
4455max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4456{
4457 if (strncmp(buf, "max", 3) == 0)
4458 mddev->resync_max = MaxSector;
4459 else {
4460 unsigned long long max;
4461 if (strict_strtoull(buf, 10, &max))
4462 return -EINVAL;
4463 if (max < mddev->resync_min)
4464 return -EINVAL;
4465 if (max < mddev->resync_max &&
4466 mddev->ro == 0 &&
4467 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4468 return -EBUSY;
4469
4470
4471 if (mddev->chunk_sectors) {
4472 sector_t temp = max;
4473 if (sector_div(temp, mddev->chunk_sectors))
4474 return -EINVAL;
4475 }
4476 mddev->resync_max = max;
4477 }
4478 wake_up(&mddev->recovery_wait);
4479 return len;
4480}
4481
4482static struct md_sysfs_entry md_max_sync =
4483__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4484
4485static ssize_t
4486suspend_lo_show(struct mddev *mddev, char *page)
4487{
4488 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4489}
4490
4491static ssize_t
4492suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4493{
4494 char *e;
4495 unsigned long long new = simple_strtoull(buf, &e, 10);
4496 unsigned long long old = mddev->suspend_lo;
4497
4498 if (mddev->pers == NULL ||
4499 mddev->pers->quiesce == NULL)
4500 return -EINVAL;
4501 if (buf == e || (*e && *e != '\n'))
4502 return -EINVAL;
4503
4504 mddev->suspend_lo = new;
4505 if (new >= old)
4506
4507 mddev->pers->quiesce(mddev, 2);
4508 else {
4509
4510 mddev->pers->quiesce(mddev, 1);
4511 mddev->pers->quiesce(mddev, 0);
4512 }
4513 return len;
4514}
4515static struct md_sysfs_entry md_suspend_lo =
4516__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4517
4518
4519static ssize_t
4520suspend_hi_show(struct mddev *mddev, char *page)
4521{
4522 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4523}
4524
4525static ssize_t
4526suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4527{
4528 char *e;
4529 unsigned long long new = simple_strtoull(buf, &e, 10);
4530 unsigned long long old = mddev->suspend_hi;
4531
4532 if (mddev->pers == NULL ||
4533 mddev->pers->quiesce == NULL)
4534 return -EINVAL;
4535 if (buf == e || (*e && *e != '\n'))
4536 return -EINVAL;
4537
4538 mddev->suspend_hi = new;
4539 if (new <= old)
4540
4541 mddev->pers->quiesce(mddev, 2);
4542 else {
4543
4544 mddev->pers->quiesce(mddev, 1);
4545 mddev->pers->quiesce(mddev, 0);
4546 }
4547 return len;
4548}
4549static struct md_sysfs_entry md_suspend_hi =
4550__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4551
4552static ssize_t
4553reshape_position_show(struct mddev *mddev, char *page)
4554{
4555 if (mddev->reshape_position != MaxSector)
4556 return sprintf(page, "%llu\n",
4557 (unsigned long long)mddev->reshape_position);
4558 strcpy(page, "none\n");
4559 return 5;
4560}
4561
4562static ssize_t
4563reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4564{
4565 struct md_rdev *rdev;
4566 char *e;
4567 unsigned long long new = simple_strtoull(buf, &e, 10);
4568 if (mddev->pers)
4569 return -EBUSY;
4570 if (buf == e || (*e && *e != '\n'))
4571 return -EINVAL;
4572 mddev->reshape_position = new;
4573 mddev->delta_disks = 0;
4574 mddev->reshape_backwards = 0;
4575 mddev->new_level = mddev->level;
4576 mddev->new_layout = mddev->layout;
4577 mddev->new_chunk_sectors = mddev->chunk_sectors;
4578 rdev_for_each(rdev, mddev)
4579 rdev->new_data_offset = rdev->data_offset;
4580 return len;
4581}
4582
4583static struct md_sysfs_entry md_reshape_position =
4584__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4585 reshape_position_store);
4586
4587static ssize_t
4588reshape_direction_show(struct mddev *mddev, char *page)
4589{
4590 return sprintf(page, "%s\n",
4591 mddev->reshape_backwards ? "backwards" : "forwards");
4592}
4593
4594static ssize_t
4595reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4596{
4597 int backwards = 0;
4598 if (cmd_match(buf, "forwards"))
4599 backwards = 0;
4600 else if (cmd_match(buf, "backwards"))
4601 backwards = 1;
4602 else
4603 return -EINVAL;
4604 if (mddev->reshape_backwards == backwards)
4605 return len;
4606
4607
4608 if (mddev->delta_disks)
4609 return -EBUSY;
4610
4611 if (mddev->persistent &&
4612 mddev->major_version == 0)
4613 return -EINVAL;
4614
4615 mddev->reshape_backwards = backwards;
4616 return len;
4617}
4618
4619static struct md_sysfs_entry md_reshape_direction =
4620__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4621 reshape_direction_store);
4622
4623static ssize_t
4624array_size_show(struct mddev *mddev, char *page)
4625{
4626 if (mddev->external_size)
4627 return sprintf(page, "%llu\n",
4628 (unsigned long long)mddev->array_sectors/2);
4629 else
4630 return sprintf(page, "default\n");
4631}
4632
4633static ssize_t
4634array_size_store(struct mddev *mddev, const char *buf, size_t len)
4635{
4636 sector_t sectors;
4637
4638 if (strncmp(buf, "default", 7) == 0) {
4639 if (mddev->pers)
4640 sectors = mddev->pers->size(mddev, 0, 0);
4641 else
4642 sectors = mddev->array_sectors;
4643
4644 mddev->external_size = 0;
4645 } else {
4646 if (strict_blocks_to_sectors(buf, §ors) < 0)
4647 return -EINVAL;
4648 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4649 return -E2BIG;
4650
4651 mddev->external_size = 1;
4652 }
4653
4654 mddev->array_sectors = sectors;
4655 if (mddev->pers) {
4656 set_capacity(mddev->gendisk, mddev->array_sectors);
4657 revalidate_disk(mddev->gendisk);
4658 }
4659 return len;
4660}
4661
4662static struct md_sysfs_entry md_array_size =
4663__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4664 array_size_store);
4665
/*
 * Attributes installed through md_ktype.default_attrs, i.e. for every
 * mddev kobject.  NULL-terminated.
 */
static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
	&md_metadata.attr,
	&md_new_device.attr,
	&md_safe_delay.attr,
	&md_array_state.attr,
	&md_reshape_position.attr,
	&md_reshape_direction.attr,
	&md_array_size.attr,
	&max_corr_read_errors.attr,
	NULL,
};

/*
 * Attributes that only make sense for personalities with a sync_request
 * method; md_run() registers them via md_redundancy_group in that case.
 * NULL-terminated.
 */
static struct attribute *md_redundancy_attrs[] = {
	&md_scan_mode.attr,
	&md_mismatches.attr,
	&md_sync_min.attr,
	&md_sync_max.attr,
	&md_sync_speed.attr,
	&md_sync_force_parallel.attr,
	&md_sync_completed.attr,
	&md_min_sync.attr,
	&md_max_sync.attr,
	&md_suspend_lo.attr,
	&md_suspend_hi.attr,
	&md_bitmap.attr,
	&md_degraded.attr,
	NULL,
};
/* Unnamed group: the attributes appear directly in the kobject's directory. */
static struct attribute_group md_redundancy_group = {
	.name = NULL,
	.attrs = md_redundancy_attrs,
};
4704
4705
4706static ssize_t
4707md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4708{
4709 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4710 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4711 ssize_t rv;
4712
4713 if (!entry->show)
4714 return -EIO;
4715 spin_lock(&all_mddevs_lock);
4716 if (list_empty(&mddev->all_mddevs)) {
4717 spin_unlock(&all_mddevs_lock);
4718 return -EBUSY;
4719 }
4720 mddev_get(mddev);
4721 spin_unlock(&all_mddevs_lock);
4722
4723 rv = mddev_lock(mddev);
4724 if (!rv) {
4725 rv = entry->show(mddev, page);
4726 mddev_unlock(mddev);
4727 }
4728 mddev_put(mddev);
4729 return rv;
4730}
4731
4732static ssize_t
4733md_attr_store(struct kobject *kobj, struct attribute *attr,
4734 const char *page, size_t length)
4735{
4736 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4737 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4738 ssize_t rv;
4739
4740 if (!entry->store)
4741 return -EIO;
4742 if (!capable(CAP_SYS_ADMIN))
4743 return -EACCES;
4744 spin_lock(&all_mddevs_lock);
4745 if (list_empty(&mddev->all_mddevs)) {
4746 spin_unlock(&all_mddevs_lock);
4747 return -EBUSY;
4748 }
4749 mddev_get(mddev);
4750 spin_unlock(&all_mddevs_lock);
4751 rv = mddev_lock(mddev);
4752 if (!rv) {
4753 rv = entry->store(mddev, page, length);
4754 mddev_unlock(mddev);
4755 }
4756 mddev_put(mddev);
4757 return rv;
4758}
4759
4760static void md_free(struct kobject *ko)
4761{
4762 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4763
4764 if (mddev->sysfs_state)
4765 sysfs_put(mddev->sysfs_state);
4766
4767 if (mddev->gendisk) {
4768 del_gendisk(mddev->gendisk);
4769 put_disk(mddev->gendisk);
4770 }
4771 if (mddev->queue)
4772 blk_cleanup_queue(mddev->queue);
4773
4774 kfree(mddev);
4775}
4776
/* All mddev attributes are routed through the md_attr_* dispatchers. */
static const struct sysfs_ops md_sysfs_ops = {
	.show	= md_attr_show,
	.store	= md_attr_store,
};
/* kobject type of an mddev: release hook, sysfs ops and default attributes. */
static struct kobj_type md_ktype = {
	.release	= md_free,
	.sysfs_ops	= &md_sysfs_ops,
	.default_attrs	= md_default_attrs,
};

/* NOTE(review): presumably the major number of partitionable md devices - assigned elsewhere, confirm. */
int mdp_major = 0;
4788
4789static void mddev_delayed_delete(struct work_struct *ws)
4790{
4791 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4792
4793 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4794 kobject_del(&mddev->kobj);
4795 kobject_put(&mddev->kobj);
4796}
4797
/*
 * Allocate the request queue, gendisk and "md" kobject for the mddev
 * found (via mddev_find()) for @dev.
 *
 * @dev:  device number of the array
 * @name: optional disk name; when NULL the name is "md<N>" or, for
 *        devices whose major is not MD_MAJOR, "md_d<N>"
 *
 * Returns 0 on success, -ENODEV if mddev_find() fails, -EEXIST if the
 * mddev already has a gendisk or @name is already used by another
 * array, -ENOMEM on allocation failure.  A failure to register the
 * kobject is only logged and does not fail the call.
 */
static int md_alloc(dev_t dev, char *name)
{
	/* serialises all gendisk creation done by this function */
	static DEFINE_MUTEX(disks_mutex);
	struct mddev *mddev = mddev_find(dev);
	struct gendisk *disk;
	int partitioned;
	int shift;
	int unit;
	int error;

	if (!mddev)
		return -ENODEV;

	partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
	shift = partitioned ? MdpMinorShift : 0;
	unit = MINOR(mddev->unit) >> shift;

	/*
	 * Let queued work on md_misc_wq finish first (presumably pending
	 * mddev_delayed_delete() calls that would still own the name).
	 */
	flush_workqueue(md_misc_wq);

	mutex_lock(&disks_mutex);
	error = -EEXIST;
	if (mddev->gendisk)
		goto abort;

	if (name) {
		/* a requested name must not be used by any existing array */
		struct mddev *mddev2;
		spin_lock(&all_mddevs_lock);

		list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
			if (mddev2->gendisk &&
			    strcmp(mddev2->gendisk->disk_name, name) == 0) {
				spin_unlock(&all_mddevs_lock);
				goto abort;
			}
		spin_unlock(&all_mddevs_lock);
	}

	error = -ENOMEM;
	mddev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!mddev->queue)
		goto abort;
	mddev->queue->queuedata = mddev;

	blk_queue_make_request(mddev->queue, md_make_request);
	blk_set_stacking_limits(&mddev->queue->limits);

	/* one minor for plain md devices, 1 << MdpMinorShift for partitionable ones */
	disk = alloc_disk(1 << shift);
	if (!disk) {
		blk_cleanup_queue(mddev->queue);
		mddev->queue = NULL;
		goto abort;
	}
	disk->major = MAJOR(mddev->unit);
	disk->first_minor = unit << shift;
	/* NOTE(review): strcpy relies on the caller bounding @name (add_named_array() limits it to DISK_NAME_LEN) */
	if (name)
		strcpy(disk->disk_name, name);
	else if (partitioned)
		sprintf(disk->disk_name, "md_d%d", unit);
	else
		sprintf(disk->disk_name, "md%d", unit);
	disk->fops = &md_fops;
	disk->private_data = mddev;
	disk->queue = mddev->queue;
	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);

	disk->flags |= GENHD_FL_EXT_DEVT;
	mddev->gendisk = disk;

	/*
	 * open_mutex is held across add_disk() and the kobject setup
	 * (presumably so an open cannot race with the "md" directory
	 * being created - confirm against md_open()).
	 */
	mutex_lock(&mddev->open_mutex);
	add_disk(disk);

	error = kobject_init_and_add(&mddev->kobj, &md_ktype,
				     &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/* registration failure is logged but deliberately not returned */
		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
		       disk->disk_name);
		error = 0;
	}
	if (mddev->kobj.sd &&
	    sysfs_create_group(&mddev->kobj, &md_bitmap_group))
		printk(KERN_DEBUG "pointless warning\n");
	mutex_unlock(&mddev->open_mutex);
 abort:
	mutex_unlock(&disks_mutex);
	/* announce the kobject and cache the array_state dirent only if it was registered */
	if (!error && mddev->kobj.sd) {
		kobject_uevent(&mddev->kobj, KOBJ_ADD);
		mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
	}
	/* drop the mddev reference (presumably the one taken by mddev_find()) */
	mddev_put(mddev);
	return error;
}
4902
4903static struct kobject *md_probe(dev_t dev, int *part, void *data)
4904{
4905 md_alloc(dev, NULL);
4906 return NULL;
4907}
4908
4909static int add_named_array(const char *val, struct kernel_param *kp)
4910{
4911
4912
4913
4914
4915 int len = strlen(val);
4916 char buf[DISK_NAME_LEN];
4917
4918 while (len && val[len-1] == '\n')
4919 len--;
4920 if (len >= DISK_NAME_LEN)
4921 return -E2BIG;
4922 strlcpy(buf, val, len+1);
4923 if (strncmp(buf, "md_", 3) != 0)
4924 return -EINVAL;
4925 return md_alloc(0, buf);
4926}
4927
4928static void md_safemode_timeout(unsigned long data)
4929{
4930 struct mddev *mddev = (struct mddev *) data;
4931
4932 if (!atomic_read(&mddev->writes_pending)) {
4933 mddev->safemode = 1;
4934 if (mddev->external)
4935 sysfs_notify_dirent_safe(mddev->sysfs_state);
4936 }
4937 md_wakeup_thread(mddev->thread);
4938}
4939
/* Copied into mddev->ok_start_degraded by md_run(); zero unless set elsewhere. */
static int start_dirty_degraded;
4941
4942int md_run(struct mddev *mddev)
4943{
4944 int err;
4945 struct md_rdev *rdev;
4946 struct md_personality *pers;
4947
4948 if (list_empty(&mddev->disks))
4949
4950 return -EINVAL;
4951
4952 if (mddev->pers)
4953 return -EBUSY;
4954
4955 if (mddev->sysfs_active)
4956 return -EBUSY;
4957
4958
4959
4960
4961 if (!mddev->raid_disks) {
4962 if (!mddev->persistent)
4963 return -EINVAL;
4964 analyze_sbs(mddev);
4965 }
4966
4967 if (mddev->level != LEVEL_NONE)
4968 request_module("md-level-%d", mddev->level);
4969 else if (mddev->clevel[0])
4970 request_module("md-%s", mddev->clevel);
4971
4972
4973
4974
4975
4976
4977 rdev_for_each(rdev, mddev) {
4978 if (test_bit(Faulty, &rdev->flags))
4979 continue;
4980 sync_blockdev(rdev->bdev);
4981 invalidate_bdev(rdev->bdev);
4982
4983
4984
4985
4986
4987 if (rdev->meta_bdev) {
4988 ;
4989 } else if (rdev->data_offset < rdev->sb_start) {
4990 if (mddev->dev_sectors &&
4991 rdev->data_offset + mddev->dev_sectors
4992 > rdev->sb_start) {
4993 printk("md: %s: data overlaps metadata\n",
4994 mdname(mddev));
4995 return -EINVAL;
4996 }
4997 } else {
4998 if (rdev->sb_start + rdev->sb_size/512
4999 > rdev->data_offset) {
5000 printk("md: %s: metadata overlaps data\n",
5001 mdname(mddev));
5002 return -EINVAL;
5003 }
5004 }
5005 sysfs_notify_dirent_safe(rdev->sysfs_state);
5006 }
5007
5008 if (mddev->bio_set == NULL)
5009 mddev->bio_set = bioset_create(BIO_POOL_SIZE,
5010 sizeof(struct mddev *));
5011
5012 spin_lock(&pers_lock);
5013 pers = find_pers(mddev->level, mddev->clevel);
5014 if (!pers || !try_module_get(pers->owner)) {
5015 spin_unlock(&pers_lock);
5016 if (mddev->level != LEVEL_NONE)
5017 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5018 mddev->level);
5019 else
5020 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5021 mddev->clevel);
5022 return -EINVAL;
5023 }
5024 mddev->pers = pers;
5025 spin_unlock(&pers_lock);
5026 if (mddev->level != pers->level) {
5027 mddev->level = pers->level;
5028 mddev->new_level = pers->level;
5029 }
5030 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5031
5032 if (mddev->reshape_position != MaxSector &&
5033 pers->start_reshape == NULL) {
5034
5035 mddev->pers = NULL;
5036 module_put(pers->owner);
5037 return -EINVAL;
5038 }
5039
5040 if (pers->sync_request) {
5041
5042
5043
5044 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5045 struct md_rdev *rdev2;
5046 int warned = 0;
5047
5048 rdev_for_each(rdev, mddev)
5049 rdev_for_each(rdev2, mddev) {
5050 if (rdev < rdev2 &&
5051 rdev->bdev->bd_contains ==
5052 rdev2->bdev->bd_contains) {
5053 printk(KERN_WARNING
5054 "%s: WARNING: %s appears to be"
5055 " on the same physical disk as"
5056 " %s.\n",
5057 mdname(mddev),
5058 bdevname(rdev->bdev,b),
5059 bdevname(rdev2->bdev,b2));
5060 warned = 1;
5061 }
5062 }
5063
5064 if (warned)
5065 printk(KERN_WARNING
5066 "True protection against single-disk"
5067 " failure might be compromised.\n");
5068 }
5069
5070 mddev->recovery = 0;
5071
5072 mddev->resync_max_sectors = mddev->dev_sectors;
5073
5074 mddev->ok_start_degraded = start_dirty_degraded;
5075
5076 if (start_readonly && mddev->ro == 0)
5077 mddev->ro = 2;
5078
5079 err = mddev->pers->run(mddev);
5080 if (err)
5081 printk(KERN_ERR "md: pers->run() failed ...\n");
5082 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5083 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5084 " but 'external_size' not in effect?\n", __func__);
5085 printk(KERN_ERR
5086 "md: invalid array_size %llu > default size %llu\n",
5087 (unsigned long long)mddev->array_sectors / 2,
5088 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5089 err = -EINVAL;
5090 mddev->pers->stop(mddev);
5091 }
5092 if (err == 0 && mddev->pers->sync_request &&
5093 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5094 err = bitmap_create(mddev);
5095 if (err) {
5096 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5097 mdname(mddev), err);
5098 mddev->pers->stop(mddev);
5099 }
5100 }
5101 if (err) {
5102 module_put(mddev->pers->owner);
5103 mddev->pers = NULL;
5104 bitmap_destroy(mddev);
5105 return err;
5106 }
5107 if (mddev->pers->sync_request) {
5108 if (mddev->kobj.sd &&
5109 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5110 printk(KERN_WARNING
5111 "md: cannot register extra attributes for %s\n",
5112 mdname(mddev));
5113 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5114 } else if (mddev->ro == 2)
5115 mddev->ro = 0;
5116
5117 atomic_set(&mddev->writes_pending,0);
5118 atomic_set(&mddev->max_corr_read_errors,
5119 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5120 mddev->safemode = 0;
5121 mddev->safemode_timer.function = md_safemode_timeout;
5122 mddev->safemode_timer.data = (unsigned long) mddev;
5123 mddev->safemode_delay = (200 * HZ)/1000 +1;
5124 mddev->in_sync = 1;
5125 smp_wmb();
5126 mddev->ready = 1;
5127 rdev_for_each(rdev, mddev)
5128 if (rdev->raid_disk >= 0)
5129 if (sysfs_link_rdev(mddev, rdev))
5130 ;
5131
5132 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5133
5134 if (mddev->flags)
5135 md_update_sb(mddev, 0);
5136
5137 md_new_event(mddev);
5138 sysfs_notify_dirent_safe(mddev->sysfs_state);
5139 sysfs_notify_dirent_safe(mddev->sysfs_action);
5140 sysfs_notify(&mddev->kobj, NULL, "degraded");
5141 return 0;
5142}
5143EXPORT_SYMBOL_GPL(md_run);
5144
5145static int do_md_run(struct mddev *mddev)
5146{
5147 int err;
5148
5149 err = md_run(mddev);
5150 if (err)
5151 goto out;
5152 err = bitmap_load(mddev);
5153 if (err) {
5154 bitmap_destroy(mddev);
5155 goto out;
5156 }
5157
5158 md_wakeup_thread(mddev->thread);
5159 md_wakeup_thread(mddev->sync_thread);
5160
5161 set_capacity(mddev->gendisk, mddev->array_sectors);
5162 revalidate_disk(mddev->gendisk);
5163 mddev->changed = 1;
5164 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5165out:
5166 return err;
5167}
5168
5169static int restart_array(struct mddev *mddev)
5170{
5171 struct gendisk *disk = mddev->gendisk;
5172
5173
5174 if (list_empty(&mddev->disks))
5175 return -ENXIO;
5176 if (!mddev->pers)
5177 return -EINVAL;
5178 if (!mddev->ro)
5179 return -EBUSY;
5180 mddev->safemode = 0;
5181 mddev->ro = 0;
5182 set_disk_ro(disk, 0);
5183 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5184 mdname(mddev));
5185
5186 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5187 md_wakeup_thread(mddev->thread);
5188 md_wakeup_thread(mddev->sync_thread);
5189 sysfs_notify_dirent_safe(mddev->sysfs_state);
5190 return 0;
5191}
5192
5193
5194
5195static int deny_bitmap_write_access(struct file * file)
5196{
5197 struct inode *inode = file->f_mapping->host;
5198
5199 spin_lock(&inode->i_lock);
5200 if (atomic_read(&inode->i_writecount) > 1) {
5201 spin_unlock(&inode->i_lock);
5202 return -ETXTBSY;
5203 }
5204 atomic_set(&inode->i_writecount, -1);
5205 spin_unlock(&inode->i_lock);
5206
5207 return 0;
5208}
5209
5210void restore_bitmap_write_access(struct file *file)
5211{
5212 struct inode *inode = file->f_mapping->host;
5213
5214 spin_lock(&inode->i_lock);
5215 atomic_set(&inode->i_writecount, 1);
5216 spin_unlock(&inode->i_lock);
5217}
5218
5219static void md_clean(struct mddev *mddev)
5220{
5221 mddev->array_sectors = 0;
5222 mddev->external_size = 0;
5223 mddev->dev_sectors = 0;
5224 mddev->raid_disks = 0;
5225 mddev->recovery_cp = 0;
5226 mddev->resync_min = 0;
5227 mddev->resync_max = MaxSector;
5228 mddev->reshape_position = MaxSector;
5229 mddev->external = 0;
5230 mddev->persistent = 0;
5231 mddev->level = LEVEL_NONE;
5232 mddev->clevel[0] = 0;
5233 mddev->flags = 0;
5234 mddev->ro = 0;
5235 mddev->metadata_type[0] = 0;
5236 mddev->chunk_sectors = 0;
5237 mddev->ctime = mddev->utime = 0;
5238 mddev->layout = 0;
5239 mddev->max_disks = 0;
5240 mddev->events = 0;
5241 mddev->can_decrease_events = 0;
5242 mddev->delta_disks = 0;
5243 mddev->reshape_backwards = 0;
5244