1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/crc32.h>
24#include <linux/blkdev.h>
25#endif
26
27
28
29
30
31struct recovery_info
32{
33 tid_t start_transaction;
34 tid_t end_transaction;
35
36 int nr_replays;
37 int nr_revokes;
38 int nr_revoke_hits;
39};
40
41enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42static int do_one_pass(journal_t *journal,
43 struct recovery_info *info, enum passtype pass);
44static int scan_revoke_records(journal_t *, struct buffer_head *,
45 tid_t, struct recovery_info *);
46
47#ifdef __KERNEL__
48
49
/*
 * Call brelse() on each of the first @n entries of @b, walking from the
 * last entry down to the first.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69#define MAXBUF 8
70static int do_readahead(journal_t *journal, unsigned int start)
71{
72 int err;
73 unsigned int max, nbufs, next;
74 unsigned long long blocknr;
75 struct buffer_head *bh;
76
77 struct buffer_head * bufs[MAXBUF];
78
79
80 max = start + (128 * 1024 / journal->j_blocksize);
81 if (max > journal->j_maxlen)
82 max = journal->j_maxlen;
83
84
85
86
87 nbufs = 0;
88
89 for (next = start; next < max; next++) {
90 err = jbd2_journal_bmap(journal, next, &blocknr);
91
92 if (err) {
93 printk(KERN_ERR "JBD2: bad block at offset %u\n",
94 next);
95 goto failed;
96 }
97
98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99 if (!bh) {
100 err = -ENOMEM;
101 goto failed;
102 }
103
104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105 bufs[nbufs++] = bh;
106 if (nbufs == MAXBUF) {
107 ll_rw_block(READ, nbufs, bufs);
108 journal_brelse_array(bufs, nbufs);
109 nbufs = 0;
110 }
111 } else
112 brelse(bh);
113 }
114
115 if (nbufs)
116 ll_rw_block(READ, nbufs, bufs);
117 err = 0;
118
119failed:
120 if (nbufs)
121 journal_brelse_array(bufs, nbufs);
122 return err;
123}
124
125#endif
126
127
128
129
130
131
132static int jread(struct buffer_head **bhp, journal_t *journal,
133 unsigned int offset)
134{
135 int err;
136 unsigned long long blocknr;
137 struct buffer_head *bh;
138
139 *bhp = NULL;
140
141 if (offset >= journal->j_maxlen) {
142 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
143 return -EIO;
144 }
145
146 err = jbd2_journal_bmap(journal, offset, &blocknr);
147
148 if (err) {
149 printk(KERN_ERR "JBD2: bad block at offset %u\n",
150 offset);
151 return err;
152 }
153
154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155 if (!bh)
156 return -ENOMEM;
157
158 if (!buffer_uptodate(bh)) {
159
160
161 if (!buffer_req(bh))
162 do_readahead(journal, offset);
163 wait_on_buffer(bh);
164 }
165
166 if (!buffer_uptodate(bh)) {
167 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
168 offset);
169 brelse(bh);
170 return -EIO;
171 }
172
173 *bhp = bh;
174 return 0;
175}
176
177static int jbd2_descr_block_csum_verify(journal_t *j,
178 void *buf)
179{
180 struct jbd2_journal_block_tail *tail;
181 __u32 provided, calculated;
182
183 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
184 return 1;
185
186 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
187 sizeof(struct jbd2_journal_block_tail));
188 provided = tail->t_checksum;
189 tail->t_checksum = 0;
190 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
191 tail->t_checksum = provided;
192
193 provided = be32_to_cpu(provided);
194 return provided == calculated;
195}
196
197
198
199
200
201static int count_tags(journal_t *journal, struct buffer_head *bh)
202{
203 char * tagp;
204 journal_block_tag_t * tag;
205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal);
207
208 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
209 size -= sizeof(struct jbd2_journal_block_tail);
210
211 tagp = &bh->b_data[sizeof(journal_header_t)];
212
213 while ((tagp - bh->b_data + tag_bytes) <= size) {
214 tag = (journal_block_tag_t *) tagp;
215
216 nr++;
217 tagp += tag_bytes;
218 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
219 tagp += 16;
220
221 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
222 break;
223 }
224
225 return nr;
226}
227
228
229
/*
 * wrap(journal, var): if the log block number @var has reached or passed
 * (journal)->j_last, pull it back by the size of the log area
 * (j_last - j_first).
 *
 * @var must be a modifiable lvalue.  It is evaluated more than once, so
 * it must not have side effects.  Both arguments are parenthesized in
 * the expansion so that expressions such as *ptr expand safely.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
235
236
237
238
239
240
241
242
243
244
245
246
247
248int jbd2_journal_recover(journal_t *journal)
249{
250 int err, err2;
251 journal_superblock_t * sb;
252
253 struct recovery_info info;
254
255 memset(&info, 0, sizeof(info));
256 sb = journal->j_superblock;
257
258
259
260
261
262
263
264 if (!sb->s_start) {
265 jbd_debug(1, "No recovery required, last transaction %d\n",
266 be32_to_cpu(sb->s_sequence));
267 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
268 return 0;
269 }
270
271 err = do_one_pass(journal, &info, PASS_SCAN);
272 if (!err)
273 err = do_one_pass(journal, &info, PASS_REVOKE);
274 if (!err)
275 err = do_one_pass(journal, &info, PASS_REPLAY);
276
277 jbd_debug(1, "JBD2: recovery, exit status %d, "
278 "recovered transactions %u to %u\n",
279 err, info.start_transaction, info.end_transaction);
280 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
281 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
282
283
284
285 journal->j_transaction_sequence = ++info.end_transaction;
286
287 jbd2_journal_clear_revoke(journal);
288 err2 = sync_blockdev(journal->j_fs_dev);
289 if (!err)
290 err = err2;
291
292 if (journal->j_flags & JBD2_BARRIER)
293 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
294 return err;
295}
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310int jbd2_journal_skip_recovery(journal_t *journal)
311{
312 int err;
313
314 struct recovery_info info;
315
316 memset (&info, 0, sizeof(info));
317
318 err = do_one_pass(journal, &info, PASS_SCAN);
319
320 if (err) {
321 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
322 ++journal->j_transaction_sequence;
323 } else {
324#ifdef CONFIG_JBD2_DEBUG
325 int dropped = info.end_transaction -
326 be32_to_cpu(journal->j_superblock->s_sequence);
327 jbd_debug(1,
328 "JBD2: ignoring %d transaction%s from the journal.\n",
329 dropped, (dropped == 1) ? "" : "s");
330#endif
331 journal->j_transaction_sequence = ++info.end_transaction;
332 }
333
334 journal->j_tail = 0;
335 return err;
336}
337
338static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
339{
340 unsigned long long block = be32_to_cpu(tag->t_blocknr);
341 if (tag_bytes > JBD2_TAG_SIZE32)
342 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
343 return block;
344}
345
346
347
348
349
350static int calc_chksums(journal_t *journal, struct buffer_head *bh,
351 unsigned long *next_log_block, __u32 *crc32_sum)
352{
353 int i, num_blks, err;
354 unsigned long io_block;
355 struct buffer_head *obh;
356
357 num_blks = count_tags(journal, bh);
358
359 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
360
361 for (i = 0; i < num_blks; i++) {
362 io_block = (*next_log_block)++;
363 wrap(journal, *next_log_block);
364 err = jread(&obh, journal, io_block);
365 if (err) {
366 printk(KERN_ERR "JBD2: IO error %d recovering block "
367 "%lu in log\n", err, io_block);
368 return 1;
369 } else {
370 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
371 obh->b_size);
372 }
373 put_bh(obh);
374 }
375 return 0;
376}
377
378static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
379{
380 struct commit_header *h;
381 __u32 provided, calculated;
382
383 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
384 return 1;
385
386 h = buf;
387 provided = h->h_chksum[0];
388 h->h_chksum[0] = 0;
389 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
390 h->h_chksum[0] = provided;
391
392 provided = be32_to_cpu(provided);
393 return provided == calculated;
394}
395
396static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
397 void *buf, __u32 sequence)
398{
399 __u32 provided, calculated;
400
401 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
402 return 1;
403
404 sequence = cpu_to_be32(sequence);
405 calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
406 sizeof(sequence));
407 calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize);
408 provided = be32_to_cpu(tag->t_checksum);
409
410 return provided == cpu_to_be32(calculated);
411}
412
413static int do_one_pass(journal_t *journal,
414 struct recovery_info *info, enum passtype pass)
415{
416 unsigned int first_commit_ID, next_commit_ID;
417 unsigned long next_log_block;
418 int err, success = 0;
419 journal_superblock_t * sb;
420 journal_header_t * tmp;
421 struct buffer_head * bh;
422 unsigned int sequence;
423 int blocktype;
424 int tag_bytes = journal_tag_bytes(journal);
425 __u32 crc32_sum = ~0;
426 int descr_csum_size = 0;
427
428
429
430
431
432
433
434 sb = journal->j_superblock;
435 next_commit_ID = be32_to_cpu(sb->s_sequence);
436 next_log_block = be32_to_cpu(sb->s_start);
437
438 first_commit_ID = next_commit_ID;
439 if (pass == PASS_SCAN)
440 info->start_transaction = first_commit_ID;
441
442 jbd_debug(1, "Starting recovery pass %d\n", pass);
443
444
445
446
447
448
449
450
451 while (1) {
452 int flags;
453 char * tagp;
454 journal_block_tag_t * tag;
455 struct buffer_head * obh;
456 struct buffer_head * nbh;
457
458 cond_resched();
459
460
461
462
463
464 if (pass != PASS_SCAN)
465 if (tid_geq(next_commit_ID, info->end_transaction))
466 break;
467
468 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
469 next_commit_ID, next_log_block, journal->j_last);
470
471
472
473
474
475 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
476 err = jread(&bh, journal, next_log_block);
477 if (err)
478 goto failed;
479
480 next_log_block++;
481 wrap(journal, next_log_block);
482
483
484
485
486
487
488
489 tmp = (journal_header_t *)bh->b_data;
490
491 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
492 brelse(bh);
493 break;
494 }
495
496 blocktype = be32_to_cpu(tmp->h_blocktype);
497 sequence = be32_to_cpu(tmp->h_sequence);
498 jbd_debug(3, "Found magic %d, sequence %d\n",
499 blocktype, sequence);
500
501 if (sequence != next_commit_ID) {
502 brelse(bh);
503 break;
504 }
505
506
507
508
509
510 switch(blocktype) {
511 case JBD2_DESCRIPTOR_BLOCK:
512
513 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
514 JBD2_FEATURE_INCOMPAT_CSUM_V2))
515 descr_csum_size =
516 sizeof(struct jbd2_journal_block_tail);
517 if (descr_csum_size > 0 &&
518 !jbd2_descr_block_csum_verify(journal,
519 bh->b_data)) {
520 err = -EIO;
521 goto failed;
522 }
523
524
525
526
527
528 if (pass != PASS_REPLAY) {
529 if (pass == PASS_SCAN &&
530 JBD2_HAS_COMPAT_FEATURE(journal,
531 JBD2_FEATURE_COMPAT_CHECKSUM) &&
532 !info->end_transaction) {
533 if (calc_chksums(journal, bh,
534 &next_log_block,
535 &crc32_sum)) {
536 put_bh(bh);
537 break;
538 }
539 put_bh(bh);
540 continue;
541 }
542 next_log_block += count_tags(journal, bh);
543 wrap(journal, next_log_block);
544 put_bh(bh);
545 continue;
546 }
547
548
549
550
551
552 tagp = &bh->b_data[sizeof(journal_header_t)];
553 while ((tagp - bh->b_data + tag_bytes)
554 <= journal->j_blocksize - descr_csum_size) {
555 unsigned long io_block;
556
557 tag = (journal_block_tag_t *) tagp;
558 flags = be16_to_cpu(tag->t_flags);
559
560 io_block = next_log_block++;
561 wrap(journal, next_log_block);
562 err = jread(&obh, journal, io_block);
563 if (err) {
564
565
566 success = err;
567 printk(KERN_ERR
568 "JBD2: IO error %d recovering "
569 "block %ld in log\n",
570 err, io_block);
571 } else {
572 unsigned long long blocknr;
573
574 J_ASSERT(obh != NULL);
575 blocknr = read_tag_block(tag_bytes,
576 tag);
577
578
579
580
581 if (jbd2_journal_test_revoke
582 (journal, blocknr,
583 next_commit_ID)) {
584 brelse(obh);
585 ++info->nr_revoke_hits;
586 goto skip_write;
587 }
588
589
590 if (!jbd2_block_tag_csum_verify(
591 journal, tag, obh->b_data,
592 be32_to_cpu(tmp->h_sequence))) {
593 brelse(obh);
594 success = -EIO;
595 printk(KERN_ERR "JBD: Invalid "
596 "checksum recovering "
597 "block %llu in log\n",
598 blocknr);
599 continue;
600 }
601
602
603
604 nbh = __getblk(journal->j_fs_dev,
605 blocknr,
606 journal->j_blocksize);
607 if (nbh == NULL) {
608 printk(KERN_ERR
609 "JBD2: Out of memory "
610 "during recovery.\n");
611 err = -ENOMEM;
612 brelse(bh);
613 brelse(obh);
614 goto failed;
615 }
616
617 lock_buffer(nbh);
618 memcpy(nbh->b_data, obh->b_data,
619 journal->j_blocksize);
620 if (flags & JBD2_FLAG_ESCAPE) {
621 *((__be32 *)nbh->b_data) =
622 cpu_to_be32(JBD2_MAGIC_NUMBER);
623 }
624
625 BUFFER_TRACE(nbh, "marking dirty");
626 set_buffer_uptodate(nbh);
627 mark_buffer_dirty(nbh);
628 BUFFER_TRACE(nbh, "marking uptodate");
629 ++info->nr_replays;
630
631 unlock_buffer(nbh);
632 brelse(obh);
633 brelse(nbh);
634 }
635
636 skip_write:
637 tagp += tag_bytes;
638 if (!(flags & JBD2_FLAG_SAME_UUID))
639 tagp += 16;
640
641 if (flags & JBD2_FLAG_LAST_TAG)
642 break;
643 }
644
645 brelse(bh);
646 continue;
647
648 case JBD2_COMMIT_BLOCK:
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684 if (pass == PASS_SCAN &&
685 JBD2_HAS_COMPAT_FEATURE(journal,
686 JBD2_FEATURE_COMPAT_CHECKSUM)) {
687 int chksum_err, chksum_seen;
688 struct commit_header *cbh =
689 (struct commit_header *)bh->b_data;
690 unsigned found_chksum =
691 be32_to_cpu(cbh->h_chksum[0]);
692
693 chksum_err = chksum_seen = 0;
694
695 if (info->end_transaction) {
696 journal->j_failed_commit =
697 info->end_transaction;
698 brelse(bh);
699 break;
700 }
701
702 if (crc32_sum == found_chksum &&
703 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
704 cbh->h_chksum_size ==
705 JBD2_CRC32_CHKSUM_SIZE)
706 chksum_seen = 1;
707 else if (!(cbh->h_chksum_type == 0 &&
708 cbh->h_chksum_size == 0 &&
709 found_chksum == 0 &&
710 !chksum_seen))
711
712
713
714
715
716
717
718
719
720
721 chksum_err = 1;
722
723 if (chksum_err) {
724 info->end_transaction = next_commit_ID;
725
726 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
727 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
728 journal->j_failed_commit =
729 next_commit_ID;
730 brelse(bh);
731 break;
732 }
733 }
734 crc32_sum = ~0;
735 }
736 if (pass == PASS_SCAN &&
737 !jbd2_commit_block_csum_verify(journal,
738 bh->b_data)) {
739 info->end_transaction = next_commit_ID;
740
741 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
742 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
743 journal->j_failed_commit =
744 next_commit_ID;
745 brelse(bh);
746 break;
747 }
748 }
749 brelse(bh);
750 next_commit_ID++;
751 continue;
752
753 case JBD2_REVOKE_BLOCK:
754
755
756 if (pass != PASS_REVOKE) {
757 brelse(bh);
758 continue;
759 }
760
761 err = scan_revoke_records(journal, bh,
762 next_commit_ID, info);
763 brelse(bh);
764 if (err)
765 goto failed;
766 continue;
767
768 default:
769 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
770 blocktype);
771 brelse(bh);
772 goto done;
773 }
774 }
775
776 done:
777
778
779
780
781
782
783
784 if (pass == PASS_SCAN) {
785 if (!info->end_transaction)
786 info->end_transaction = next_commit_ID;
787 } else {
788
789
790 if (info->end_transaction != next_commit_ID) {
791 printk(KERN_ERR "JBD2: recovery pass %d ended at "
792 "transaction %u, expected %u\n",
793 pass, next_commit_ID, info->end_transaction);
794 if (!success)
795 success = -EIO;
796 }
797 }
798
799 return success;
800
801 failed:
802 return err;
803}
804
805static int jbd2_revoke_block_csum_verify(journal_t *j,
806 void *buf)
807{
808 struct jbd2_journal_revoke_tail *tail;
809 __u32 provided, calculated;
810
811 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
812 return 1;
813
814 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
815 sizeof(struct jbd2_journal_revoke_tail));
816 provided = tail->r_checksum;
817 tail->r_checksum = 0;
818 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
819 tail->r_checksum = provided;
820
821 provided = be32_to_cpu(provided);
822 return provided == calculated;
823}
824
825
826
827static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
828 tid_t sequence, struct recovery_info *info)
829{
830 jbd2_journal_revoke_header_t *header;
831 int offset, max;
832 int record_len = 4;
833
834 header = (jbd2_journal_revoke_header_t *) bh->b_data;
835 offset = sizeof(jbd2_journal_revoke_header_t);
836 max = be32_to_cpu(header->r_count);
837
838 if (!jbd2_revoke_block_csum_verify(journal, header))
839 return -EINVAL;
840
841 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
842 record_len = 8;
843
844 while (offset + record_len <= max) {
845 unsigned long long blocknr;
846 int err;
847
848 if (record_len == 4)
849 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
850 else
851 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
852 offset += record_len;
853 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
854 if (err)
855 return err;
856 ++info->nr_revokes;
857 }
858 return 0;
859}
860