1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/crc32.h>
24#include <linux/blkdev.h>
25#endif
26
27
28
29
30
31struct recovery_info
32{
33 tid_t start_transaction;
34 tid_t end_transaction;
35
36 int nr_replays;
37 int nr_revokes;
38 int nr_revoke_hits;
39};
40
41enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42static int do_one_pass(journal_t *journal,
43 struct recovery_info *info, enum passtype pass);
44static int scan_revoke_records(journal_t *, struct buffer_head *,
45 tid_t, struct recovery_info *);
46
47#ifdef __KERNEL__
48
49
/*
 * Drop one reference on each of the first @n buffer heads in @b,
 * walking the array from the last entry down to the first.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69#define MAXBUF 8
70static int do_readahead(journal_t *journal, unsigned int start)
71{
72 int err;
73 unsigned int max, nbufs, next;
74 unsigned long long blocknr;
75 struct buffer_head *bh;
76
77 struct buffer_head * bufs[MAXBUF];
78
79
80 max = start + (128 * 1024 / journal->j_blocksize);
81 if (max > journal->j_maxlen)
82 max = journal->j_maxlen;
83
84
85
86
87 nbufs = 0;
88
89 for (next = start; next < max; next++) {
90 err = jbd2_journal_bmap(journal, next, &blocknr);
91
92 if (err) {
93 printk(KERN_ERR "JBD2: bad block at offset %u\n",
94 next);
95 goto failed;
96 }
97
98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99 if (!bh) {
100 err = -ENOMEM;
101 goto failed;
102 }
103
104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105 bufs[nbufs++] = bh;
106 if (nbufs == MAXBUF) {
107 ll_rw_block(READ, nbufs, bufs);
108 journal_brelse_array(bufs, nbufs);
109 nbufs = 0;
110 }
111 } else
112 brelse(bh);
113 }
114
115 if (nbufs)
116 ll_rw_block(READ, nbufs, bufs);
117 err = 0;
118
119failed:
120 if (nbufs)
121 journal_brelse_array(bufs, nbufs);
122 return err;
123}
124
125#endif
126
127
128
129
130
131
132static int jread(struct buffer_head **bhp, journal_t *journal,
133 unsigned int offset)
134{
135 int err;
136 unsigned long long blocknr;
137 struct buffer_head *bh;
138
139 *bhp = NULL;
140
141 if (offset >= journal->j_maxlen) {
142 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
143 return -EIO;
144 }
145
146 err = jbd2_journal_bmap(journal, offset, &blocknr);
147
148 if (err) {
149 printk(KERN_ERR "JBD2: bad block at offset %u\n",
150 offset);
151 return err;
152 }
153
154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155 if (!bh)
156 return -ENOMEM;
157
158 if (!buffer_uptodate(bh)) {
159
160
161 if (!buffer_req(bh))
162 do_readahead(journal, offset);
163 wait_on_buffer(bh);
164 }
165
166 if (!buffer_uptodate(bh)) {
167 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
168 offset);
169 brelse(bh);
170 return -EIO;
171 }
172
173 *bhp = bh;
174 return 0;
175}
176
177static int jbd2_descr_block_csum_verify(journal_t *j,
178 void *buf)
179{
180 struct jbd2_journal_block_tail *tail;
181 __u32 provided, calculated;
182
183 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
184 return 1;
185
186 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
187 sizeof(struct jbd2_journal_block_tail));
188 provided = tail->t_checksum;
189 tail->t_checksum = 0;
190 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
191 tail->t_checksum = provided;
192
193 provided = be32_to_cpu(provided);
194 return provided == calculated;
195}
196
197
198
199
200
201static int count_tags(journal_t *journal, struct buffer_head *bh)
202{
203 char * tagp;
204 journal_block_tag_t * tag;
205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal);
207
208 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
209 size -= sizeof(struct jbd2_journal_block_tail);
210
211 tagp = &bh->b_data[sizeof(journal_header_t)];
212
213 while ((tagp - bh->b_data + tag_bytes) <= size) {
214 tag = (journal_block_tag_t *) tagp;
215
216 nr++;
217 tagp += tag_bytes;
218 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
219 tagp += 16;
220
221 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
222 break;
223 }
224
225 return nr;
226}
227
228
229
/*
 * Keep a log block number inside the journal: once @var reaches j_last,
 * pull it back by the distance between j_last and j_first.
 * @var must be an lvalue; it is modified in place.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
235
236
237
238
239
240
241
242
243
244
245
246
247
248int jbd2_journal_recover(journal_t *journal)
249{
250 int err, err2;
251 journal_superblock_t * sb;
252
253 struct recovery_info info;
254
255 memset(&info, 0, sizeof(info));
256 sb = journal->j_superblock;
257
258
259
260
261
262
263
264 if (!sb->s_start) {
265 jbd_debug(1, "No recovery required, last transaction %d\n",
266 be32_to_cpu(sb->s_sequence));
267 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
268 return 0;
269 }
270
271 err = do_one_pass(journal, &info, PASS_SCAN);
272 if (!err)
273 err = do_one_pass(journal, &info, PASS_REVOKE);
274 if (!err)
275 err = do_one_pass(journal, &info, PASS_REPLAY);
276
277 jbd_debug(1, "JBD2: recovery, exit status %d, "
278 "recovered transactions %u to %u\n",
279 err, info.start_transaction, info.end_transaction);
280 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
281 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
282
283
284
285 journal->j_transaction_sequence = ++info.end_transaction;
286
287 jbd2_journal_clear_revoke(journal);
288 err2 = sync_blockdev(journal->j_fs_dev);
289 if (!err)
290 err = err2;
291
292 if (journal->j_flags & JBD2_BARRIER) {
293 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
294 if (!err)
295 err = err2;
296 }
297 return err;
298}
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313int jbd2_journal_skip_recovery(journal_t *journal)
314{
315 int err;
316
317 struct recovery_info info;
318
319 memset (&info, 0, sizeof(info));
320
321 err = do_one_pass(journal, &info, PASS_SCAN);
322
323 if (err) {
324 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
325 ++journal->j_transaction_sequence;
326 } else {
327#ifdef CONFIG_JBD2_DEBUG
328 int dropped = info.end_transaction -
329 be32_to_cpu(journal->j_superblock->s_sequence);
330 jbd_debug(1,
331 "JBD2: ignoring %d transaction%s from the journal.\n",
332 dropped, (dropped == 1) ? "" : "s");
333#endif
334 journal->j_transaction_sequence = ++info.end_transaction;
335 }
336
337 journal->j_tail = 0;
338 return err;
339}
340
341static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
342{
343 unsigned long long block = be32_to_cpu(tag->t_blocknr);
344 if (tag_bytes > JBD2_TAG_SIZE32)
345 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
346 return block;
347}
348
349
350
351
352
353static int calc_chksums(journal_t *journal, struct buffer_head *bh,
354 unsigned long *next_log_block, __u32 *crc32_sum)
355{
356 int i, num_blks, err;
357 unsigned long io_block;
358 struct buffer_head *obh;
359
360 num_blks = count_tags(journal, bh);
361
362 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
363
364 for (i = 0; i < num_blks; i++) {
365 io_block = (*next_log_block)++;
366 wrap(journal, *next_log_block);
367 err = jread(&obh, journal, io_block);
368 if (err) {
369 printk(KERN_ERR "JBD2: IO error %d recovering block "
370 "%lu in log\n", err, io_block);
371 return 1;
372 } else {
373 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
374 obh->b_size);
375 }
376 put_bh(obh);
377 }
378 return 0;
379}
380
381static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
382{
383 struct commit_header *h;
384 __u32 provided, calculated;
385
386 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
387 return 1;
388
389 h = buf;
390 provided = h->h_chksum[0];
391 h->h_chksum[0] = 0;
392 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
393 h->h_chksum[0] = provided;
394
395 provided = be32_to_cpu(provided);
396 return provided == calculated;
397}
398
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence)
401{
402 __u32 provided, calculated;
403
404 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
405 return 1;
406
407 sequence = cpu_to_be32(sequence);
408 calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
409 sizeof(sequence));
410 calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize);
411 provided = be32_to_cpu(tag->t_checksum);
412
413 return provided == cpu_to_be32(calculated);
414}
415
416static int do_one_pass(journal_t *journal,
417 struct recovery_info *info, enum passtype pass)
418{
419 unsigned int first_commit_ID, next_commit_ID;
420 unsigned long next_log_block;
421 int err, success = 0;
422 journal_superblock_t * sb;
423 journal_header_t * tmp;
424 struct buffer_head * bh;
425 unsigned int sequence;
426 int blocktype;
427 int tag_bytes = journal_tag_bytes(journal);
428 __u32 crc32_sum = ~0;
429 int descr_csum_size = 0;
430
431
432
433
434
435
436
437 sb = journal->j_superblock;
438 next_commit_ID = be32_to_cpu(sb->s_sequence);
439 next_log_block = be32_to_cpu(sb->s_start);
440
441 first_commit_ID = next_commit_ID;
442 if (pass == PASS_SCAN)
443 info->start_transaction = first_commit_ID;
444
445 jbd_debug(1, "Starting recovery pass %d\n", pass);
446
447
448
449
450
451
452
453
454 while (1) {
455 int flags;
456 char * tagp;
457 journal_block_tag_t * tag;
458 struct buffer_head * obh;
459 struct buffer_head * nbh;
460
461 cond_resched();
462
463
464
465
466
467 if (pass != PASS_SCAN)
468 if (tid_geq(next_commit_ID, info->end_transaction))
469 break;
470
471 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
472 next_commit_ID, next_log_block, journal->j_last);
473
474
475
476
477
478 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
479 err = jread(&bh, journal, next_log_block);
480 if (err)
481 goto failed;
482
483 next_log_block++;
484 wrap(journal, next_log_block);
485
486
487
488
489
490
491
492 tmp = (journal_header_t *)bh->b_data;
493
494 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
495 brelse(bh);
496 break;
497 }
498
499 blocktype = be32_to_cpu(tmp->h_blocktype);
500 sequence = be32_to_cpu(tmp->h_sequence);
501 jbd_debug(3, "Found magic %d, sequence %d\n",
502 blocktype, sequence);
503
504 if (sequence != next_commit_ID) {
505 brelse(bh);
506 break;
507 }
508
509
510
511
512
513 switch(blocktype) {
514 case JBD2_DESCRIPTOR_BLOCK:
515
516 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
517 JBD2_FEATURE_INCOMPAT_CSUM_V2))
518 descr_csum_size =
519 sizeof(struct jbd2_journal_block_tail);
520 if (descr_csum_size > 0 &&
521 !jbd2_descr_block_csum_verify(journal,
522 bh->b_data)) {
523 err = -EIO;
524 goto failed;
525 }
526
527
528
529
530
531 if (pass != PASS_REPLAY) {
532 if (pass == PASS_SCAN &&
533 JBD2_HAS_COMPAT_FEATURE(journal,
534 JBD2_FEATURE_COMPAT_CHECKSUM) &&
535 !info->end_transaction) {
536 if (calc_chksums(journal, bh,
537 &next_log_block,
538 &crc32_sum)) {
539 put_bh(bh);
540 break;
541 }
542 put_bh(bh);
543 continue;
544 }
545 next_log_block += count_tags(journal, bh);
546 wrap(journal, next_log_block);
547 put_bh(bh);
548 continue;
549 }
550
551
552
553
554
555 tagp = &bh->b_data[sizeof(journal_header_t)];
556 while ((tagp - bh->b_data + tag_bytes)
557 <= journal->j_blocksize - descr_csum_size) {
558 unsigned long io_block;
559
560 tag = (journal_block_tag_t *) tagp;
561 flags = be16_to_cpu(tag->t_flags);
562
563 io_block = next_log_block++;
564 wrap(journal, next_log_block);
565 err = jread(&obh, journal, io_block);
566 if (err) {
567
568
569 success = err;
570 printk(KERN_ERR
571 "JBD2: IO error %d recovering "
572 "block %ld in log\n",
573 err, io_block);
574 } else {
575 unsigned long long blocknr;
576
577 J_ASSERT(obh != NULL);
578 blocknr = read_tag_block(tag_bytes,
579 tag);
580
581
582
583
584 if (jbd2_journal_test_revoke
585 (journal, blocknr,
586 next_commit_ID)) {
587 brelse(obh);
588 ++info->nr_revoke_hits;
589 goto skip_write;
590 }
591
592
593 if (!jbd2_block_tag_csum_verify(
594 journal, tag, obh->b_data,
595 be32_to_cpu(tmp->h_sequence))) {
596 brelse(obh);
597 success = -EIO;
598 printk(KERN_ERR "JBD: Invalid "
599 "checksum recovering "
600 "block %llu in log\n",
601 blocknr);
602 continue;
603 }
604
605
606
607 nbh = __getblk(journal->j_fs_dev,
608 blocknr,
609 journal->j_blocksize);
610 if (nbh == NULL) {
611 printk(KERN_ERR
612 "JBD2: Out of memory "
613 "during recovery.\n");
614 err = -ENOMEM;
615 brelse(bh);
616 brelse(obh);
617 goto failed;
618 }
619
620 lock_buffer(nbh);
621 memcpy(nbh->b_data, obh->b_data,
622 journal->j_blocksize);
623 if (flags & JBD2_FLAG_ESCAPE) {
624 *((__be32 *)nbh->b_data) =
625 cpu_to_be32(JBD2_MAGIC_NUMBER);
626 }
627
628 BUFFER_TRACE(nbh, "marking dirty");
629 set_buffer_uptodate(nbh);
630 mark_buffer_dirty(nbh);
631 BUFFER_TRACE(nbh, "marking uptodate");
632 ++info->nr_replays;
633
634 unlock_buffer(nbh);
635 brelse(obh);
636 brelse(nbh);
637 }
638
639 skip_write:
640 tagp += tag_bytes;
641 if (!(flags & JBD2_FLAG_SAME_UUID))
642 tagp += 16;
643
644 if (flags & JBD2_FLAG_LAST_TAG)
645 break;
646 }
647
648 brelse(bh);
649 continue;
650
651 case JBD2_COMMIT_BLOCK:
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687 if (pass == PASS_SCAN &&
688 JBD2_HAS_COMPAT_FEATURE(journal,
689 JBD2_FEATURE_COMPAT_CHECKSUM)) {
690 int chksum_err, chksum_seen;
691 struct commit_header *cbh =
692 (struct commit_header *)bh->b_data;
693 unsigned found_chksum =
694 be32_to_cpu(cbh->h_chksum[0]);
695
696 chksum_err = chksum_seen = 0;
697
698 if (info->end_transaction) {
699 journal->j_failed_commit =
700 info->end_transaction;
701 brelse(bh);
702 break;
703 }
704
705 if (crc32_sum == found_chksum &&
706 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
707 cbh->h_chksum_size ==
708 JBD2_CRC32_CHKSUM_SIZE)
709 chksum_seen = 1;
710 else if (!(cbh->h_chksum_type == 0 &&
711 cbh->h_chksum_size == 0 &&
712 found_chksum == 0 &&
713 !chksum_seen))
714
715
716
717
718
719
720
721
722
723
724 chksum_err = 1;
725
726 if (chksum_err) {
727 info->end_transaction = next_commit_ID;
728
729 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
730 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
731 journal->j_failed_commit =
732 next_commit_ID;
733 brelse(bh);
734 break;
735 }
736 }
737 crc32_sum = ~0;
738 }
739 if (pass == PASS_SCAN &&
740 !jbd2_commit_block_csum_verify(journal,
741 bh->b_data)) {
742 info->end_transaction = next_commit_ID;
743
744 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
745 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
746 journal->j_failed_commit =
747 next_commit_ID;
748 brelse(bh);
749 break;
750 }
751 }
752 brelse(bh);
753 next_commit_ID++;
754 continue;
755
756 case JBD2_REVOKE_BLOCK:
757
758
759 if (pass != PASS_REVOKE) {
760 brelse(bh);
761 continue;
762 }
763
764 err = scan_revoke_records(journal, bh,
765 next_commit_ID, info);
766 brelse(bh);
767 if (err)
768 goto failed;
769 continue;
770
771 default:
772 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
773 blocktype);
774 brelse(bh);
775 goto done;
776 }
777 }
778
779 done:
780
781
782
783
784
785
786
787 if (pass == PASS_SCAN) {
788 if (!info->end_transaction)
789 info->end_transaction = next_commit_ID;
790 } else {
791
792
793 if (info->end_transaction != next_commit_ID) {
794 printk(KERN_ERR "JBD2: recovery pass %d ended at "
795 "transaction %u, expected %u\n",
796 pass, next_commit_ID, info->end_transaction);
797 if (!success)
798 success = -EIO;
799 }
800 }
801
802 return success;
803
804 failed:
805 return err;
806}
807
808static int jbd2_revoke_block_csum_verify(journal_t *j,
809 void *buf)
810{
811 struct jbd2_journal_revoke_tail *tail;
812 __u32 provided, calculated;
813
814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
815 return 1;
816
817 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
818 sizeof(struct jbd2_journal_revoke_tail));
819 provided = tail->r_checksum;
820 tail->r_checksum = 0;
821 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
822 tail->r_checksum = provided;
823
824 provided = be32_to_cpu(provided);
825 return provided == calculated;
826}
827
828
829
830static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
831 tid_t sequence, struct recovery_info *info)
832{
833 jbd2_journal_revoke_header_t *header;
834 int offset, max;
835 int record_len = 4;
836
837 header = (jbd2_journal_revoke_header_t *) bh->b_data;
838 offset = sizeof(jbd2_journal_revoke_header_t);
839 max = be32_to_cpu(header->r_count);
840
841 if (!jbd2_revoke_block_csum_verify(journal, header))
842 return -EINVAL;
843
844 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
845 record_len = 8;
846
847 while (offset + record_len <= max) {
848 unsigned long long blocknr;
849 int err;
850
851 if (record_len == 4)
852 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
853 else
854 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
855 offset += record_len;
856 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
857 if (err)
858 return err;
859 ++info->nr_revokes;
860 }
861 return 0;
862}
863