1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/crc32.h>
24#endif
25
26
27
28
29
30struct recovery_info
31{
32 tid_t start_transaction;
33 tid_t end_transaction;
34
35 int nr_replays;
36 int nr_revokes;
37 int nr_revoke_hits;
38};
39
40enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
41static int do_one_pass(journal_t *journal,
42 struct recovery_info *info, enum passtype pass);
43static int scan_revoke_records(journal_t *, struct buffer_head *,
44 tid_t, struct recovery_info *);
45
46#ifdef __KERNEL__
47
48
/*
 * Release the first @n buffer heads stored in @b, walking from the
 * highest index down to index 0.  Does nothing when @n is not positive.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int i;

	for (i = n - 1; i >= 0; i--)
		brelse(b[i]);
}
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68#define MAXBUF 8
69static int do_readahead(journal_t *journal, unsigned int start)
70{
71 int err;
72 unsigned int max, nbufs, next;
73 unsigned long long blocknr;
74 struct buffer_head *bh;
75
76 struct buffer_head * bufs[MAXBUF];
77
78
79 max = start + (128 * 1024 / journal->j_blocksize);
80 if (max > journal->j_maxlen)
81 max = journal->j_maxlen;
82
83
84
85
86 nbufs = 0;
87
88 for (next = start; next < max; next++) {
89 err = jbd2_journal_bmap(journal, next, &blocknr);
90
91 if (err) {
92 printk(KERN_ERR "JBD2: bad block at offset %u\n",
93 next);
94 goto failed;
95 }
96
97 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
98 if (!bh) {
99 err = -ENOMEM;
100 goto failed;
101 }
102
103 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
104 bufs[nbufs++] = bh;
105 if (nbufs == MAXBUF) {
106 ll_rw_block(READ, nbufs, bufs);
107 journal_brelse_array(bufs, nbufs);
108 nbufs = 0;
109 }
110 } else
111 brelse(bh);
112 }
113
114 if (nbufs)
115 ll_rw_block(READ, nbufs, bufs);
116 err = 0;
117
118failed:
119 if (nbufs)
120 journal_brelse_array(bufs, nbufs);
121 return err;
122}
123
124#endif
125
126
127
128
129
130
131static int jread(struct buffer_head **bhp, journal_t *journal,
132 unsigned int offset)
133{
134 int err;
135 unsigned long long blocknr;
136 struct buffer_head *bh;
137
138 *bhp = NULL;
139
140 if (offset >= journal->j_maxlen) {
141 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
142 return -EIO;
143 }
144
145 err = jbd2_journal_bmap(journal, offset, &blocknr);
146
147 if (err) {
148 printk(KERN_ERR "JBD2: bad block at offset %u\n",
149 offset);
150 return err;
151 }
152
153 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
154 if (!bh)
155 return -ENOMEM;
156
157 if (!buffer_uptodate(bh)) {
158
159
160 if (!buffer_req(bh))
161 do_readahead(journal, offset);
162 wait_on_buffer(bh);
163 }
164
165 if (!buffer_uptodate(bh)) {
166 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
167 offset);
168 brelse(bh);
169 return -EIO;
170 }
171
172 *bhp = bh;
173 return 0;
174}
175
176
177
178
179
180
181static int count_tags(journal_t *journal, struct buffer_head *bh)
182{
183 char * tagp;
184 journal_block_tag_t * tag;
185 int nr = 0, size = journal->j_blocksize;
186 int tag_bytes = journal_tag_bytes(journal);
187
188 tagp = &bh->b_data[sizeof(journal_header_t)];
189
190 while ((tagp - bh->b_data + tag_bytes) <= size) {
191 tag = (journal_block_tag_t *) tagp;
192
193 nr++;
194 tagp += tag_bytes;
195 if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
196 tagp += 16;
197
198 if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
199 break;
200 }
201
202 return nr;
203}
204
205
206
/*
 * Fold a log block number back into the journal's circular block range:
 * once @var has reached (journal)->j_last it is reduced by the length of
 * the range, j_last - j_first.
 *
 * @var must be a modifiable lvalue and is evaluated more than once, so it
 * must not have side effects.  Both arguments are parenthesized in the
 * expansion so that any lvalue expression can be passed safely.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
212
213
214
215
216
217
218
219
220
221
222
223
224
225int jbd2_journal_recover(journal_t *journal)
226{
227 int err, err2;
228 journal_superblock_t * sb;
229
230 struct recovery_info info;
231
232 memset(&info, 0, sizeof(info));
233 sb = journal->j_superblock;
234
235
236
237
238
239
240
241 if (!sb->s_start) {
242 jbd_debug(1, "No recovery required, last transaction %d\n",
243 be32_to_cpu(sb->s_sequence));
244 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
245 return 0;
246 }
247
248 err = do_one_pass(journal, &info, PASS_SCAN);
249 if (!err)
250 err = do_one_pass(journal, &info, PASS_REVOKE);
251 if (!err)
252 err = do_one_pass(journal, &info, PASS_REPLAY);
253
254 jbd_debug(1, "JBD2: recovery, exit status %d, "
255 "recovered transactions %u to %u\n",
256 err, info.start_transaction, info.end_transaction);
257 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259
260
261
262 journal->j_transaction_sequence = ++info.end_transaction;
263
264 jbd2_journal_clear_revoke(journal);
265 err2 = sync_blockdev(journal->j_fs_dev);
266 if (!err)
267 err = err2;
268
269 return err;
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285int jbd2_journal_skip_recovery(journal_t *journal)
286{
287 int err;
288
289 struct recovery_info info;
290
291 memset (&info, 0, sizeof(info));
292
293 err = do_one_pass(journal, &info, PASS_SCAN);
294
295 if (err) {
296 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
297 ++journal->j_transaction_sequence;
298 } else {
299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence);
302 jbd_debug(1,
303 "JBD2: ignoring %d transaction%s from the journal.\n",
304 dropped, (dropped == 1) ? "" : "s");
305#endif
306 journal->j_transaction_sequence = ++info.end_transaction;
307 }
308
309 journal->j_tail = 0;
310 return err;
311}
312
313static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
314{
315 unsigned long long block = be32_to_cpu(tag->t_blocknr);
316 if (tag_bytes > JBD2_TAG_SIZE32)
317 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
318 return block;
319}
320
321
322
323
324
325static int calc_chksums(journal_t *journal, struct buffer_head *bh,
326 unsigned long *next_log_block, __u32 *crc32_sum)
327{
328 int i, num_blks, err;
329 unsigned long io_block;
330 struct buffer_head *obh;
331
332 num_blks = count_tags(journal, bh);
333
334 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
335
336 for (i = 0; i < num_blks; i++) {
337 io_block = (*next_log_block)++;
338 wrap(journal, *next_log_block);
339 err = jread(&obh, journal, io_block);
340 if (err) {
341 printk(KERN_ERR "JBD2: IO error %d recovering block "
342 "%lu in log\n", err, io_block);
343 return 1;
344 } else {
345 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
346 obh->b_size);
347 }
348 put_bh(obh);
349 }
350 return 0;
351}
352
353static int do_one_pass(journal_t *journal,
354 struct recovery_info *info, enum passtype pass)
355{
356 unsigned int first_commit_ID, next_commit_ID;
357 unsigned long next_log_block;
358 int err, success = 0;
359 journal_superblock_t * sb;
360 journal_header_t * tmp;
361 struct buffer_head * bh;
362 unsigned int sequence;
363 int blocktype;
364 int tag_bytes = journal_tag_bytes(journal);
365 __u32 crc32_sum = ~0;
366
367
368
369
370
371
372
373 sb = journal->j_superblock;
374 next_commit_ID = be32_to_cpu(sb->s_sequence);
375 next_log_block = be32_to_cpu(sb->s_start);
376
377 first_commit_ID = next_commit_ID;
378 if (pass == PASS_SCAN)
379 info->start_transaction = first_commit_ID;
380
381 jbd_debug(1, "Starting recovery pass %d\n", pass);
382
383
384
385
386
387
388
389
390 while (1) {
391 int flags;
392 char * tagp;
393 journal_block_tag_t * tag;
394 struct buffer_head * obh;
395 struct buffer_head * nbh;
396
397 cond_resched();
398
399
400
401
402
403 if (pass != PASS_SCAN)
404 if (tid_geq(next_commit_ID, info->end_transaction))
405 break;
406
407 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
408 next_commit_ID, next_log_block, journal->j_last);
409
410
411
412
413
414 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
415 err = jread(&bh, journal, next_log_block);
416 if (err)
417 goto failed;
418
419 next_log_block++;
420 wrap(journal, next_log_block);
421
422
423
424
425
426
427
428 tmp = (journal_header_t *)bh->b_data;
429
430 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
431 brelse(bh);
432 break;
433 }
434
435 blocktype = be32_to_cpu(tmp->h_blocktype);
436 sequence = be32_to_cpu(tmp->h_sequence);
437 jbd_debug(3, "Found magic %d, sequence %d\n",
438 blocktype, sequence);
439
440 if (sequence != next_commit_ID) {
441 brelse(bh);
442 break;
443 }
444
445
446
447
448
449 switch(blocktype) {
450 case JBD2_DESCRIPTOR_BLOCK:
451
452
453
454
455 if (pass != PASS_REPLAY) {
456 if (pass == PASS_SCAN &&
457 JBD2_HAS_COMPAT_FEATURE(journal,
458 JBD2_FEATURE_COMPAT_CHECKSUM) &&
459 !info->end_transaction) {
460 if (calc_chksums(journal, bh,
461 &next_log_block,
462 &crc32_sum)) {
463 put_bh(bh);
464 break;
465 }
466 put_bh(bh);
467 continue;
468 }
469 next_log_block += count_tags(journal, bh);
470 wrap(journal, next_log_block);
471 put_bh(bh);
472 continue;
473 }
474
475
476
477
478
479 tagp = &bh->b_data[sizeof(journal_header_t)];
480 while ((tagp - bh->b_data + tag_bytes)
481 <= journal->j_blocksize) {
482 unsigned long io_block;
483
484 tag = (journal_block_tag_t *) tagp;
485 flags = be32_to_cpu(tag->t_flags);
486
487 io_block = next_log_block++;
488 wrap(journal, next_log_block);
489 err = jread(&obh, journal, io_block);
490 if (err) {
491
492
493 success = err;
494 printk(KERN_ERR
495 "JBD2: IO error %d recovering "
496 "block %ld in log\n",
497 err, io_block);
498 } else {
499 unsigned long long blocknr;
500
501 J_ASSERT(obh != NULL);
502 blocknr = read_tag_block(tag_bytes,
503 tag);
504
505
506
507
508 if (jbd2_journal_test_revoke
509 (journal, blocknr,
510 next_commit_ID)) {
511 brelse(obh);
512 ++info->nr_revoke_hits;
513 goto skip_write;
514 }
515
516
517
518 nbh = __getblk(journal->j_fs_dev,
519 blocknr,
520 journal->j_blocksize);
521 if (nbh == NULL) {
522 printk(KERN_ERR
523 "JBD2: Out of memory "
524 "during recovery.\n");
525 err = -ENOMEM;
526 brelse(bh);
527 brelse(obh);
528 goto failed;
529 }
530
531 lock_buffer(nbh);
532 memcpy(nbh->b_data, obh->b_data,
533 journal->j_blocksize);
534 if (flags & JBD2_FLAG_ESCAPE) {
535 *((__be32 *)nbh->b_data) =
536 cpu_to_be32(JBD2_MAGIC_NUMBER);
537 }
538
539 BUFFER_TRACE(nbh, "marking dirty");
540 set_buffer_uptodate(nbh);
541 mark_buffer_dirty(nbh);
542 BUFFER_TRACE(nbh, "marking uptodate");
543 ++info->nr_replays;
544
545 unlock_buffer(nbh);
546 brelse(obh);
547 brelse(nbh);
548 }
549
550 skip_write:
551 tagp += tag_bytes;
552 if (!(flags & JBD2_FLAG_SAME_UUID))
553 tagp += 16;
554
555 if (flags & JBD2_FLAG_LAST_TAG)
556 break;
557 }
558
559 brelse(bh);
560 continue;
561
562 case JBD2_COMMIT_BLOCK:
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598 if (pass == PASS_SCAN &&
599 JBD2_HAS_COMPAT_FEATURE(journal,
600 JBD2_FEATURE_COMPAT_CHECKSUM)) {
601 int chksum_err, chksum_seen;
602 struct commit_header *cbh =
603 (struct commit_header *)bh->b_data;
604 unsigned found_chksum =
605 be32_to_cpu(cbh->h_chksum[0]);
606
607 chksum_err = chksum_seen = 0;
608
609 if (info->end_transaction) {
610 journal->j_failed_commit =
611 info->end_transaction;
612 brelse(bh);
613 break;
614 }
615
616 if (crc32_sum == found_chksum &&
617 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
618 cbh->h_chksum_size ==
619 JBD2_CRC32_CHKSUM_SIZE)
620 chksum_seen = 1;
621 else if (!(cbh->h_chksum_type == 0 &&
622 cbh->h_chksum_size == 0 &&
623 found_chksum == 0 &&
624 !chksum_seen))
625
626
627
628
629
630
631
632
633
634
635 chksum_err = 1;
636
637 if (chksum_err) {
638 info->end_transaction = next_commit_ID;
639
640 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
641 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
642 journal->j_failed_commit =
643 next_commit_ID;
644 brelse(bh);
645 break;
646 }
647 }
648 crc32_sum = ~0;
649 }
650 brelse(bh);
651 next_commit_ID++;
652 continue;
653
654 case JBD2_REVOKE_BLOCK:
655
656
657 if (pass != PASS_REVOKE) {
658 brelse(bh);
659 continue;
660 }
661
662 err = scan_revoke_records(journal, bh,
663 next_commit_ID, info);
664 brelse(bh);
665 if (err)
666 goto failed;
667 continue;
668
669 default:
670 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
671 blocktype);
672 brelse(bh);
673 goto done;
674 }
675 }
676
677 done:
678
679
680
681
682
683
684
685 if (pass == PASS_SCAN) {
686 if (!info->end_transaction)
687 info->end_transaction = next_commit_ID;
688 } else {
689
690
691 if (info->end_transaction != next_commit_ID) {
692 printk(KERN_ERR "JBD2: recovery pass %d ended at "
693 "transaction %u, expected %u\n",
694 pass, next_commit_ID, info->end_transaction);
695 if (!success)
696 success = -EIO;
697 }
698 }
699
700 return success;
701
702 failed:
703 return err;
704}
705
706
707
708
709static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
710 tid_t sequence, struct recovery_info *info)
711{
712 jbd2_journal_revoke_header_t *header;
713 int offset, max;
714 int record_len = 4;
715
716 header = (jbd2_journal_revoke_header_t *) bh->b_data;
717 offset = sizeof(jbd2_journal_revoke_header_t);
718 max = be32_to_cpu(header->r_count);
719
720 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
721 record_len = 8;
722
723 while (offset + record_len <= max) {
724 unsigned long long blocknr;
725 int err;
726
727 if (record_len == 4)
728 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
729 else
730 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
731 offset += record_len;
732 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
733 if (err)
734 return err;
735 ++info->nr_revokes;
736 }
737 return 0;
738}
739