1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/slab.h>
24#include <linux/crc32.h>
25#endif
26
27
28
29
30
31struct recovery_info
32{
33 tid_t start_transaction;
34 tid_t end_transaction;
35
36 int nr_replays;
37 int nr_revokes;
38 int nr_revoke_hits;
39};
40
41enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42static int do_one_pass(journal_t *journal,
43 struct recovery_info *info, enum passtype pass);
44static int scan_revoke_records(journal_t *, struct buffer_head *,
45 tid_t, struct recovery_info *);
46
47#ifdef __KERNEL__
48
49
/* Call brelse() on the first n entries of b[], walking from the last to the first. */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69#define MAXBUF 8
70static int do_readahead(journal_t *journal, unsigned int start)
71{
72 int err;
73 unsigned int max, nbufs, next;
74 unsigned long long blocknr;
75 struct buffer_head *bh;
76
77 struct buffer_head * bufs[MAXBUF];
78
79
80 max = start + (128 * 1024 / journal->j_blocksize);
81 if (max > journal->j_maxlen)
82 max = journal->j_maxlen;
83
84
85
86
87 nbufs = 0;
88
89 for (next = start; next < max; next++) {
90 err = jbd2_journal_bmap(journal, next, &blocknr);
91
92 if (err) {
93 printk (KERN_ERR "JBD: bad block at offset %u\n",
94 next);
95 goto failed;
96 }
97
98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99 if (!bh) {
100 err = -ENOMEM;
101 goto failed;
102 }
103
104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105 bufs[nbufs++] = bh;
106 if (nbufs == MAXBUF) {
107 ll_rw_block(READ, nbufs, bufs);
108 journal_brelse_array(bufs, nbufs);
109 nbufs = 0;
110 }
111 } else
112 brelse(bh);
113 }
114
115 if (nbufs)
116 ll_rw_block(READ, nbufs, bufs);
117 err = 0;
118
119failed:
120 if (nbufs)
121 journal_brelse_array(bufs, nbufs);
122 return err;
123}
124
125#endif
126
127
128
129
130
131
132static int jread(struct buffer_head **bhp, journal_t *journal,
133 unsigned int offset)
134{
135 int err;
136 unsigned long long blocknr;
137 struct buffer_head *bh;
138
139 *bhp = NULL;
140
141 if (offset >= journal->j_maxlen) {
142 printk(KERN_ERR "JBD: corrupted journal superblock\n");
143 return -EIO;
144 }
145
146 err = jbd2_journal_bmap(journal, offset, &blocknr);
147
148 if (err) {
149 printk (KERN_ERR "JBD: bad block at offset %u\n",
150 offset);
151 return err;
152 }
153
154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155 if (!bh)
156 return -ENOMEM;
157
158 if (!buffer_uptodate(bh)) {
159
160
161 if (!buffer_req(bh))
162 do_readahead(journal, offset);
163 wait_on_buffer(bh);
164 }
165
166 if (!buffer_uptodate(bh)) {
167 printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
168 offset);
169 brelse(bh);
170 return -EIO;
171 }
172
173 *bhp = bh;
174 return 0;
175}
176
177
178
179
180
181
182static int count_tags(journal_t *journal, struct buffer_head *bh)
183{
184 char * tagp;
185 journal_block_tag_t * tag;
186 int nr = 0, size = journal->j_blocksize;
187 int tag_bytes = journal_tag_bytes(journal);
188
189 tagp = &bh->b_data[sizeof(journal_header_t)];
190
191 while ((tagp - bh->b_data + tag_bytes) <= size) {
192 tag = (journal_block_tag_t *) tagp;
193
194 nr++;
195 tagp += tag_bytes;
196 if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
197 tagp += 16;
198
199 if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
200 break;
201 }
202
203 return nr;
204}
205
206
207
/*
 * Fold a log block number back into the log area: once 'var' has reached
 * (journal)->j_last it is moved back by the length of the log,
 * j_last - j_first.
 *
 * 'var' must be a modifiable lvalue and is evaluated more than once, so it
 * must not have side effects.  Both macro arguments are parenthesized in
 * the expansion so that any lvalue expression (e.g. *ptr) is safe to pass.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
213
214
215
216
217
218
219
220
221
222
223
224
225
226int jbd2_journal_recover(journal_t *journal)
227{
228 int err;
229 journal_superblock_t * sb;
230
231 struct recovery_info info;
232
233 memset(&info, 0, sizeof(info));
234 sb = journal->j_superblock;
235
236
237
238
239
240
241
242 if (!sb->s_start) {
243 jbd_debug(1, "No recovery required, last transaction %d\n",
244 be32_to_cpu(sb->s_sequence));
245 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
246 return 0;
247 }
248
249 err = do_one_pass(journal, &info, PASS_SCAN);
250 if (!err)
251 err = do_one_pass(journal, &info, PASS_REVOKE);
252 if (!err)
253 err = do_one_pass(journal, &info, PASS_REPLAY);
254
255 jbd_debug(1, "JBD: recovery, exit status %d, "
256 "recovered transactions %u to %u\n",
257 err, info.start_transaction, info.end_transaction);
258 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
259 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
260
261
262
263 journal->j_transaction_sequence = ++info.end_transaction;
264
265 jbd2_journal_clear_revoke(journal);
266 sync_blockdev(journal->j_fs_dev);
267 return err;
268}
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283int jbd2_journal_skip_recovery(journal_t *journal)
284{
285 int err;
286 journal_superblock_t * sb;
287
288 struct recovery_info info;
289
290 memset (&info, 0, sizeof(info));
291 sb = journal->j_superblock;
292
293 err = do_one_pass(journal, &info, PASS_SCAN);
294
295 if (err) {
296 printk(KERN_ERR "JBD: error %d scanning journal\n", err);
297 ++journal->j_transaction_sequence;
298 } else {
299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
301#endif
302 jbd_debug(1,
303 "JBD: ignoring %d transaction%s from the journal.\n",
304 dropped, (dropped == 1) ? "" : "s");
305 journal->j_transaction_sequence = ++info.end_transaction;
306 }
307
308 journal->j_tail = 0;
309 return err;
310}
311
312static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
313{
314 unsigned long long block = be32_to_cpu(tag->t_blocknr);
315 if (tag_bytes > JBD2_TAG_SIZE32)
316 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
317 return block;
318}
319
320
321
322
323
324static int calc_chksums(journal_t *journal, struct buffer_head *bh,
325 unsigned long *next_log_block, __u32 *crc32_sum)
326{
327 int i, num_blks, err;
328 unsigned long io_block;
329 struct buffer_head *obh;
330
331 num_blks = count_tags(journal, bh);
332
333 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
334
335 for (i = 0; i < num_blks; i++) {
336 io_block = (*next_log_block)++;
337 wrap(journal, *next_log_block);
338 err = jread(&obh, journal, io_block);
339 if (err) {
340 printk(KERN_ERR "JBD: IO error %d recovering block "
341 "%lu in log\n", err, io_block);
342 return 1;
343 } else {
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size);
346 }
347 put_bh(obh);
348 }
349 return 0;
350}
351
352static int do_one_pass(journal_t *journal,
353 struct recovery_info *info, enum passtype pass)
354{
355 unsigned int first_commit_ID, next_commit_ID;
356 unsigned long next_log_block;
357 int err, success = 0;
358 journal_superblock_t * sb;
359 journal_header_t * tmp;
360 struct buffer_head * bh;
361 unsigned int sequence;
362 int blocktype;
363 int tag_bytes = journal_tag_bytes(journal);
364 __u32 crc32_sum = ~0;
365
366
367 int MAX_BLOCKS_PER_DESC;
368 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
369 / tag_bytes);
370
371
372
373
374
375
376
377 sb = journal->j_superblock;
378 next_commit_ID = be32_to_cpu(sb->s_sequence);
379 next_log_block = be32_to_cpu(sb->s_start);
380
381 first_commit_ID = next_commit_ID;
382 if (pass == PASS_SCAN)
383 info->start_transaction = first_commit_ID;
384
385 jbd_debug(1, "Starting recovery pass %d\n", pass);
386
387
388
389
390
391
392
393
394 while (1) {
395 int flags;
396 char * tagp;
397 journal_block_tag_t * tag;
398 struct buffer_head * obh;
399 struct buffer_head * nbh;
400
401 cond_resched();
402
403
404
405
406
407 if (pass != PASS_SCAN)
408 if (tid_geq(next_commit_ID, info->end_transaction))
409 break;
410
411 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
412 next_commit_ID, next_log_block, journal->j_last);
413
414
415
416
417
418 jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
419 err = jread(&bh, journal, next_log_block);
420 if (err)
421 goto failed;
422
423 next_log_block++;
424 wrap(journal, next_log_block);
425
426
427
428
429
430
431
432 tmp = (journal_header_t *)bh->b_data;
433
434 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
435 brelse(bh);
436 break;
437 }
438
439 blocktype = be32_to_cpu(tmp->h_blocktype);
440 sequence = be32_to_cpu(tmp->h_sequence);
441 jbd_debug(3, "Found magic %d, sequence %d\n",
442 blocktype, sequence);
443
444 if (sequence != next_commit_ID) {
445 brelse(bh);
446 break;
447 }
448
449
450
451
452
453 switch(blocktype) {
454 case JBD2_DESCRIPTOR_BLOCK:
455
456
457
458
459 if (pass != PASS_REPLAY) {
460 if (pass == PASS_SCAN &&
461 JBD2_HAS_COMPAT_FEATURE(journal,
462 JBD2_FEATURE_COMPAT_CHECKSUM) &&
463 !info->end_transaction) {
464 if (calc_chksums(journal, bh,
465 &next_log_block,
466 &crc32_sum)) {
467 put_bh(bh);
468 break;
469 }
470 put_bh(bh);
471 continue;
472 }
473 next_log_block += count_tags(journal, bh);
474 wrap(journal, next_log_block);
475 put_bh(bh);
476 continue;
477 }
478
479
480
481
482
483 tagp = &bh->b_data[sizeof(journal_header_t)];
484 while ((tagp - bh->b_data + tag_bytes)
485 <= journal->j_blocksize) {
486 unsigned long io_block;
487
488 tag = (journal_block_tag_t *) tagp;
489 flags = be32_to_cpu(tag->t_flags);
490
491 io_block = next_log_block++;
492 wrap(journal, next_log_block);
493 err = jread(&obh, journal, io_block);
494 if (err) {
495
496
497 success = err;
498 printk (KERN_ERR
499 "JBD: IO error %d recovering "
500 "block %ld in log\n",
501 err, io_block);
502 } else {
503 unsigned long long blocknr;
504
505 J_ASSERT(obh != NULL);
506 blocknr = read_tag_block(tag_bytes,
507 tag);
508
509
510
511
512 if (jbd2_journal_test_revoke
513 (journal, blocknr,
514 next_commit_ID)) {
515 brelse(obh);
516 ++info->nr_revoke_hits;
517 goto skip_write;
518 }
519
520
521
522 nbh = __getblk(journal->j_fs_dev,
523 blocknr,
524 journal->j_blocksize);
525 if (nbh == NULL) {
526 printk(KERN_ERR
527 "JBD: Out of memory "
528 "during recovery.\n");
529 err = -ENOMEM;
530 brelse(bh);
531 brelse(obh);
532 goto failed;
533 }
534
535 lock_buffer(nbh);
536 memcpy(nbh->b_data, obh->b_data,
537 journal->j_blocksize);
538 if (flags & JBD2_FLAG_ESCAPE) {
539 *((__be32 *)nbh->b_data) =
540 cpu_to_be32(JBD2_MAGIC_NUMBER);
541 }
542
543 BUFFER_TRACE(nbh, "marking dirty");
544 set_buffer_uptodate(nbh);
545 mark_buffer_dirty(nbh);
546 BUFFER_TRACE(nbh, "marking uptodate");
547 ++info->nr_replays;
548
549 unlock_buffer(nbh);
550 brelse(obh);
551 brelse(nbh);
552 }
553
554 skip_write:
555 tagp += tag_bytes;
556 if (!(flags & JBD2_FLAG_SAME_UUID))
557 tagp += 16;
558
559 if (flags & JBD2_FLAG_LAST_TAG)
560 break;
561 }
562
563 brelse(bh);
564 continue;
565
566 case JBD2_COMMIT_BLOCK:
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602 if (pass == PASS_SCAN &&
603 JBD2_HAS_COMPAT_FEATURE(journal,
604 JBD2_FEATURE_COMPAT_CHECKSUM)) {
605 int chksum_err, chksum_seen;
606 struct commit_header *cbh =
607 (struct commit_header *)bh->b_data;
608 unsigned found_chksum =
609 be32_to_cpu(cbh->h_chksum[0]);
610
611 chksum_err = chksum_seen = 0;
612
613 if (info->end_transaction) {
614 journal->j_failed_commit =
615 info->end_transaction;
616 brelse(bh);
617 break;
618 }
619
620 if (crc32_sum == found_chksum &&
621 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
622 cbh->h_chksum_size ==
623 JBD2_CRC32_CHKSUM_SIZE)
624 chksum_seen = 1;
625 else if (!(cbh->h_chksum_type == 0 &&
626 cbh->h_chksum_size == 0 &&
627 found_chksum == 0 &&
628 !chksum_seen))
629
630
631
632
633
634
635
636
637
638
639 chksum_err = 1;
640
641 if (chksum_err) {
642 info->end_transaction = next_commit_ID;
643
644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 journal->j_failed_commit =
647 next_commit_ID;
648 brelse(bh);
649 break;
650 }
651 }
652 crc32_sum = ~0;
653 }
654 brelse(bh);
655 next_commit_ID++;
656 continue;
657
658 case JBD2_REVOKE_BLOCK:
659
660
661 if (pass != PASS_REVOKE) {
662 brelse(bh);
663 continue;
664 }
665
666 err = scan_revoke_records(journal, bh,
667 next_commit_ID, info);
668 brelse(bh);
669 if (err)
670 goto failed;
671 continue;
672
673 default:
674 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
675 blocktype);
676 brelse(bh);
677 goto done;
678 }
679 }
680
681 done:
682
683
684
685
686
687
688
689 if (pass == PASS_SCAN) {
690 if (!info->end_transaction)
691 info->end_transaction = next_commit_ID;
692 } else {
693
694
695 if (info->end_transaction != next_commit_ID) {
696 printk (KERN_ERR "JBD: recovery pass %d ended at "
697 "transaction %u, expected %u\n",
698 pass, next_commit_ID, info->end_transaction);
699 if (!success)
700 success = -EIO;
701 }
702 }
703
704 return success;
705
706 failed:
707 return err;
708}
709
710
711
712
713static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
714 tid_t sequence, struct recovery_info *info)
715{
716 jbd2_journal_revoke_header_t *header;
717 int offset, max;
718 int record_len = 4;
719
720 header = (jbd2_journal_revoke_header_t *) bh->b_data;
721 offset = sizeof(jbd2_journal_revoke_header_t);
722 max = be32_to_cpu(header->r_count);
723
724 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
725 record_len = 8;
726
727 while (offset + record_len <= max) {
728 unsigned long long blocknr;
729 int err;
730
731 if (record_len == 4)
732 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
733 else
734 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
735 offset += record_len;
736 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
737 if (err)
738 return err;
739 ++info->nr_revokes;
740 }
741 return 0;
742}
743