1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/crc32.h>
24#endif
25
26
27
28
29
30struct recovery_info
31{
32 tid_t start_transaction;
33 tid_t end_transaction;
34
35 int nr_replays;
36 int nr_revokes;
37 int nr_revoke_hits;
38};
39
40enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
41static int do_one_pass(journal_t *journal,
42 struct recovery_info *info, enum passtype pass);
43static int scan_revoke_records(journal_t *, struct buffer_head *,
44 tid_t, struct recovery_info *);
45
46#ifdef __KERNEL__
47
48
/* Call brelse() on each of the first @n entries of @b, last entry first. */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse (b[idx]);
}
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68#define MAXBUF 8
69static int do_readahead(journal_t *journal, unsigned int start)
70{
71 int err;
72 unsigned int max, nbufs, next;
73 unsigned long long blocknr;
74 struct buffer_head *bh;
75
76 struct buffer_head * bufs[MAXBUF];
77
78
79 max = start + (128 * 1024 / journal->j_blocksize);
80 if (max > journal->j_maxlen)
81 max = journal->j_maxlen;
82
83
84
85
86 nbufs = 0;
87
88 for (next = start; next < max; next++) {
89 err = jbd2_journal_bmap(journal, next, &blocknr);
90
91 if (err) {
92 printk (KERN_ERR "JBD: bad block at offset %u\n",
93 next);
94 goto failed;
95 }
96
97 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
98 if (!bh) {
99 err = -ENOMEM;
100 goto failed;
101 }
102
103 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
104 bufs[nbufs++] = bh;
105 if (nbufs == MAXBUF) {
106 ll_rw_block(READ, nbufs, bufs);
107 journal_brelse_array(bufs, nbufs);
108 nbufs = 0;
109 }
110 } else
111 brelse(bh);
112 }
113
114 if (nbufs)
115 ll_rw_block(READ, nbufs, bufs);
116 err = 0;
117
118failed:
119 if (nbufs)
120 journal_brelse_array(bufs, nbufs);
121 return err;
122}
123
124#endif
125
126
127
128
129
130
131static int jread(struct buffer_head **bhp, journal_t *journal,
132 unsigned int offset)
133{
134 int err;
135 unsigned long long blocknr;
136 struct buffer_head *bh;
137
138 *bhp = NULL;
139
140 if (offset >= journal->j_maxlen) {
141 printk(KERN_ERR "JBD: corrupted journal superblock\n");
142 return -EIO;
143 }
144
145 err = jbd2_journal_bmap(journal, offset, &blocknr);
146
147 if (err) {
148 printk (KERN_ERR "JBD: bad block at offset %u\n",
149 offset);
150 return err;
151 }
152
153 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
154 if (!bh)
155 return -ENOMEM;
156
157 if (!buffer_uptodate(bh)) {
158
159
160 if (!buffer_req(bh))
161 do_readahead(journal, offset);
162 wait_on_buffer(bh);
163 }
164
165 if (!buffer_uptodate(bh)) {
166 printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
167 offset);
168 brelse(bh);
169 return -EIO;
170 }
171
172 *bhp = bh;
173 return 0;
174}
175
176
177
178
179
180
181static int count_tags(journal_t *journal, struct buffer_head *bh)
182{
183 char * tagp;
184 journal_block_tag_t * tag;
185 int nr = 0, size = journal->j_blocksize;
186 int tag_bytes = journal_tag_bytes(journal);
187
188 tagp = &bh->b_data[sizeof(journal_header_t)];
189
190 while ((tagp - bh->b_data + tag_bytes) <= size) {
191 tag = (journal_block_tag_t *) tagp;
192
193 nr++;
194 tagp += tag_bytes;
195 if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
196 tagp += 16;
197
198 if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
199 break;
200 }
201
202 return nr;
203}
204
205
206
/*
 * Keep a log block number inside the circular journal area: once @var
 * reaches or passes (journal)->j_last it is pulled back by the size of
 * the log, (journal)->j_last - (journal)->j_first.
 *
 * @var must be a modifiable lvalue.  Both arguments are evaluated more
 * than once, so they must not have side effects.  @var is parenthesized
 * so that expressions such as *ptr expand safely.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
212
213
214
215
216
217
218
219
220
221
222
223
224
225int jbd2_journal_recover(journal_t *journal)
226{
227 int err, err2;
228 journal_superblock_t * sb;
229
230 struct recovery_info info;
231
232 memset(&info, 0, sizeof(info));
233 sb = journal->j_superblock;
234
235
236
237
238
239
240
241 if (!sb->s_start) {
242 jbd_debug(1, "No recovery required, last transaction %d\n",
243 be32_to_cpu(sb->s_sequence));
244 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
245 return 0;
246 }
247
248 err = do_one_pass(journal, &info, PASS_SCAN);
249 if (!err)
250 err = do_one_pass(journal, &info, PASS_REVOKE);
251 if (!err)
252 err = do_one_pass(journal, &info, PASS_REPLAY);
253
254 jbd_debug(1, "JBD: recovery, exit status %d, "
255 "recovered transactions %u to %u\n",
256 err, info.start_transaction, info.end_transaction);
257 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259
260
261
262 journal->j_transaction_sequence = ++info.end_transaction;
263
264 jbd2_journal_clear_revoke(journal);
265 err2 = sync_blockdev(journal->j_fs_dev);
266 if (!err)
267 err = err2;
268
269 return err;
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285int jbd2_journal_skip_recovery(journal_t *journal)
286{
287 int err;
288 journal_superblock_t * sb;
289
290 struct recovery_info info;
291
292 memset (&info, 0, sizeof(info));
293 sb = journal->j_superblock;
294
295 err = do_one_pass(journal, &info, PASS_SCAN);
296
297 if (err) {
298 printk(KERN_ERR "JBD: error %d scanning journal\n", err);
299 ++journal->j_transaction_sequence;
300 } else {
301#ifdef CONFIG_JBD2_DEBUG
302 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
303#endif
304 jbd_debug(1,
305 "JBD: ignoring %d transaction%s from the journal.\n",
306 dropped, (dropped == 1) ? "" : "s");
307 journal->j_transaction_sequence = ++info.end_transaction;
308 }
309
310 journal->j_tail = 0;
311 return err;
312}
313
314static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
315{
316 unsigned long long block = be32_to_cpu(tag->t_blocknr);
317 if (tag_bytes > JBD2_TAG_SIZE32)
318 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
319 return block;
320}
321
322
323
324
325
326static int calc_chksums(journal_t *journal, struct buffer_head *bh,
327 unsigned long *next_log_block, __u32 *crc32_sum)
328{
329 int i, num_blks, err;
330 unsigned long io_block;
331 struct buffer_head *obh;
332
333 num_blks = count_tags(journal, bh);
334
335 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
336
337 for (i = 0; i < num_blks; i++) {
338 io_block = (*next_log_block)++;
339 wrap(journal, *next_log_block);
340 err = jread(&obh, journal, io_block);
341 if (err) {
342 printk(KERN_ERR "JBD: IO error %d recovering block "
343 "%lu in log\n", err, io_block);
344 return 1;
345 } else {
346 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
347 obh->b_size);
348 }
349 put_bh(obh);
350 }
351 return 0;
352}
353
354static int do_one_pass(journal_t *journal,
355 struct recovery_info *info, enum passtype pass)
356{
357 unsigned int first_commit_ID, next_commit_ID;
358 unsigned long next_log_block;
359 int err, success = 0;
360 journal_superblock_t * sb;
361 journal_header_t * tmp;
362 struct buffer_head * bh;
363 unsigned int sequence;
364 int blocktype;
365 int tag_bytes = journal_tag_bytes(journal);
366 __u32 crc32_sum = ~0;
367
368
369 int MAX_BLOCKS_PER_DESC;
370 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
371 / tag_bytes);
372
373
374
375
376
377
378
379 sb = journal->j_superblock;
380 next_commit_ID = be32_to_cpu(sb->s_sequence);
381 next_log_block = be32_to_cpu(sb->s_start);
382
383 first_commit_ID = next_commit_ID;
384 if (pass == PASS_SCAN)
385 info->start_transaction = first_commit_ID;
386
387 jbd_debug(1, "Starting recovery pass %d\n", pass);
388
389
390
391
392
393
394
395
396 while (1) {
397 int flags;
398 char * tagp;
399 journal_block_tag_t * tag;
400 struct buffer_head * obh;
401 struct buffer_head * nbh;
402
403 cond_resched();
404
405
406
407
408
409 if (pass != PASS_SCAN)
410 if (tid_geq(next_commit_ID, info->end_transaction))
411 break;
412
413 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
414 next_commit_ID, next_log_block, journal->j_last);
415
416
417
418
419
420 jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
421 err = jread(&bh, journal, next_log_block);
422 if (err)
423 goto failed;
424
425 next_log_block++;
426 wrap(journal, next_log_block);
427
428
429
430
431
432
433
434 tmp = (journal_header_t *)bh->b_data;
435
436 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
437 brelse(bh);
438 break;
439 }
440
441 blocktype = be32_to_cpu(tmp->h_blocktype);
442 sequence = be32_to_cpu(tmp->h_sequence);
443 jbd_debug(3, "Found magic %d, sequence %d\n",
444 blocktype, sequence);
445
446 if (sequence != next_commit_ID) {
447 brelse(bh);
448 break;
449 }
450
451
452
453
454
455 switch(blocktype) {
456 case JBD2_DESCRIPTOR_BLOCK:
457
458
459
460
461 if (pass != PASS_REPLAY) {
462 if (pass == PASS_SCAN &&
463 JBD2_HAS_COMPAT_FEATURE(journal,
464 JBD2_FEATURE_COMPAT_CHECKSUM) &&
465 !info->end_transaction) {
466 if (calc_chksums(journal, bh,
467 &next_log_block,
468 &crc32_sum)) {
469 put_bh(bh);
470 break;
471 }
472 put_bh(bh);
473 continue;
474 }
475 next_log_block += count_tags(journal, bh);
476 wrap(journal, next_log_block);
477 put_bh(bh);
478 continue;
479 }
480
481
482
483
484
485 tagp = &bh->b_data[sizeof(journal_header_t)];
486 while ((tagp - bh->b_data + tag_bytes)
487 <= journal->j_blocksize) {
488 unsigned long io_block;
489
490 tag = (journal_block_tag_t *) tagp;
491 flags = be32_to_cpu(tag->t_flags);
492
493 io_block = next_log_block++;
494 wrap(journal, next_log_block);
495 err = jread(&obh, journal, io_block);
496 if (err) {
497
498
499 success = err;
500 printk (KERN_ERR
501 "JBD: IO error %d recovering "
502 "block %ld in log\n",
503 err, io_block);
504 } else {
505 unsigned long long blocknr;
506
507 J_ASSERT(obh != NULL);
508 blocknr = read_tag_block(tag_bytes,
509 tag);
510
511
512
513
514 if (jbd2_journal_test_revoke
515 (journal, blocknr,
516 next_commit_ID)) {
517 brelse(obh);
518 ++info->nr_revoke_hits;
519 goto skip_write;
520 }
521
522
523
524 nbh = __getblk(journal->j_fs_dev,
525 blocknr,
526 journal->j_blocksize);
527 if (nbh == NULL) {
528 printk(KERN_ERR
529 "JBD: Out of memory "
530 "during recovery.\n");
531 err = -ENOMEM;
532 brelse(bh);
533 brelse(obh);
534 goto failed;
535 }
536
537 lock_buffer(nbh);
538 memcpy(nbh->b_data, obh->b_data,
539 journal->j_blocksize);
540 if (flags & JBD2_FLAG_ESCAPE) {
541 *((__be32 *)nbh->b_data) =
542 cpu_to_be32(JBD2_MAGIC_NUMBER);
543 }
544
545 BUFFER_TRACE(nbh, "marking dirty");
546 set_buffer_uptodate(nbh);
547 mark_buffer_dirty(nbh);
548 BUFFER_TRACE(nbh, "marking uptodate");
549 ++info->nr_replays;
550
551 unlock_buffer(nbh);
552 brelse(obh);
553 brelse(nbh);
554 }
555
556 skip_write:
557 tagp += tag_bytes;
558 if (!(flags & JBD2_FLAG_SAME_UUID))
559 tagp += 16;
560
561 if (flags & JBD2_FLAG_LAST_TAG)
562 break;
563 }
564
565 brelse(bh);
566 continue;
567
568 case JBD2_COMMIT_BLOCK:
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604 if (pass == PASS_SCAN &&
605 JBD2_HAS_COMPAT_FEATURE(journal,
606 JBD2_FEATURE_COMPAT_CHECKSUM)) {
607 int chksum_err, chksum_seen;
608 struct commit_header *cbh =
609 (struct commit_header *)bh->b_data;
610 unsigned found_chksum =
611 be32_to_cpu(cbh->h_chksum[0]);
612
613 chksum_err = chksum_seen = 0;
614
615 if (info->end_transaction) {
616 journal->j_failed_commit =
617 info->end_transaction;
618 brelse(bh);
619 break;
620 }
621
622 if (crc32_sum == found_chksum &&
623 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
624 cbh->h_chksum_size ==
625 JBD2_CRC32_CHKSUM_SIZE)
626 chksum_seen = 1;
627 else if (!(cbh->h_chksum_type == 0 &&
628 cbh->h_chksum_size == 0 &&
629 found_chksum == 0 &&
630 !chksum_seen))
631
632
633
634
635
636
637
638
639
640
641 chksum_err = 1;
642
643 if (chksum_err) {
644 info->end_transaction = next_commit_ID;
645
646 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
647 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
648 journal->j_failed_commit =
649 next_commit_ID;
650 brelse(bh);
651 break;
652 }
653 }
654 crc32_sum = ~0;
655 }
656 brelse(bh);
657 next_commit_ID++;
658 continue;
659
660 case JBD2_REVOKE_BLOCK:
661
662
663 if (pass != PASS_REVOKE) {
664 brelse(bh);
665 continue;
666 }
667
668 err = scan_revoke_records(journal, bh,
669 next_commit_ID, info);
670 brelse(bh);
671 if (err)
672 goto failed;
673 continue;
674
675 default:
676 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
677 blocktype);
678 brelse(bh);
679 goto done;
680 }
681 }
682
683 done:
684
685
686
687
688
689
690
691 if (pass == PASS_SCAN) {
692 if (!info->end_transaction)
693 info->end_transaction = next_commit_ID;
694 } else {
695
696
697 if (info->end_transaction != next_commit_ID) {
698 printk (KERN_ERR "JBD: recovery pass %d ended at "
699 "transaction %u, expected %u\n",
700 pass, next_commit_ID, info->end_transaction);
701 if (!success)
702 success = -EIO;
703 }
704 }
705
706 return success;
707
708 failed:
709 return err;
710}
711
712
713
714
715static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
716 tid_t sequence, struct recovery_info *info)
717{
718 jbd2_journal_revoke_header_t *header;
719 int offset, max;
720 int record_len = 4;
721
722 header = (jbd2_journal_revoke_header_t *) bh->b_data;
723 offset = sizeof(jbd2_journal_revoke_header_t);
724 max = be32_to_cpu(header->r_count);
725
726 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
727 record_len = 8;
728
729 while (offset + record_len <= max) {
730 unsigned long long blocknr;
731 int err;
732
733 if (record_len == 4)
734 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
735 else
736 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
737 offset += record_len;
738 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
739 if (err)
740 return err;
741 ++info->nr_revokes;
742 }
743 return 0;
744}
745