linux/fs/f2fs/recovery.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * fs/f2fs/recovery.c
   4 *
   5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6 *             http://www.samsung.com/
   7 */
   8#include <asm/unaligned.h>
   9#include <linux/fs.h>
  10#include <linux/f2fs_fs.h>
  11#include "f2fs.h"
  12#include "node.h"
  13#include "segment.h"
  14
  15/*
  16 * Roll forward recovery scenarios.
  17 *
  18 * [Term] F: fsync_mark, D: dentry_mark
  19 *
  20 * 1. inode(x) | CP | inode(x) | dnode(F)
  21 * -> Update the latest inode(x).
  22 *
  23 * 2. inode(x) | CP | inode(F) | dnode(F)
  24 * -> No problem.
  25 *
  26 * 3. inode(x) | CP | dnode(F) | inode(x)
  27 * -> Recover to the latest dnode(F), and drop the last inode(x)
  28 *
  29 * 4. inode(x) | CP | dnode(F) | inode(F)
  30 * -> No problem.
  31 *
  32 * 5. CP | inode(x) | dnode(F)
  33 * -> The inode(DF) was missing. Should drop this dnode(F).
  34 *
  35 * 6. CP | inode(DF) | dnode(F)
  36 * -> No problem.
  37 *
  38 * 7. CP | dnode(F) | inode(DF)
  39 * -> If f2fs_iget fails, then goto next to find inode(DF).
  40 *
  41 * 8. CP | dnode(F) | inode(x)
  42 * -> If f2fs_iget fails, then goto next to find inode(DF).
  43 *    But it will fail due to no inode(DF).
  44 */
  45
  /* Slab cache from which this file allocates struct fsync_inode_entry objects. */
  static struct kmem_cache *fsync_entry_slab;

  #ifdef CONFIG_UNICODE
  /* Defined outside this file; used below to free a casefolded name buffer. */
  extern struct kmem_cache *f2fs_cf_name_slab;
  #endif
  51
  52bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
  53{
  54        s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
  55
  56        if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
  57                return false;
  58        return true;
  59}
  60
  61static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
  62                                                                nid_t ino)
  63{
  64        struct fsync_inode_entry *entry;
  65
  66        list_for_each_entry(entry, head, list)
  67                if (entry->inode->i_ino == ino)
  68                        return entry;
  69
  70        return NULL;
  71}
  72
  73static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
  74                        struct list_head *head, nid_t ino, bool quota_inode)
  75{
  76        struct inode *inode;
  77        struct fsync_inode_entry *entry;
  78        int err;
  79
  80        inode = f2fs_iget_retry(sbi->sb, ino);
  81        if (IS_ERR(inode))
  82                return ERR_CAST(inode);
  83
  84        err = dquot_initialize(inode);
  85        if (err)
  86                goto err_out;
  87
  88        if (quota_inode) {
  89                err = dquot_alloc_inode(inode);
  90                if (err)
  91                        goto err_out;
  92        }
  93
  94        entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
  95        entry->inode = inode;
  96        list_add_tail(&entry->list, head);
  97
  98        return entry;
  99err_out:
 100        iput(inode);
 101        return ERR_PTR(err);
 102}
 103
 104static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
 105{
 106        if (drop) {
 107                /* inode should not be recovered, drop it */
 108                f2fs_inode_synced(entry->inode);
 109        }
 110        iput(entry->inode);
 111        list_del(&entry->list);
 112        kmem_cache_free(fsync_entry_slab, entry);
 113}
 114
 115static int init_recovered_filename(const struct inode *dir,
 116                                   struct f2fs_inode *raw_inode,
 117                                   struct f2fs_filename *fname,
 118                                   struct qstr *usr_fname)
 119{
 120        int err;
 121
 122        memset(fname, 0, sizeof(*fname));
 123        fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
 124        fname->disk_name.name = raw_inode->i_name;
 125
 126        if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
 127                return -ENAMETOOLONG;
 128
 129        if (!IS_ENCRYPTED(dir)) {
 130                usr_fname->name = fname->disk_name.name;
 131                usr_fname->len = fname->disk_name.len;
 132                fname->usr_fname = usr_fname;
 133        }
 134
 135        /* Compute the hash of the filename */
 136        if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
 137                /*
 138                 * In this case the hash isn't computable without the key, so it
 139                 * was saved on-disk.
 140                 */
 141                if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
 142                        return -EINVAL;
 143                fname->hash = get_unaligned((f2fs_hash_t *)
 144                                &raw_inode->i_name[fname->disk_name.len]);
 145        } else if (IS_CASEFOLDED(dir)) {
 146                err = f2fs_init_casefolded_name(dir, fname);
 147                if (err)
 148                        return err;
 149                f2fs_hash_filename(dir, fname);
 150#ifdef CONFIG_UNICODE
 151                /* Case-sensitive match is fine for recovery */
 152                kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
 153                fname->cf_name.name = NULL;
 154#endif
 155        } else {
 156                f2fs_hash_filename(dir, fname);
 157        }
 158        return 0;
 159}
 160
 161static int recover_dentry(struct inode *inode, struct page *ipage,
 162                                                struct list_head *dir_list)
 163{
 164        struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
 165        nid_t pino = le32_to_cpu(raw_inode->i_pino);
 166        struct f2fs_dir_entry *de;
 167        struct f2fs_filename fname;
 168        struct qstr usr_fname;
 169        struct page *page;
 170        struct inode *dir, *einode;
 171        struct fsync_inode_entry *entry;
 172        int err = 0;
 173        char *name;
 174
 175        entry = get_fsync_inode(dir_list, pino);
 176        if (!entry) {
 177                entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
 178                                                        pino, false);
 179                if (IS_ERR(entry)) {
 180                        dir = ERR_CAST(entry);
 181                        err = PTR_ERR(entry);
 182                        goto out;
 183                }
 184        }
 185
 186        dir = entry->inode;
 187        err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
 188        if (err)
 189                goto out;
 190retry:
 191        de = __f2fs_find_entry(dir, &fname, &page);
 192        if (de && inode->i_ino == le32_to_cpu(de->ino))
 193                goto out_put;
 194
 195        if (de) {
 196                einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
 197                if (IS_ERR(einode)) {
 198                        WARN_ON(1);
 199                        err = PTR_ERR(einode);
 200                        if (err == -ENOENT)
 201                                err = -EEXIST;
 202                        goto out_put;
 203                }
 204
 205                err = dquot_initialize(einode);
 206                if (err) {
 207                        iput(einode);
 208                        goto out_put;
 209                }
 210
 211                err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode));
 212                if (err) {
 213                        iput(einode);
 214                        goto out_put;
 215                }
 216                f2fs_delete_entry(de, page, dir, einode);
 217                iput(einode);
 218                goto retry;
 219        } else if (IS_ERR(page)) {
 220                err = PTR_ERR(page);
 221        } else {
 222                err = f2fs_add_dentry(dir, &fname, inode,
 223                                        inode->i_ino, inode->i_mode);
 224        }
 225        if (err == -ENOMEM)
 226                goto retry;
 227        goto out;
 228
 229out_put:
 230        f2fs_put_page(page, 0);
 231out:
 232        if (file_enc_name(inode))
 233                name = "<encrypted>";
 234        else
 235                name = raw_inode->i_name;
 236        f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
 237                    __func__, ino_of_node(ipage), name,
 238                    IS_ERR(dir) ? 0 : dir->i_ino, err);
 239        return err;
 240}
 241
 242static int recover_quota_data(struct inode *inode, struct page *page)
 243{
 244        struct f2fs_inode *raw = F2FS_INODE(page);
 245        struct iattr attr;
 246        uid_t i_uid = le32_to_cpu(raw->i_uid);
 247        gid_t i_gid = le32_to_cpu(raw->i_gid);
 248        int err;
 249
 250        memset(&attr, 0, sizeof(attr));
 251
 252        attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
 253        attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
 254
 255        if (!uid_eq(attr.ia_uid, inode->i_uid))
 256                attr.ia_valid |= ATTR_UID;
 257        if (!gid_eq(attr.ia_gid, inode->i_gid))
 258                attr.ia_valid |= ATTR_GID;
 259
 260        if (!attr.ia_valid)
 261                return 0;
 262
 263        err = dquot_transfer(inode, &attr);
 264        if (err)
 265                set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
 266        return err;
 267}
 268
 269static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 270{
 271        if (ri->i_inline & F2FS_PIN_FILE)
 272                set_inode_flag(inode, FI_PIN_FILE);
 273        else
 274                clear_inode_flag(inode, FI_PIN_FILE);
 275        if (ri->i_inline & F2FS_DATA_EXIST)
 276                set_inode_flag(inode, FI_DATA_EXIST);
 277        else
 278                clear_inode_flag(inode, FI_DATA_EXIST);
 279}
 280
 281static int recover_inode(struct inode *inode, struct page *page)
 282{
 283        struct f2fs_inode *raw = F2FS_INODE(page);
 284        char *name;
 285        int err;
 286
 287        inode->i_mode = le16_to_cpu(raw->i_mode);
 288
 289        err = recover_quota_data(inode, page);
 290        if (err)
 291                return err;
 292
 293        i_uid_write(inode, le32_to_cpu(raw->i_uid));
 294        i_gid_write(inode, le32_to_cpu(raw->i_gid));
 295
 296        if (raw->i_inline & F2FS_EXTRA_ATTR) {
 297                if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
 298                        F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
 299                                                                i_projid)) {
 300                        projid_t i_projid;
 301                        kprojid_t kprojid;
 302
 303                        i_projid = (projid_t)le32_to_cpu(raw->i_projid);
 304                        kprojid = make_kprojid(&init_user_ns, i_projid);
 305
 306                        if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
 307                                err = f2fs_transfer_project_quota(inode,
 308                                                                kprojid);
 309                                if (err)
 310                                        return err;
 311                                F2FS_I(inode)->i_projid = kprojid;
 312                        }
 313                }
 314        }
 315
 316        f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
 317        inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
 318        inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
 319        inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
 320        inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
 321        inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
 322        inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 323
 324        F2FS_I(inode)->i_advise = raw->i_advise;
 325        F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
 326        f2fs_set_inode_flags(inode);
 327        F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
 328                                le16_to_cpu(raw->i_gc_failures);
 329
 330        recover_inline_flags(inode, raw);
 331
 332        f2fs_mark_inode_dirty_sync(inode, true);
 333
 334        if (file_enc_name(inode))
 335                name = "<encrypted>";
 336        else
 337                name = F2FS_INODE(page)->i_name;
 338
 339        f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
 340                    ino_of_node(page), name, raw->i_inline);
 341        return 0;
 342}
 343
 344static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
 345                                bool check_only)
 346{
 347        struct curseg_info *curseg;
 348        struct page *page = NULL;
 349        block_t blkaddr;
 350        unsigned int loop_cnt = 0;
 351        unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
 352                                                valid_user_blocks(sbi);
 353        int err = 0;
 354
 355        /* get node pages in the current segment */
 356        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 357        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 358
 359        while (1) {
 360                struct fsync_inode_entry *entry;
 361
 362                if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
 363                        return 0;
 364
 365                page = f2fs_get_tmp_page(sbi, blkaddr);
 366                if (IS_ERR(page)) {
 367                        err = PTR_ERR(page);
 368                        break;
 369                }
 370
 371                if (!is_recoverable_dnode(page)) {
 372                        f2fs_put_page(page, 1);
 373                        break;
 374                }
 375
 376                if (!is_fsync_dnode(page))
 377                        goto next;
 378
 379                entry = get_fsync_inode(head, ino_of_node(page));
 380                if (!entry) {
 381                        bool quota_inode = false;
 382
 383                        if (!check_only &&
 384                                        IS_INODE(page) && is_dent_dnode(page)) {
 385                                err = f2fs_recover_inode_page(sbi, page);
 386                                if (err) {
 387                                        f2fs_put_page(page, 1);
 388                                        break;
 389                                }
 390                                quota_inode = true;
 391                        }
 392
 393                        /*
 394                         * CP | dnode(F) | inode(DF)
 395                         * For this case, we should not give up now.
 396                         */
 397                        entry = add_fsync_inode(sbi, head, ino_of_node(page),
 398                                                                quota_inode);
 399                        if (IS_ERR(entry)) {
 400                                err = PTR_ERR(entry);
 401                                if (err == -ENOENT) {
 402                                        err = 0;
 403                                        goto next;
 404                                }
 405                                f2fs_put_page(page, 1);
 406                                break;
 407                        }
 408                }
 409                entry->blkaddr = blkaddr;
 410
 411                if (IS_INODE(page) && is_dent_dnode(page))
 412                        entry->last_dentry = blkaddr;
 413next:
 414                /* sanity check in order to detect looped node chain */
 415                if (++loop_cnt >= free_blocks ||
 416                        blkaddr == next_blkaddr_of_node(page)) {
 417                        f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
 418                                    __func__, blkaddr,
 419                                    next_blkaddr_of_node(page));
 420                        f2fs_put_page(page, 1);
 421                        err = -EINVAL;
 422                        break;
 423                }
 424
 425                /* check next segment */
 426                blkaddr = next_blkaddr_of_node(page);
 427                f2fs_put_page(page, 1);
 428
 429                f2fs_ra_meta_pages_cond(sbi, blkaddr);
 430        }
 431        return err;
 432}
 433
 434static void destroy_fsync_dnodes(struct list_head *head, int drop)
 435{
 436        struct fsync_inode_entry *entry, *tmp;
 437
 438        list_for_each_entry_safe(entry, tmp, head, list)
 439                del_fsync_inode(entry, drop);
 440}
 441
 442static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 443                        block_t blkaddr, struct dnode_of_data *dn)
 444{
 445        struct seg_entry *sentry;
 446        unsigned int segno = GET_SEGNO(sbi, blkaddr);
 447        unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 448        struct f2fs_summary_block *sum_node;
 449        struct f2fs_summary sum;
 450        struct page *sum_page, *node_page;
 451        struct dnode_of_data tdn = *dn;
 452        nid_t ino, nid;
 453        struct inode *inode;
 454        unsigned int offset;
 455        block_t bidx;
 456        int i;
 457
 458        sentry = get_seg_entry(sbi, segno);
 459        if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
 460                return 0;
 461
 462        /* Get the previous summary */
 463        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
 464                struct curseg_info *curseg = CURSEG_I(sbi, i);
 465
 466                if (curseg->segno == segno) {
 467                        sum = curseg->sum_blk->entries[blkoff];
 468                        goto got_it;
 469                }
 470        }
 471
 472        sum_page = f2fs_get_sum_page(sbi, segno);
 473        if (IS_ERR(sum_page))
 474                return PTR_ERR(sum_page);
 475        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 476        sum = sum_node->entries[blkoff];
 477        f2fs_put_page(sum_page, 1);
 478got_it:
 479        /* Use the locked dnode page and inode */
 480        nid = le32_to_cpu(sum.nid);
 481        if (dn->inode->i_ino == nid) {
 482                tdn.nid = nid;
 483                if (!dn->inode_page_locked)
 484                        lock_page(dn->inode_page);
 485                tdn.node_page = dn->inode_page;
 486                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 487                goto truncate_out;
 488        } else if (dn->nid == nid) {
 489                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 490                goto truncate_out;
 491        }
 492
 493        /* Get the node page */
 494        node_page = f2fs_get_node_page(sbi, nid);
 495        if (IS_ERR(node_page))
 496                return PTR_ERR(node_page);
 497
 498        offset = ofs_of_node(node_page);
 499        ino = ino_of_node(node_page);
 500        f2fs_put_page(node_page, 1);
 501
 502        if (ino != dn->inode->i_ino) {
 503                int ret;
 504
 505                /* Deallocate previous index in the node page */
 506                inode = f2fs_iget_retry(sbi->sb, ino);
 507                if (IS_ERR(inode))
 508                        return PTR_ERR(inode);
 509
 510                ret = dquot_initialize(inode);
 511                if (ret) {
 512                        iput(inode);
 513                        return ret;
 514                }
 515        } else {
 516                inode = dn->inode;
 517        }
 518
 519        bidx = f2fs_start_bidx_of_node(offset, inode) +
 520                                le16_to_cpu(sum.ofs_in_node);
 521
 522        /*
 523         * if inode page is locked, unlock temporarily, but its reference
 524         * count keeps alive.
 525         */
 526        if (ino == dn->inode->i_ino && dn->inode_page_locked)
 527                unlock_page(dn->inode_page);
 528
 529        set_new_dnode(&tdn, inode, NULL, NULL, 0);
 530        if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
 531                goto out;
 532
 533        if (tdn.data_blkaddr == blkaddr)
 534                f2fs_truncate_data_blocks_range(&tdn, 1);
 535
 536        f2fs_put_dnode(&tdn);
 537out:
 538        if (ino != dn->inode->i_ino)
 539                iput(inode);
 540        else if (dn->inode_page_locked)
 541                lock_page(dn->inode_page);
 542        return 0;
 543
 544truncate_out:
 545        if (f2fs_data_blkaddr(&tdn) == blkaddr)
 546                f2fs_truncate_data_blocks_range(&tdn, 1);
 547        if (dn->inode->i_ino == nid && !dn->inode_page_locked)
 548                unlock_page(dn->inode_page);
 549        return 0;
 550}
 551
 552static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 553                                        struct page *page)
 554{
 555        struct dnode_of_data dn;
 556        struct node_info ni;
 557        unsigned int start, end;
 558        int err = 0, recovered = 0;
 559
 560        /* step 1: recover xattr */
 561        if (IS_INODE(page)) {
 562                err = f2fs_recover_inline_xattr(inode, page);
 563                if (err)
 564                        goto out;
 565        } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
 566                err = f2fs_recover_xattr_data(inode, page);
 567                if (!err)
 568                        recovered++;
 569                goto out;
 570        }
 571
 572        /* step 2: recover inline data */
 573        err = f2fs_recover_inline_data(inode, page);
 574        if (err) {
 575                if (err == 1)
 576                        err = 0;
 577                goto out;
 578        }
 579
 580        /* step 3: recover data indices */
 581        start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
 582        end = start + ADDRS_PER_PAGE(page, inode);
 583
 584        set_new_dnode(&dn, inode, NULL, NULL, 0);
 585retry_dn:
 586        err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
 587        if (err) {
 588                if (err == -ENOMEM) {
 589                        congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
 590                        goto retry_dn;
 591                }
 592                goto out;
 593        }
 594
 595        f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
 596
 597        err = f2fs_get_node_info(sbi, dn.nid, &ni);
 598        if (err)
 599                goto err;
 600
 601        f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
 602
 603        if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
 604                f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
 605                          inode->i_ino, ofs_of_node(dn.node_page),
 606                          ofs_of_node(page));
 607                err = -EFSCORRUPTED;
 608                goto err;
 609        }
 610
 611        for (; start < end; start++, dn.ofs_in_node++) {
 612                block_t src, dest;
 613
 614                src = f2fs_data_blkaddr(&dn);
 615                dest = data_blkaddr(dn.inode, page, dn.ofs_in_node);
 616
 617                if (__is_valid_data_blkaddr(src) &&
 618                        !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
 619                        err = -EFSCORRUPTED;
 620                        goto err;
 621                }
 622
 623                if (__is_valid_data_blkaddr(dest) &&
 624                        !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
 625                        err = -EFSCORRUPTED;
 626                        goto err;
 627                }
 628
 629                /* skip recovering if dest is the same as src */
 630                if (src == dest)
 631                        continue;
 632
 633                /* dest is invalid, just invalidate src block */
 634                if (dest == NULL_ADDR) {
 635                        f2fs_truncate_data_blocks_range(&dn, 1);
 636                        continue;
 637                }
 638
 639                if (!file_keep_isize(inode) &&
 640                        (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
 641                        f2fs_i_size_write(inode,
 642                                (loff_t)(start + 1) << PAGE_SHIFT);
 643
 644                /*
 645                 * dest is reserved block, invalidate src block
 646                 * and then reserve one new block in dnode page.
 647                 */
 648                if (dest == NEW_ADDR) {
 649                        f2fs_truncate_data_blocks_range(&dn, 1);
 650                        f2fs_reserve_new_block(&dn);
 651                        continue;
 652                }
 653
 654                /* dest is valid block, try to recover from src to dest */
 655                if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
 656
 657                        if (src == NULL_ADDR) {
 658                                err = f2fs_reserve_new_block(&dn);
 659                                while (err &&
 660                                       IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
 661                                        err = f2fs_reserve_new_block(&dn);
 662                                /* We should not get -ENOSPC */
 663                                f2fs_bug_on(sbi, err);
 664                                if (err)
 665                                        goto err;
 666                        }
 667retry_prev:
 668                        /* Check the previous node page having this index */
 669                        err = check_index_in_prev_nodes(sbi, dest, &dn);
 670                        if (err) {
 671                                if (err == -ENOMEM) {
 672                                        congestion_wait(BLK_RW_ASYNC,
 673                                                        DEFAULT_IO_TIMEOUT);
 674                                        goto retry_prev;
 675                                }
 676                                goto err;
 677                        }
 678
 679                        /* write dummy data page */
 680                        f2fs_replace_block(sbi, &dn, src, dest,
 681                                                ni.version, false, false);
 682                        recovered++;
 683                }
 684        }
 685
 686        copy_node_footer(dn.node_page, page);
 687        fill_node_footer(dn.node_page, dn.nid, ni.ino,
 688                                        ofs_of_node(page), false);
 689        set_page_dirty(dn.node_page);
 690err:
 691        f2fs_put_dnode(&dn);
 692out:
 693        f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
 694                    inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
 695                    recovered, err);
 696        return err;
 697}
 698
 699static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 700                struct list_head *tmp_inode_list, struct list_head *dir_list)
 701{
 702        struct curseg_info *curseg;
 703        struct page *page = NULL;
 704        int err = 0;
 705        block_t blkaddr;
 706
 707        /* get node pages in the current segment */
 708        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 709        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 710
 711        while (1) {
 712                struct fsync_inode_entry *entry;
 713
 714                if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
 715                        break;
 716
 717                f2fs_ra_meta_pages_cond(sbi, blkaddr);
 718
 719                page = f2fs_get_tmp_page(sbi, blkaddr);
 720                if (IS_ERR(page)) {
 721                        err = PTR_ERR(page);
 722                        break;
 723                }
 724
 725                if (!is_recoverable_dnode(page)) {
 726                        f2fs_put_page(page, 1);
 727                        break;
 728                }
 729
 730                entry = get_fsync_inode(inode_list, ino_of_node(page));
 731                if (!entry)
 732                        goto next;
 733                /*
 734                 * inode(x) | CP | inode(x) | dnode(F)
 735                 * In this case, we can lose the latest inode(x).
 736                 * So, call recover_inode for the inode update.
 737                 */
 738                if (IS_INODE(page)) {
 739                        err = recover_inode(entry->inode, page);
 740                        if (err) {
 741                                f2fs_put_page(page, 1);
 742                                break;
 743                        }
 744                }
 745                if (entry->last_dentry == blkaddr) {
 746                        err = recover_dentry(entry->inode, page, dir_list);
 747                        if (err) {
 748                                f2fs_put_page(page, 1);
 749                                break;
 750                        }
 751                }
 752                err = do_recover_data(sbi, entry->inode, page);
 753                if (err) {
 754                        f2fs_put_page(page, 1);
 755                        break;
 756                }
 757
 758                if (entry->blkaddr == blkaddr)
 759                        list_move_tail(&entry->list, tmp_inode_list);
 760next:
 761                /* check next segment */
 762                blkaddr = next_blkaddr_of_node(page);
 763                f2fs_put_page(page, 1);
 764        }
 765        if (!err)
 766                f2fs_allocate_new_segments(sbi);
 767        return err;
 768}
 769
 770int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 771{
 772        struct list_head inode_list, tmp_inode_list;
 773        struct list_head dir_list;
 774        int err;
 775        int ret = 0;
 776        unsigned long s_flags = sbi->sb->s_flags;
 777        bool need_writecp = false;
 778        bool fix_curseg_write_pointer = false;
 779#ifdef CONFIG_QUOTA
 780        int quota_enabled;
 781#endif
 782
 783        if (s_flags & SB_RDONLY) {
 784                f2fs_info(sbi, "recover fsync data on readonly fs");
 785                sbi->sb->s_flags &= ~SB_RDONLY;
 786        }
 787
 788#ifdef CONFIG_QUOTA
 789        /* Needed for iput() to work correctly and not trash data */
 790        sbi->sb->s_flags |= SB_ACTIVE;
 791        /* Turn on quotas so that they are updated correctly */
 792        quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
 793#endif
 794
 795        INIT_LIST_HEAD(&inode_list);
 796        INIT_LIST_HEAD(&tmp_inode_list);
 797        INIT_LIST_HEAD(&dir_list);
 798
 799        /* prevent checkpoint */
 800        down_write(&sbi->cp_global_sem);
 801
 802        /* step #1: find fsynced inode numbers */
 803        err = find_fsync_dnodes(sbi, &inode_list, check_only);
 804        if (err || list_empty(&inode_list))
 805                goto skip;
 806
 807        if (check_only) {
 808                ret = 1;
 809                goto skip;
 810        }
 811
 812        need_writecp = true;
 813
 814        /* step #2: recover data */
 815        err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
 816        if (!err)
 817                f2fs_bug_on(sbi, !list_empty(&inode_list));
 818        else {
 819                /* restore s_flags to let iput() trash data */
 820                sbi->sb->s_flags = s_flags;
 821        }
 822skip:
 823        fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
 824
 825        destroy_fsync_dnodes(&inode_list, err);
 826        destroy_fsync_dnodes(&tmp_inode_list, err);
 827
 828        /* truncate meta pages to be used by the recovery */
 829        truncate_inode_pages_range(META_MAPPING(sbi),
 830                        (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
 831
 832        if (err) {
 833                truncate_inode_pages_final(NODE_MAPPING(sbi));
 834                truncate_inode_pages_final(META_MAPPING(sbi));
 835        }
 836
 837        /*
 838         * If fsync data succeeds or there is no fsync data to recover,
 839         * and the f2fs is not read only, check and fix zoned block devices'
 840         * write pointer consistency.
 841         */
 842        if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
 843                        f2fs_sb_has_blkzoned(sbi)) {
 844                err = f2fs_fix_curseg_write_pointer(sbi);
 845                ret = err;
 846        }
 847
 848        if (!err)
 849                clear_sbi_flag(sbi, SBI_POR_DOING);
 850
 851        up_write(&sbi->cp_global_sem);
 852
 853        /* let's drop all the directory inodes for clean checkpoint */
 854        destroy_fsync_dnodes(&dir_list, err);
 855
 856        if (need_writecp) {
 857                set_sbi_flag(sbi, SBI_IS_RECOVERED);
 858
 859                if (!err) {
 860                        struct cp_control cpc = {
 861                                .reason = CP_RECOVERY,
 862                        };
 863                        err = f2fs_write_checkpoint(sbi, &cpc);
 864                }
 865        }
 866
 867#ifdef CONFIG_QUOTA
 868        /* Turn quotas off */
 869        if (quota_enabled)
 870                f2fs_quota_off_umount(sbi->sb);
 871#endif
 872        sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
 873
 874        return ret ? ret : err;
 875}
 876
 877int __init f2fs_create_recovery_cache(void)
 878{
 879        fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
 880                                        sizeof(struct fsync_inode_entry));
 881        if (!fsync_entry_slab)
 882                return -ENOMEM;
 883        return 0;
 884}
 885
 /* Destroy the fsync entry slab cache set up by f2fs_create_recovery_cache(). */
 void f2fs_destroy_recovery_cache(void)
 {
         kmem_cache_destroy(fsync_entry_slab);
 }
 890