linux/fs/xfs/xfs_inode_item_recover.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_inode.h"
  14#include "xfs_trans.h"
  15#include "xfs_inode_item.h"
  16#include "xfs_trace.h"
  17#include "xfs_trans_priv.h"
  18#include "xfs_buf_item.h"
  19#include "xfs_log.h"
  20#include "xfs_error.h"
  21#include "xfs_log_priv.h"
  22#include "xfs_log_recover.h"
  23#include "xfs_icache.h"
  24#include "xfs_bmap_btree.h"
  25
  26STATIC void
  27xlog_recover_inode_ra_pass2(
  28        struct xlog                     *log,
  29        struct xlog_recover_item        *item)
  30{
  31        if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
  32                struct xfs_inode_log_format     *ilfp = item->ri_buf[0].i_addr;
  33
  34                xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
  35                                   &xfs_inode_buf_ra_ops);
  36        } else {
  37                struct xfs_inode_log_format_32  *ilfp = item->ri_buf[0].i_addr;
  38
  39                xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
  40                                   &xfs_inode_buf_ra_ops);
  41        }
  42}
  43
  44/*
  45 * Inode fork owner changes
  46 *
  47 * If we have been told that we have to reparent the inode fork, it's because an
  48 * extent swap operation on a CRC enabled filesystem has been done and we are
  49 * replaying it. We need to walk the BMBT of the appropriate fork and change the
  50 * owners of it.
  51 *
  52 * The complexity here is that we don't have an inode context to work with, so
  53 * after we've replayed the inode we need to instantiate one.  This is where the
  54 * fun begins.
  55 *
  56 * We are in the middle of log recovery, so we can't run transactions. That
  57 * means we cannot use cache coherent inode instantiation via xfs_iget(), as
  58 * that will result in the corresponding iput() running the inode through
  59 * xfs_inactive(). If we've just replayed an inode core that changes the link
  60 * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
  61 * transactions (bad!).
  62 *
  63 * So, to avoid this, we instantiate an inode directly from the inode core we've
  64 * just recovered. We have the buffer still locked, and all we really need to
  65 * instantiate is the inode core and the forks being modified. We can do this
  66 * manually, then run the inode btree owner change, and then tear down the
  67 * xfs_inode without having to run any transactions at all.
  68 *
  69 * Also, because we don't have a transaction context available here but need to
  70 * gather all the buffers we modify for writeback so we pass the buffer_list
  71 * instead for the operation to use.
  72 */
  73
  74STATIC int
  75xfs_recover_inode_owner_change(
  76        struct xfs_mount        *mp,
  77        struct xfs_dinode       *dip,
  78        struct xfs_inode_log_format *in_f,
  79        struct list_head        *buffer_list)
  80{
  81        struct xfs_inode        *ip;
  82        int                     error;
  83
  84        ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
  85
  86        ip = xfs_inode_alloc(mp, in_f->ilf_ino);
  87        if (!ip)
  88                return -ENOMEM;
  89
  90        /* instantiate the inode */
  91        ASSERT(dip->di_version >= 3);
  92
  93        error = xfs_inode_from_disk(ip, dip);
  94        if (error)
  95                goto out_free_ip;
  96
  97        if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
  98                ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
  99                error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
 100                                              ip->i_ino, buffer_list);
 101                if (error)
 102                        goto out_free_ip;
 103        }
 104
 105        if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
 106                ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
 107                error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
 108                                              ip->i_ino, buffer_list);
 109                if (error)
 110                        goto out_free_ip;
 111        }
 112
 113out_free_ip:
 114        xfs_inode_free(ip);
 115        return error;
 116}
 117
 118static inline bool xfs_log_dinode_has_bigtime(const struct xfs_log_dinode *ld)
 119{
 120        return ld->di_version >= 3 &&
 121               (ld->di_flags2 & XFS_DIFLAG2_BIGTIME);
 122}
 123
 124/* Convert a log timestamp to an ondisk timestamp. */
 125static inline xfs_timestamp_t
 126xfs_log_dinode_to_disk_ts(
 127        struct xfs_log_dinode           *from,
 128        const xfs_log_timestamp_t       its)
 129{
 130        struct xfs_legacy_timestamp     *lts;
 131        struct xfs_log_legacy_timestamp *lits;
 132        xfs_timestamp_t                 ts;
 133
 134        if (xfs_log_dinode_has_bigtime(from))
 135                return cpu_to_be64(its);
 136
 137        lts = (struct xfs_legacy_timestamp *)&ts;
 138        lits = (struct xfs_log_legacy_timestamp *)&its;
 139        lts->t_sec = cpu_to_be32(lits->t_sec);
 140        lts->t_nsec = cpu_to_be32(lits->t_nsec);
 141
 142        return ts;
 143}
 144
 145STATIC void
 146xfs_log_dinode_to_disk(
 147        struct xfs_log_dinode   *from,
 148        struct xfs_dinode       *to)
 149{
 150        to->di_magic = cpu_to_be16(from->di_magic);
 151        to->di_mode = cpu_to_be16(from->di_mode);
 152        to->di_version = from->di_version;
 153        to->di_format = from->di_format;
 154        to->di_onlink = 0;
 155        to->di_uid = cpu_to_be32(from->di_uid);
 156        to->di_gid = cpu_to_be32(from->di_gid);
 157        to->di_nlink = cpu_to_be32(from->di_nlink);
 158        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 159        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 160        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 161
 162        to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
 163        to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
 164        to->di_ctime = xfs_log_dinode_to_disk_ts(from, from->di_ctime);
 165
 166        to->di_size = cpu_to_be64(from->di_size);
 167        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 168        to->di_extsize = cpu_to_be32(from->di_extsize);
 169        to->di_nextents = cpu_to_be32(from->di_nextents);
 170        to->di_anextents = cpu_to_be16(from->di_anextents);
 171        to->di_forkoff = from->di_forkoff;
 172        to->di_aformat = from->di_aformat;
 173        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 174        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 175        to->di_flags = cpu_to_be16(from->di_flags);
 176        to->di_gen = cpu_to_be32(from->di_gen);
 177
 178        if (from->di_version == 3) {
 179                to->di_changecount = cpu_to_be64(from->di_changecount);
 180                to->di_crtime = xfs_log_dinode_to_disk_ts(from,
 181                                                          from->di_crtime);
 182                to->di_flags2 = cpu_to_be64(from->di_flags2);
 183                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 184                to->di_ino = cpu_to_be64(from->di_ino);
 185                to->di_lsn = cpu_to_be64(from->di_lsn);
 186                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 187                uuid_copy(&to->di_uuid, &from->di_uuid);
 188                to->di_flushiter = 0;
 189        } else {
 190                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 191        }
 192}
 193
 194STATIC int
 195xlog_recover_inode_commit_pass2(
 196        struct xlog                     *log,
 197        struct list_head                *buffer_list,
 198        struct xlog_recover_item        *item,
 199        xfs_lsn_t                       current_lsn)
 200{
 201        struct xfs_inode_log_format     *in_f;
 202        struct xfs_mount                *mp = log->l_mp;
 203        struct xfs_buf                  *bp;
 204        struct xfs_dinode               *dip;
 205        int                             len;
 206        char                            *src;
 207        char                            *dest;
 208        int                             error;
 209        int                             attr_index;
 210        uint                            fields;
 211        struct xfs_log_dinode           *ldip;
 212        uint                            isize;
 213        int                             need_free = 0;
 214
 215        if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 216                in_f = item->ri_buf[0].i_addr;
 217        } else {
 218                in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
 219                need_free = 1;
 220                error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
 221                if (error)
 222                        goto error;
 223        }
 224
 225        /*
 226         * Inode buffers can be freed, look out for it,
 227         * and do not replay the inode.
 228         */
 229        if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
 230                error = 0;
 231                trace_xfs_log_recover_inode_cancel(log, in_f);
 232                goto error;
 233        }
 234        trace_xfs_log_recover_inode_recover(log, in_f);
 235
 236        error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
 237                        0, &bp, &xfs_inode_buf_ops);
 238        if (error)
 239                goto error;
 240        ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
 241        dip = xfs_buf_offset(bp, in_f->ilf_boffset);
 242
 243        /*
 244         * Make sure the place we're flushing out to really looks
 245         * like an inode!
 246         */
 247        if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
 248                xfs_alert(mp,
 249        "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
 250                        __func__, dip, bp, in_f->ilf_ino);
 251                error = -EFSCORRUPTED;
 252                goto out_release;
 253        }
 254        ldip = item->ri_buf[1].i_addr;
 255        if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
 256                xfs_alert(mp,
 257                        "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
 258                        __func__, item, in_f->ilf_ino);
 259                error = -EFSCORRUPTED;
 260                goto out_release;
 261        }
 262
 263        /*
 264         * If the inode has an LSN in it, recover the inode only if it's less
 265         * than the lsn of the transaction we are replaying. Note: we still
 266         * need to replay an owner change even though the inode is more recent
 267         * than the transaction as there is no guarantee that all the btree
 268         * blocks are more recent than this transaction, too.
 269         */
 270        if (dip->di_version >= 3) {
 271                xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
 272
 273                if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
 274                        trace_xfs_log_recover_inode_skip(log, in_f);
 275                        error = 0;
 276                        goto out_owner_change;
 277                }
 278        }
 279
 280        /*
 281         * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
 282         * are transactional and if ordering is necessary we can determine that
 283         * more accurately by the LSN field in the V3 inode core. Don't trust
 284         * the inode versions we might be changing them here - use the
 285         * superblock flag to determine whether we need to look at di_flushiter
 286         * to skip replay when the on disk inode is newer than the log one
 287         */
 288        if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
 289            ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
 290                /*
 291                 * Deal with the wrap case, DI_MAX_FLUSH is less
 292                 * than smaller numbers
 293                 */
 294                if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
 295                    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
 296                        /* do nothing */
 297                } else {
 298                        trace_xfs_log_recover_inode_skip(log, in_f);
 299                        error = 0;
 300                        goto out_release;
 301                }
 302        }
 303
 304        /* Take the opportunity to reset the flush iteration count */
 305        ldip->di_flushiter = 0;
 306
 307        if (unlikely(S_ISREG(ldip->di_mode))) {
 308                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
 309                    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
 310                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
 311                                         XFS_ERRLEVEL_LOW, mp, ldip,
 312                                         sizeof(*ldip));
 313                        xfs_alert(mp,
 314                "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
 315                "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
 316                                __func__, item, dip, bp, in_f->ilf_ino);
 317                        error = -EFSCORRUPTED;
 318                        goto out_release;
 319                }
 320        } else if (unlikely(S_ISDIR(ldip->di_mode))) {
 321                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
 322                    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
 323                    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
 324                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
 325                                             XFS_ERRLEVEL_LOW, mp, ldip,
 326                                             sizeof(*ldip));
 327                        xfs_alert(mp,
 328                "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
 329                "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
 330                                __func__, item, dip, bp, in_f->ilf_ino);
 331                        error = -EFSCORRUPTED;
 332                        goto out_release;
 333                }
 334        }
 335        if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
 336                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
 337                                     XFS_ERRLEVEL_LOW, mp, ldip,
 338                                     sizeof(*ldip));
 339                xfs_alert(mp,
 340        "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
 341        "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
 342                        __func__, item, dip, bp, in_f->ilf_ino,
 343                        ldip->di_nextents + ldip->di_anextents,
 344                        ldip->di_nblocks);
 345                error = -EFSCORRUPTED;
 346                goto out_release;
 347        }
 348        if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
 349                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
 350                                     XFS_ERRLEVEL_LOW, mp, ldip,
 351                                     sizeof(*ldip));
 352                xfs_alert(mp,
 353        "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
 354        "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
 355                        item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
 356                error = -EFSCORRUPTED;
 357                goto out_release;
 358        }
 359        isize = xfs_log_dinode_size(mp);
 360        if (unlikely(item->ri_buf[1].i_len > isize)) {
 361                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
 362                                     XFS_ERRLEVEL_LOW, mp, ldip,
 363                                     sizeof(*ldip));
 364                xfs_alert(mp,
 365                        "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
 366                        __func__, item->ri_buf[1].i_len, item);
 367                error = -EFSCORRUPTED;
 368                goto out_release;
 369        }
 370
 371        /* recover the log dinode inode into the on disk inode */
 372        xfs_log_dinode_to_disk(ldip, dip);
 373
 374        fields = in_f->ilf_fields;
 375        if (fields & XFS_ILOG_DEV)
 376                xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
 377
 378        if (in_f->ilf_size == 2)
 379                goto out_owner_change;
 380        len = item->ri_buf[2].i_len;
 381        src = item->ri_buf[2].i_addr;
 382        ASSERT(in_f->ilf_size <= 4);
 383        ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
 384        ASSERT(!(fields & XFS_ILOG_DFORK) ||
 385               (len == in_f->ilf_dsize));
 386
 387        switch (fields & XFS_ILOG_DFORK) {
 388        case XFS_ILOG_DDATA:
 389        case XFS_ILOG_DEXT:
 390                memcpy(XFS_DFORK_DPTR(dip), src, len);
 391                break;
 392
 393        case XFS_ILOG_DBROOT:
 394                xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
 395                                 (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
 396                                 XFS_DFORK_DSIZE(dip, mp));
 397                break;
 398
 399        default:
 400                /*
 401                 * There are no data fork flags set.
 402                 */
 403                ASSERT((fields & XFS_ILOG_DFORK) == 0);
 404                break;
 405        }
 406
 407        /*
 408         * If we logged any attribute data, recover it.  There may or
 409         * may not have been any other non-core data logged in this
 410         * transaction.
 411         */
 412        if (in_f->ilf_fields & XFS_ILOG_AFORK) {
 413                if (in_f->ilf_fields & XFS_ILOG_DFORK) {
 414                        attr_index = 3;
 415                } else {
 416                        attr_index = 2;
 417                }
 418                len = item->ri_buf[attr_index].i_len;
 419                src = item->ri_buf[attr_index].i_addr;
 420                ASSERT(len == in_f->ilf_asize);
 421
 422                switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
 423                case XFS_ILOG_ADATA:
 424                case XFS_ILOG_AEXT:
 425                        dest = XFS_DFORK_APTR(dip);
 426                        ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
 427                        memcpy(dest, src, len);
 428                        break;
 429
 430                case XFS_ILOG_ABROOT:
 431                        dest = XFS_DFORK_APTR(dip);
 432                        xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
 433                                         len, (struct xfs_bmdr_block *)dest,
 434                                         XFS_DFORK_ASIZE(dip, mp));
 435                        break;
 436
 437                default:
 438                        xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
 439                        ASSERT(0);
 440                        error = -EFSCORRUPTED;
 441                        goto out_release;
 442                }
 443        }
 444
 445out_owner_change:
 446        /* Recover the swapext owner change unless inode has been deleted */
 447        if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
 448            (dip->di_mode != 0))
 449                error = xfs_recover_inode_owner_change(mp, dip, in_f,
 450                                                       buffer_list);
 451        /* re-generate the checksum. */
 452        xfs_dinode_calc_crc(log->l_mp, dip);
 453
 454        ASSERT(bp->b_mount == mp);
 455        bp->b_flags |= _XBF_LOGRECOVERY;
 456        xfs_buf_delwri_queue(bp, buffer_list);
 457
 458out_release:
 459        xfs_buf_relse(bp);
 460error:
 461        if (need_free)
 462                kmem_free(in_f);
 463        return error;
 464}
 465
 466const struct xlog_recover_item_ops xlog_inode_item_ops = {
 467        .item_type              = XFS_LI_INODE,
 468        .ra_pass2               = xlog_recover_inode_ra_pass2,
 469        .commit_pass2           = xlog_recover_inode_commit_pass2,
 470};
 471