linux/fs/ubifs/recovery.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Adrian Hunter
  20 *          Artem Bityutskiy (Битюцкий Артём)
  21 */
  22
  23/*
  24 * This file implements functions needed to recover from unclean un-mounts.
  25 * When UBIFS is mounted, it checks a flag on the master node to determine if
   26 * an un-mount was completed successfully. If not, the process of mounting
   27 * incorporates additional checking and fixing of on-flash data structures.
  28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
  29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
  30 * read-only, and the flash is not modified in that case.
  31 */
  32
  33#include <linux/crc32.h>
  34#include "ubifs.h"
  35
  36/**
  37 * is_empty - determine whether a buffer is empty (contains all 0xff).
  38 * @buf: buffer to clean
  39 * @len: length of buffer
  40 *
  41 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
  42 * %0 is returned.
  43 */
  44static int is_empty(void *buf, int len)
  45{
  46        uint8_t *p = buf;
  47        int i;
  48
  49        for (i = 0; i < len; i++)
  50                if (*p++ != 0xff)
  51                        return 0;
  52        return 1;
  53}
  54
  55/**
  56 * get_master_node - get the last valid master node allowing for corruption.
  57 * @c: UBIFS file-system description object
  58 * @lnum: LEB number
  59 * @pbuf: buffer containing the LEB read, is returned here
  60 * @mst: master node, if found, is returned here
  61 * @cor: corruption, if found, is returned here
  62 *
  63 * This function allocates a buffer, reads the LEB into it, and finds and
  64 * returns the last valid master node allowing for one area of corruption.
  65 * The corrupt area, if there is one, must be consistent with the assumption
  66 * that it is the result of an unclean unmount while the master node was being
  67 * written. Under those circumstances, it is valid to use the previously written
  68 * master node.
  69 *
  70 * This function returns %0 on success and a negative error code on failure.
  71 */
  72static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
  73                           struct ubifs_mst_node **mst, void **cor)
  74{
  75        const int sz = c->mst_node_alsz;
  76        int err, offs, len;
  77        void *sbuf, *buf;
  78
  79        sbuf = vmalloc(c->leb_size);
  80        if (!sbuf)
  81                return -ENOMEM;
  82
  83        err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
  84        if (err && err != -EBADMSG)
  85                goto out_free;
  86
  87        /* Find the first position that is definitely not a node */
  88        offs = 0;
  89        buf = sbuf;
  90        len = c->leb_size;
  91        while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
  92                struct ubifs_ch *ch = buf;
  93
  94                if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
  95                        break;
  96                offs += sz;
  97                buf  += sz;
  98                len  -= sz;
  99        }
 100        /* See if there was a valid master node before that */
 101        if (offs) {
 102                int ret;
 103
 104                offs -= sz;
 105                buf  -= sz;
 106                len  += sz;
 107                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 108                if (ret != SCANNED_A_NODE && offs) {
 109                        /* Could have been corruption so check one place back */
 110                        offs -= sz;
 111                        buf  -= sz;
 112                        len  += sz;
 113                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 114                        if (ret != SCANNED_A_NODE)
 115                                /*
 116                                 * We accept only one area of corruption because
 117                                 * we are assuming that it was caused while
 118                                 * trying to write a master node.
 119                                 */
 120                                goto out_err;
 121                }
 122                if (ret == SCANNED_A_NODE) {
 123                        struct ubifs_ch *ch = buf;
 124
 125                        if (ch->node_type != UBIFS_MST_NODE)
 126                                goto out_err;
 127                        dbg_rcvry("found a master node at %d:%d", lnum, offs);
 128                        *mst = buf;
 129                        offs += sz;
 130                        buf  += sz;
 131                        len  -= sz;
 132                }
 133        }
 134        /* Check for corruption */
 135        if (offs < c->leb_size) {
 136                if (!is_empty(buf, min_t(int, len, sz))) {
 137                        *cor = buf;
 138                        dbg_rcvry("found corruption at %d:%d", lnum, offs);
 139                }
 140                offs += sz;
 141                buf  += sz;
 142                len  -= sz;
 143        }
 144        /* Check remaining empty space */
 145        if (offs < c->leb_size)
 146                if (!is_empty(buf, len))
 147                        goto out_err;
 148        *pbuf = sbuf;
 149        return 0;
 150
 151out_err:
 152        err = -EINVAL;
 153out_free:
 154        vfree(sbuf);
 155        *mst = NULL;
 156        *cor = NULL;
 157        return err;
 158}
 159
 160/**
 161 * write_rcvrd_mst_node - write recovered master node.
 162 * @c: UBIFS file-system description object
 163 * @mst: master node
 164 *
 165 * This function returns %0 on success and a negative error code on failure.
 166 */
 167static int write_rcvrd_mst_node(struct ubifs_info *c,
 168                                struct ubifs_mst_node *mst)
 169{
 170        int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
 171        uint32_t save_flags;
 172
 173        dbg_rcvry("recovery");
 174
 175        save_flags = mst->flags;
 176        mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
 177
 178        ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
 179        err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
 180        if (err)
 181                goto out;
 182        err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
 183        if (err)
 184                goto out;
 185out:
 186        mst->flags = save_flags;
 187        return err;
 188}
 189
 190/**
 191 * ubifs_recover_master_node - recover the master node.
 192 * @c: UBIFS file-system description object
 193 *
 194 * This function recovers the master node from corruption that may occur due to
 195 * an unclean unmount.
 196 *
 197 * This function returns %0 on success and a negative error code on failure.
 198 */
 199int ubifs_recover_master_node(struct ubifs_info *c)
 200{
 201        void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
 202        struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
 203        const int sz = c->mst_node_alsz;
 204        int err, offs1, offs2;
 205
 206        dbg_rcvry("recovery");
 207
 208        err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
 209        if (err)
 210                goto out_free;
 211
 212        err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
 213        if (err)
 214                goto out_free;
 215
 216        if (mst1) {
 217                offs1 = (void *)mst1 - buf1;
 218                if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
 219                    (offs1 == 0 && !cor1)) {
 220                        /*
 221                         * mst1 was written by recovery at offset 0 with no
 222                         * corruption.
 223                         */
 224                        dbg_rcvry("recovery recovery");
 225                        mst = mst1;
 226                } else if (mst2) {
 227                        offs2 = (void *)mst2 - buf2;
 228                        if (offs1 == offs2) {
 229                                /* Same offset, so must be the same */
 230                                if (memcmp((void *)mst1 + UBIFS_CH_SZ,
 231                                           (void *)mst2 + UBIFS_CH_SZ,
 232                                           UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
 233                                        goto out_err;
 234                                mst = mst1;
 235                        } else if (offs2 + sz == offs1) {
 236                                /* 1st LEB was written, 2nd was not */
 237                                if (cor1)
 238                                        goto out_err;
 239                                mst = mst1;
 240                        } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
 241                                /* 1st LEB was unmapped and written, 2nd not */
 242                                if (cor1)
 243                                        goto out_err;
 244                                mst = mst1;
 245                        } else
 246                                goto out_err;
 247                } else {
 248                        /*
 249                         * 2nd LEB was unmapped and about to be written, so
 250                         * there must be only one master node in the first LEB
 251                         * and no corruption.
 252                         */
 253                        if (offs1 != 0 || cor1)
 254                                goto out_err;
 255                        mst = mst1;
 256                }
 257        } else {
 258                if (!mst2)
 259                        goto out_err;
 260                /*
 261                 * 1st LEB was unmapped and about to be written, so there must
 262                 * be no room left in 2nd LEB.
 263                 */
 264                offs2 = (void *)mst2 - buf2;
 265                if (offs2 + sz + sz <= c->leb_size)
 266                        goto out_err;
 267                mst = mst2;
 268        }
 269
 270        dbg_rcvry("recovered master node from LEB %d",
 271                  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 272
 273        memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
 274
 275        if ((c->vfs_sb->s_flags & MS_RDONLY)) {
 276                /* Read-only mode. Keep a copy for switching to rw mode */
 277                c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
 278                if (!c->rcvrd_mst_node) {
 279                        err = -ENOMEM;
 280                        goto out_free;
 281                }
 282                memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
 283
 284                /*
 285                 * We had to recover the master node, which means there was an
 286                 * unclean reboot. However, it is possible that the master node
 287                 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
 288                 * E.g., consider the following chain of events:
 289                 *
 290                 * 1. UBIFS was cleanly unmounted, so the master node is clean
 291                 * 2. UBIFS is being mounted R/W and starts changing the master
 292                 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
 293                 *    so this LEB ends up with some amount of garbage at the
 294                 *    end.
 295                 * 3. UBIFS is being mounted R/O. We reach this place and
 296                 *    recover the master node from the second LEB
 297                 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
 298                 *    because we are being mounted R/O. We have to defer the
 299                 *    operation.
 300                 * 4. However, this master node (@c->mst_node) is marked as
 301                 *    clean (since the step 1). And if we just return, the
 302                 *    mount code will be confused and won't recover the master
 303                 *    node when it is re-mounter R/W later.
 304                 *
 305                 *    Thus, to force the recovery by marking the master node as
 306                 *    dirty.
 307                 */
 308                c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 309        } else {
 310                /* Write the recovered master node */
 311                c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
 312                err = write_rcvrd_mst_node(c, c->mst_node);
 313                if (err)
 314                        goto out_free;
 315        }
 316
 317        vfree(buf2);
 318        vfree(buf1);
 319
 320        return 0;
 321
 322out_err:
 323        err = -EINVAL;
 324out_free:
 325        ubifs_err("failed to recover master node");
 326        if (mst1) {
 327                dbg_err("dumping first master node");
 328                dbg_dump_node(c, mst1);
 329        }
 330        if (mst2) {
 331                dbg_err("dumping second master node");
 332                dbg_dump_node(c, mst2);
 333        }
 334        vfree(buf2);
 335        vfree(buf1);
 336        return err;
 337}
 338
 339/**
 340 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 341 * @c: UBIFS file-system description object
 342 *
 343 * This function writes the master node that was recovered during mounting in
 344 * read-only mode and must now be written because we are remounting rw.
 345 *
 346 * This function returns %0 on success and a negative error code on failure.
 347 */
 348int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
 349{
 350        int err;
 351
 352        if (!c->rcvrd_mst_node)
 353                return 0;
 354        c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 355        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 356        err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
 357        if (err)
 358                return err;
 359        kfree(c->rcvrd_mst_node);
 360        c->rcvrd_mst_node = NULL;
 361        return 0;
 362}
 363
 364/**
 365 * is_last_write - determine if an offset was in the last write to a LEB.
 366 * @c: UBIFS file-system description object
 367 * @buf: buffer to check
 368 * @offs: offset to check
 369 *
 370 * This function returns %1 if @offs was in the last write to the LEB whose data
 371 * is in @buf, otherwise %0 is returned.  The determination is made by checking
 372 * for subsequent empty space starting from the next min_io_size boundary (or a
 373 * bit less than the common header size if min_io_size is one).
 374 */
 375static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 376{
 377        int empty_offs;
 378        int check_len;
 379        uint8_t *p;
 380
 381        if (c->min_io_size == 1) {
 382                check_len = c->leb_size - offs;
 383                p = buf + check_len;
 384                for (; check_len > 0; check_len--)
 385                        if (*--p != 0xff)
 386                                break;
 387                /*
 388                 * 'check_len' is the size of the corruption which cannot be
 389                 * more than the size of 1 node if it was caused by an unclean
 390                 * unmount.
 391                 */
 392                if (check_len > UBIFS_MAX_NODE_SZ)
 393                        return 0;
 394                return 1;
 395        }
 396
 397        /*
 398         * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
 399         * last wbuf written. After that should be empty space.
 400         */
 401        empty_offs = ALIGN(offs + 1, c->min_io_size);
 402        check_len = c->leb_size - empty_offs;
 403        p = buf + empty_offs - offs;
 404
 405        for (; check_len > 0; check_len--)
 406                if (*p++ != 0xff)
 407                        return 0;
 408        return 1;
 409}
 410
 411/**
 412 * clean_buf - clean the data from an LEB sitting in a buffer.
 413 * @c: UBIFS file-system description object
 414 * @buf: buffer to clean
 415 * @lnum: LEB number to clean
 416 * @offs: offset from which to clean
 417 * @len: length of buffer
 418 *
 419 * This function pads up to the next min_io_size boundary (if there is one) and
 420 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 421 * min_io_size boundary (if there is one).
 422 */
 423static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 424                      int *offs, int *len)
 425{
 426        int empty_offs, pad_len;
 427
 428        lnum = lnum;
 429        dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 430
 431        if (c->min_io_size == 1) {
 432                memset(*buf, 0xff, c->leb_size - *offs);
 433                return;
 434        }
 435
 436        ubifs_assert(!(*offs & 7));
 437        empty_offs = ALIGN(*offs, c->min_io_size);
 438        pad_len = empty_offs - *offs;
 439        ubifs_pad(c, *buf, pad_len);
 440        *offs += pad_len;
 441        *buf += pad_len;
 442        *len -= pad_len;
 443        memset(*buf, 0xff, c->leb_size - empty_offs);
 444}
 445
 446/**
 447 * no_more_nodes - determine if there are no more nodes in a buffer.
 448 * @c: UBIFS file-system description object
 449 * @buf: buffer to check
 450 * @len: length of buffer
 451 * @lnum: LEB number of the LEB from which @buf was read
 452 * @offs: offset from which @buf was read
 453 *
 454 * This function scans @buf for more nodes and returns %0 is a node is found and
 455 * %1 if no more nodes are found.
 456 */
 457static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 458                        int lnum, int offs)
 459{
 460        int skip, next_offs = 0;
 461
 462        if (len > UBIFS_DATA_NODE_SZ) {
 463                struct ubifs_ch *ch = buf;
 464                int dlen = le32_to_cpu(ch->len);
 465
 466                if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
 467                    dlen <= UBIFS_MAX_DATA_NODE_SZ)
 468                        /* The corrupt node looks like a data node */
 469                        next_offs = ALIGN(offs + dlen, 8);
 470        }
 471
 472        if (c->min_io_size == 1)
 473                skip = 8;
 474        else
 475                skip = ALIGN(offs + 1, c->min_io_size) - offs;
 476
 477        offs += skip;
 478        buf += skip;
 479        len -= skip;
 480        while (len > 8) {
 481                struct ubifs_ch *ch = buf;
 482                uint32_t magic = le32_to_cpu(ch->magic);
 483                int ret;
 484
 485                if (magic == UBIFS_NODE_MAGIC) {
 486                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 487                        if (ret == SCANNED_A_NODE || ret > 0) {
 488                                /*
 489                                 * There is a small chance this is just data in
 490                                 * a data node, so check that possibility. e.g.
 491                                 * this is part of a file that itself contains
 492                                 * a UBIFS image.
 493                                 */
 494                                if (next_offs && offs + le32_to_cpu(ch->len) <=
 495                                    next_offs)
 496                                        continue;
 497                                dbg_rcvry("unexpected node at %d:%d", lnum,
 498                                          offs);
 499                                return 0;
 500                        }
 501                }
 502                offs += 8;
 503                buf += 8;
 504                len -= 8;
 505        }
 506        return 1;
 507}
 508
 509/**
 510 * fix_unclean_leb - fix an unclean LEB.
 511 * @c: UBIFS file-system description object
 512 * @sleb: scanned LEB information
 513 * @start: offset where scan started
 514 */
 515static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 516                           int start)
 517{
 518        int lnum = sleb->lnum, endpt = start;
 519
 520        /* Get the end offset of the last node we are keeping */
 521        if (!list_empty(&sleb->nodes)) {
 522                struct ubifs_scan_node *snod;
 523
 524                snod = list_entry(sleb->nodes.prev,
 525                                  struct ubifs_scan_node, list);
 526                endpt = snod->offs + snod->len;
 527        }
 528
 529        if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
 530                /* Add to recovery list */
 531                struct ubifs_unclean_leb *ucleb;
 532
 533                dbg_rcvry("need to fix LEB %d start %d endpt %d",
 534                          lnum, start, sleb->endpt);
 535                ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
 536                if (!ucleb)
 537                        return -ENOMEM;
 538                ucleb->lnum = lnum;
 539                ucleb->endpt = endpt;
 540                list_add_tail(&ucleb->list, &c->unclean_leb_list);
 541        } else {
 542                /* Write the fixed LEB back to flash */
 543                int err;
 544
 545                dbg_rcvry("fixing LEB %d start %d endpt %d",
 546                          lnum, start, sleb->endpt);
 547                if (endpt == 0) {
 548                        err = ubifs_leb_unmap(c, lnum);
 549                        if (err)
 550                                return err;
 551                } else {
 552                        int len = ALIGN(endpt, c->min_io_size);
 553
 554                        if (start) {
 555                                err = ubi_read(c->ubi, lnum, sleb->buf, 0,
 556                                               start);
 557                                if (err)
 558                                        return err;
 559                        }
 560                        /* Pad to min_io_size */
 561                        if (len > endpt) {
 562                                int pad_len = len - ALIGN(endpt, 8);
 563
 564                                if (pad_len > 0) {
 565                                        void *buf = sleb->buf + len - pad_len;
 566
 567                                        ubifs_pad(c, buf, pad_len);
 568                                }
 569                        }
 570                        err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
 571                                             UBI_UNKNOWN);
 572                        if (err)
 573                                return err;
 574                }
 575        }
 576        return 0;
 577}
 578
 579/**
 580 * drop_incomplete_group - drop nodes from an incomplete group.
 581 * @sleb: scanned LEB information
 582 * @offs: offset of dropped nodes is returned here
 583 *
 584 * This function returns %1 if nodes are dropped and %0 otherwise.
 585 */
 586static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
 587{
 588        int dropped = 0;
 589
 590        while (!list_empty(&sleb->nodes)) {
 591                struct ubifs_scan_node *snod;
 592                struct ubifs_ch *ch;
 593
 594                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 595                                  list);
 596                ch = snod->node;
 597                if (ch->group_type != UBIFS_IN_NODE_GROUP)
 598                        return dropped;
 599                dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
 600                *offs = snod->offs;
 601                list_del(&snod->list);
 602                kfree(snod);
 603                sleb->nodes_cnt -= 1;
 604                dropped = 1;
 605        }
 606        return dropped;
 607}
 608
 609/**
 610 * ubifs_recover_leb - scan and recover a LEB.
 611 * @c: UBIFS file-system description object
 612 * @lnum: LEB number
 613 * @offs: offset
 614 * @sbuf: LEB-sized buffer to use
 615 * @grouped: nodes may be grouped for recovery
 616 *
 617 * This function does a scan of a LEB, but caters for errors that might have
 618 * been caused by the unclean unmount from which we are attempting to recover.
 619 *
 620 * This function returns %0 on success and a negative error code on failure.
 621 */
 622struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 623                                         int offs, void *sbuf, int grouped)
 624{
 625        int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
 626        int empty_chkd = 0, start = offs;
 627        struct ubifs_scan_leb *sleb;
 628        void *buf = sbuf + offs;
 629
 630        dbg_rcvry("%d:%d", lnum, offs);
 631
 632        sleb = ubifs_start_scan(c, lnum, offs, sbuf);
 633        if (IS_ERR(sleb))
 634                return sleb;
 635
 636        if (sleb->ecc)
 637                need_clean = 1;
 638
 639        while (len >= 8) {
 640                int ret;
 641
 642                dbg_scan("look at LEB %d:%d (%d bytes left)",
 643                         lnum, offs, len);
 644
 645                cond_resched();
 646
 647                /*
 648                 * Scan quietly until there is an error from which we cannot
 649                 * recover
 650                 */
 651                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 652
 653                if (ret == SCANNED_A_NODE) {
 654                        /* A valid node, and not a padding node */
 655                        struct ubifs_ch *ch = buf;
 656                        int node_len;
 657
 658                        err = ubifs_add_snod(c, sleb, buf, offs);
 659                        if (err)
 660                                goto error;
 661                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 662                        offs += node_len;
 663                        buf += node_len;
 664                        len -= node_len;
 665                        continue;
 666                }
 667
 668                if (ret > 0) {
 669                        /* Padding bytes or a valid padding node */
 670                        offs += ret;
 671                        buf += ret;
 672                        len -= ret;
 673                        continue;
 674                }
 675
 676                if (ret == SCANNED_EMPTY_SPACE) {
 677                        if (!is_empty(buf, len)) {
 678                                if (!is_last_write(c, buf, offs))
 679                                        break;
 680                                clean_buf(c, &buf, lnum, &offs, &len);
 681                                need_clean = 1;
 682                        }
 683                        empty_chkd = 1;
 684                        break;
 685                }
 686
 687                if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
 688                        if (is_last_write(c, buf, offs)) {
 689                                clean_buf(c, &buf, lnum, &offs, &len);
 690                                need_clean = 1;
 691                                empty_chkd = 1;
 692                                break;
 693                        }
 694
 695                if (ret == SCANNED_A_CORRUPT_NODE)
 696                        if (no_more_nodes(c, buf, len, lnum, offs)) {
 697                                clean_buf(c, &buf, lnum, &offs, &len);
 698                                need_clean = 1;
 699                                empty_chkd = 1;
 700                                break;
 701                        }
 702
 703                if (quiet) {
 704                        /* Redo the last scan but noisily */
 705                        quiet = 0;
 706                        continue;
 707                }
 708
 709                switch (ret) {
 710                case SCANNED_GARBAGE:
 711                        dbg_err("garbage");
 712                        goto corrupted;
 713                case SCANNED_A_CORRUPT_NODE:
 714                case SCANNED_A_BAD_PAD_NODE:
 715                        dbg_err("bad node");
 716                        goto corrupted;
 717                default:
 718                        dbg_err("unknown");
 719                        goto corrupted;
 720                }
 721        }
 722
 723        if (!empty_chkd && !is_empty(buf, len)) {
 724                if (is_last_write(c, buf, offs)) {
 725                        clean_buf(c, &buf, lnum, &offs, &len);
 726                        need_clean = 1;
 727                } else {
 728                        ubifs_err("corrupt empty space at LEB %d:%d",
 729                                  lnum, offs);
 730                        goto corrupted;
 731                }
 732        }
 733
 734        /* Drop nodes from incomplete group */
 735        if (grouped && drop_incomplete_group(sleb, &offs)) {
 736                buf = sbuf + offs;
 737                len = c->leb_size - offs;
 738                clean_buf(c, &buf, lnum, &offs, &len);
 739                need_clean = 1;
 740        }
 741
 742        if (offs % c->min_io_size) {
 743                clean_buf(c, &buf, lnum, &offs, &len);
 744                need_clean = 1;
 745        }
 746
 747        ubifs_end_scan(c, sleb, lnum, offs);
 748
 749        if (need_clean) {
 750                err = fix_unclean_leb(c, sleb, start);
 751                if (err)
 752                        goto error;
 753        }
 754
 755        return sleb;
 756
 757corrupted:
 758        ubifs_scanned_corruption(c, lnum, offs, buf);
 759        err = -EUCLEAN;
 760error:
 761        ubifs_err("LEB %d scanning failed", lnum);
 762        ubifs_scan_destroy(sleb);
 763        return ERR_PTR(err);
 764}
 765
 766/**
 767 * get_cs_sqnum - get commit start sequence number.
 768 * @c: UBIFS file-system description object
 769 * @lnum: LEB number of commit start node
 770 * @offs: offset of commit start node
 771 * @cs_sqnum: commit start sequence number is returned here
 772 *
 773 * This function returns %0 on success and a negative error code on failure.
 774 */
 775static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 776                        unsigned long long *cs_sqnum)
 777{
 778        struct ubifs_cs_node *cs_node = NULL;
 779        int err, ret;
 780
 781        dbg_rcvry("at %d:%d", lnum, offs);
 782        cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
 783        if (!cs_node)
 784                return -ENOMEM;
 785        if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 786                goto out_err;
 787        err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
 788        if (err && err != -EBADMSG)
 789                goto out_free;
 790        ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 791        if (ret != SCANNED_A_NODE) {
 792                dbg_err("Not a valid node");
 793                goto out_err;
 794        }
 795        if (cs_node->ch.node_type != UBIFS_CS_NODE) {
 796                dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
 797                goto out_err;
 798        }
 799        if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
 800                dbg_err("CS node cmt_no %llu != current cmt_no %llu",
 801                        (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 802                        c->cmt_no);
 803                goto out_err;
 804        }
 805        *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
 806        dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
 807        kfree(cs_node);
 808        return 0;
 809
 810out_err:
 811        err = -EINVAL;
 812out_free:
 813        ubifs_err("failed to get CS sqnum");
 814        kfree(cs_node);
 815        return err;
 816}
 817
 818/**
 819 * ubifs_recover_log_leb - scan and recover a log LEB.
 820 * @c: UBIFS file-system description object
 821 * @lnum: LEB number
 822 * @offs: offset
 823 * @sbuf: LEB-sized buffer to use
 824 *
 825 * This function does a scan of a LEB, but caters for errors that might have
 826 * been caused by the unclean unmount from which we are attempting to recover.
 827 *
 828 * This function returns %0 on success and a negative error code on failure.
 829 */
 830struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 831                                             int offs, void *sbuf)
 832{
 833        struct ubifs_scan_leb *sleb;
 834        int next_lnum;
 835
 836        dbg_rcvry("LEB %d", lnum);
 837        next_lnum = lnum + 1;
 838        if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
 839                next_lnum = UBIFS_LOG_LNUM;
 840        if (next_lnum != c->ltail_lnum) {
 841                /*
 842                 * We can only recover at the end of the log, so check that the
 843                 * next log LEB is empty or out of date.
 844                 */
 845                sleb = ubifs_scan(c, next_lnum, 0, sbuf);
 846                if (IS_ERR(sleb))
 847                        return sleb;
 848                if (sleb->nodes_cnt) {
 849                        struct ubifs_scan_node *snod;
 850                        unsigned long long cs_sqnum = c->cs_sqnum;
 851
 852                        snod = list_entry(sleb->nodes.next,
 853                                          struct ubifs_scan_node, list);
 854                        if (cs_sqnum == 0) {
 855                                int err;
 856
 857                                err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
 858                                if (err) {
 859                                        ubifs_scan_destroy(sleb);
 860                                        return ERR_PTR(err);
 861                                }
 862                        }
 863                        if (snod->sqnum > cs_sqnum) {
 864                                ubifs_err("unrecoverable log corruption "
 865                                          "in LEB %d", lnum);
 866                                ubifs_scan_destroy(sleb);
 867                                return ERR_PTR(-EUCLEAN);
 868                        }
 869                }
 870                ubifs_scan_destroy(sleb);
 871        }
 872        return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
 873}
 874
 875/**
 876 * recover_head - recover a head.
 877 * @c: UBIFS file-system description object
 878 * @lnum: LEB number of head to recover
 879 * @offs: offset of head to recover
 880 * @sbuf: LEB-sized buffer to use
 881 *
 882 * This function ensures that there is no data on the flash at a head location.
 883 *
 884 * This function returns %0 on success and a negative error code on failure.
 885 */
 886static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 887                        void *sbuf)
 888{
 889        int len, err, need_clean = 0;
 890
 891        if (c->min_io_size > 1)
 892                len = c->min_io_size;
 893        else
 894                len = 512;
 895        if (offs + len > c->leb_size)
 896                len = c->leb_size - offs;
 897
 898        if (!len)
 899                return 0;
 900
 901        /* Read at the head location and check it is empty flash */
 902        err = ubi_read(c->ubi, lnum, sbuf, offs, len);
 903        if (err)
 904                need_clean = 1;
 905        else {
 906                uint8_t *p = sbuf;
 907
 908                while (len--)
 909                        if (*p++ != 0xff) {
 910                                need_clean = 1;
 911                                break;
 912                        }
 913        }
 914
 915        if (need_clean) {
 916                dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 917                if (offs == 0)
 918                        return ubifs_leb_unmap(c, lnum);
 919                err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
 920                if (err)
 921                        return err;
 922                return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
 923        }
 924
 925        return 0;
 926}
 927
 928/**
 929 * ubifs_recover_inl_heads - recover index and LPT heads.
 930 * @c: UBIFS file-system description object
 931 * @sbuf: LEB-sized buffer to use
 932 *
 933 * This function ensures that there is no data on the flash at the index and
 934 * LPT head locations.
 935 *
 936 * This deals with the recovery of a half-completed journal commit. UBIFS is
 937 * careful never to overwrite the last version of the index or the LPT. Because
 938 * the index and LPT are wandering trees, data from a half-completed commit will
 939 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
 940 * assumed to be empty and will be unmapped anyway before use, or in the index
 941 * and LPT heads.
 942 *
 943 * This function returns %0 on success and a negative error code on failure.
 944 */
 945int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
 946{
 947        int err;
 948
 949        ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
 950
 951        dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
 952        err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
 953        if (err)
 954                return err;
 955
 956        dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
 957        err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 958        if (err)
 959                return err;
 960
 961        return 0;
 962}
 963
 964/**
 965 *  clean_an_unclean_leb - read and write a LEB to remove corruption.
 966 * @c: UBIFS file-system description object
 967 * @ucleb: unclean LEB information
 968 * @sbuf: LEB-sized buffer to use
 969 *
 970 * This function reads a LEB up to a point pre-determined by the mount recovery,
 971 * checks the nodes, and writes the result back to the flash, thereby cleaning
 972 * off any following corruption, or non-fatal ECC errors.
 973 *
 974 * This function returns %0 on success and a negative error code on failure.
 975 */
 976static int clean_an_unclean_leb(const struct ubifs_info *c,
 977                                struct ubifs_unclean_leb *ucleb, void *sbuf)
 978{
 979        int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
 980        void *buf = sbuf;
 981
 982        dbg_rcvry("LEB %d len %d", lnum, len);
 983
 984        if (len == 0) {
 985                /* Nothing to read, just unmap it */
 986                err = ubifs_leb_unmap(c, lnum);
 987                if (err)
 988                        return err;
 989                return 0;
 990        }
 991
 992        err = ubi_read(c->ubi, lnum, buf, offs, len);
 993        if (err && err != -EBADMSG)
 994                return err;
 995
 996        while (len >= 8) {
 997                int ret;
 998
 999                cond_resched();
1000
1001                /* Scan quietly until there is an error */
1002                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
1003
1004                if (ret == SCANNED_A_NODE) {
1005                        /* A valid node, and not a padding node */
1006                        struct ubifs_ch *ch = buf;
1007                        int node_len;
1008
1009                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
1010                        offs += node_len;
1011                        buf += node_len;
1012                        len -= node_len;
1013                        continue;
1014                }
1015
1016                if (ret > 0) {
1017                        /* Padding bytes or a valid padding node */
1018                        offs += ret;
1019                        buf += ret;
1020                        len -= ret;
1021                        continue;
1022                }
1023
1024                if (ret == SCANNED_EMPTY_SPACE) {
1025                        ubifs_err("unexpected empty space at %d:%d",
1026                                  lnum, offs);
1027                        return -EUCLEAN;
1028                }
1029
1030                if (quiet) {
1031                        /* Redo the last scan but noisily */
1032                        quiet = 0;
1033                        continue;
1034                }
1035
1036                ubifs_scanned_corruption(c, lnum, offs, buf);
1037                return -EUCLEAN;
1038        }
1039
1040        /* Pad to min_io_size */
1041        len = ALIGN(ucleb->endpt, c->min_io_size);
1042        if (len > ucleb->endpt) {
1043                int pad_len = len - ALIGN(ucleb->endpt, 8);
1044
1045                if (pad_len > 0) {
1046                        buf = c->sbuf + len - pad_len;
1047                        ubifs_pad(c, buf, pad_len);
1048                }
1049        }
1050
1051        /* Write back the LEB atomically */
1052        err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
1053        if (err)
1054                return err;
1055
1056        dbg_rcvry("cleaned LEB %d", lnum);
1057
1058        return 0;
1059}
1060
1061/**
1062 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1063 * @c: UBIFS file-system description object
1064 * @sbuf: LEB-sized buffer to use
1065 *
1066 * This function cleans a LEB identified during recovery that needs to be
1067 * written but was not because UBIFS was mounted read-only. This happens when
1068 * remounting to read-write mode.
1069 *
1070 * This function returns %0 on success and a negative error code on failure.
1071 */
1072int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1073{
1074        dbg_rcvry("recovery");
1075        while (!list_empty(&c->unclean_leb_list)) {
1076                struct ubifs_unclean_leb *ucleb;
1077                int err;
1078
1079                ucleb = list_entry(c->unclean_leb_list.next,
1080                                   struct ubifs_unclean_leb, list);
1081                err = clean_an_unclean_leb(c, ucleb, sbuf);
1082                if (err)
1083                        return err;
1084                list_del(&ucleb->list);
1085                kfree(ucleb);
1086        }
1087        return 0;
1088}
1089
1090/**
1091 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1092 * @c: UBIFS file-system description object
1093 *
1094 * Out-of-place garbage collection requires always one empty LEB with which to
1095 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1096 * written to the master node on unmounting. In the case of an unclean unmount
1097 * the value of gc_lnum recorded in the master node is out of date and cannot
1098 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1099 * However, there may not be enough empty space, in which case it must be
1100 * possible to GC the dirtiest LEB into the GC head LEB.
1101 *
1102 * This function also runs the commit which causes the TNC updates from
1103 * size-recovery and orphans to be written to the flash. That is important to
1104 * ensure correct replay order for subsequent mounts.
1105 *
1106 * This function returns %0 on success and a negative error code on failure.
1107 */
1108int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1109{
1110        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1111        struct ubifs_lprops lp;
1112        int lnum, err;
1113
1114        c->gc_lnum = -1;
1115        if (wbuf->lnum == -1) {
1116                dbg_rcvry("no GC head LEB");
1117                goto find_free;
1118        }
1119        /*
1120         * See whether the used space in the dirtiest LEB fits in the GC head
1121         * LEB.
1122         */
1123        if (wbuf->offs == c->leb_size) {
1124                dbg_rcvry("no room in GC head LEB");
1125                goto find_free;
1126        }
1127        err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1128        if (err) {
1129                if (err == -ENOSPC)
1130                        dbg_err("could not find a dirty LEB");
1131                return err;
1132        }
1133        ubifs_assert(!(lp.flags & LPROPS_INDEX));
1134        lnum = lp.lnum;
1135        if (lp.free + lp.dirty == c->leb_size) {
1136                /* An empty LEB was returned */
1137                if (lp.free != c->leb_size) {
1138                        err = ubifs_change_one_lp(c, lnum, c->leb_size,
1139                                                  0, 0, 0, 0);
1140                        if (err)
1141                                return err;
1142                }
1143                err = ubifs_leb_unmap(c, lnum);
1144                if (err)
1145                        return err;
1146                c->gc_lnum = lnum;
1147                dbg_rcvry("allocated LEB %d for GC", lnum);
1148                /* Run the commit */
1149                dbg_rcvry("committing");
1150                return ubifs_run_commit(c);
1151        }
1152        /*
1153         * There was no empty LEB so the used space in the dirtiest LEB must fit
1154         * in the GC head LEB.
1155         */
1156        if (lp.free + lp.dirty < wbuf->offs) {
1157                dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1158                          lnum, wbuf->lnum, wbuf->offs);
1159                err = ubifs_return_leb(c, lnum);
1160                if (err)
1161                        return err;
1162                goto find_free;
1163        }
1164        /*
1165         * We run the commit before garbage collection otherwise subsequent
1166         * mounts will see the GC and orphan deletion in a different order.
1167         */
1168        dbg_rcvry("committing");
1169        err = ubifs_run_commit(c);
1170        if (err)
1171                return err;
1172        /*
1173         * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
1174         * - use locking to keep 'ubifs_assert()' happy.
1175         */
1176        dbg_rcvry("GC'ing LEB %d", lnum);
1177        mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1178        err = ubifs_garbage_collect_leb(c, &lp);
1179        if (err >= 0) {
1180                int err2 = ubifs_wbuf_sync_nolock(wbuf);
1181
1182                if (err2)
1183                        err = err2;
1184        }
1185        mutex_unlock(&wbuf->io_mutex);
1186        if (err < 0) {
1187                dbg_err("GC failed, error %d", err);
1188                if (err == -EAGAIN)
1189                        err = -EINVAL;
1190                return err;
1191        }
1192        if (err != LEB_RETAINED) {
1193                dbg_err("GC returned %d", err);
1194                return -EINVAL;
1195        }
1196        err = ubifs_leb_unmap(c, c->gc_lnum);
1197        if (err)
1198                return err;
1199        dbg_rcvry("allocated LEB %d for GC", lnum);
1200        return 0;
1201
1202find_free:
1203        /*
1204         * There is no GC head LEB or the free space in the GC head LEB is too
1205         * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
1206         * GC is not run.
1207         */
1208        lnum = ubifs_find_free_leb_for_idx(c);
1209        if (lnum < 0) {
1210                dbg_err("could not find an empty LEB");
1211                return lnum;
1212        }
1213        /* And reset the index flag */
1214        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1215                                  LPROPS_INDEX, 0);
1216        if (err)
1217                return err;
1218        c->gc_lnum = lnum;
1219        dbg_rcvry("allocated LEB %d for GC", lnum);
1220        /* Run the commit */
1221        dbg_rcvry("committing");
1222        return ubifs_run_commit(c);
1223}
1224
1225/**
1226 * struct size_entry - inode size information for recovery.
1227 * @rb: link in the RB-tree of sizes
1228 * @inum: inode number
1229 * @i_size: size on inode
1230 * @d_size: maximum size based on data nodes
1231 * @exists: indicates whether the inode exists
1232 * @inode: inode if pinned in memory awaiting rw mode to fix it
1233 */
1234struct size_entry {
1235        struct rb_node rb;
1236        ino_t inum;
1237        loff_t i_size;
1238        loff_t d_size;
1239        int exists;
1240        struct inode *inode;
1241};
1242
1243/**
1244 * add_ino - add an entry to the size tree.
1245 * @c: UBIFS file-system description object
1246 * @inum: inode number
1247 * @i_size: size on inode
1248 * @d_size: maximum size based on data nodes
1249 * @exists: indicates whether the inode exists
1250 */
1251static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1252                   loff_t d_size, int exists)
1253{
1254        struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1255        struct size_entry *e;
1256
1257        while (*p) {
1258                parent = *p;
1259                e = rb_entry(parent, struct size_entry, rb);
1260                if (inum < e->inum)
1261                        p = &(*p)->rb_left;
1262                else
1263                        p = &(*p)->rb_right;
1264        }
1265
1266        e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1267        if (!e)
1268                return -ENOMEM;
1269
1270        e->inum = inum;
1271        e->i_size = i_size;
1272        e->d_size = d_size;
1273        e->exists = exists;
1274
1275        rb_link_node(&e->rb, parent, p);
1276        rb_insert_color(&e->rb, &c->size_tree);
1277
1278        return 0;
1279}
1280
1281/**
1282 * find_ino - find an entry on the size tree.
1283 * @c: UBIFS file-system description object
1284 * @inum: inode number
1285 */
1286static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1287{
1288        struct rb_node *p = c->size_tree.rb_node;
1289        struct size_entry *e;
1290
1291        while (p) {
1292                e = rb_entry(p, struct size_entry, rb);
1293                if (inum < e->inum)
1294                        p = p->rb_left;
1295                else if (inum > e->inum)
1296                        p = p->rb_right;
1297                else
1298                        return e;
1299        }
1300        return NULL;
1301}
1302
1303/**
1304 * remove_ino - remove an entry from the size tree.
1305 * @c: UBIFS file-system description object
1306 * @inum: inode number
1307 */
1308static void remove_ino(struct ubifs_info *c, ino_t inum)
1309{
1310        struct size_entry *e = find_ino(c, inum);
1311
1312        if (!e)
1313                return;
1314        rb_erase(&e->rb, &c->size_tree);
1315        kfree(e);
1316}
1317
1318/**
1319 * ubifs_destroy_size_tree - free resources related to the size tree.
1320 * @c: UBIFS file-system description object
1321 */
1322void ubifs_destroy_size_tree(struct ubifs_info *c)
1323{
1324        struct rb_node *this = c->size_tree.rb_node;
1325        struct size_entry *e;
1326
1327        while (this) {
1328                if (this->rb_left) {
1329                        this = this->rb_left;
1330                        continue;
1331                } else if (this->rb_right) {
1332                        this = this->rb_right;
1333                        continue;
1334                }
1335                e = rb_entry(this, struct size_entry, rb);
1336                if (e->inode)
1337                        iput(e->inode);
1338                this = rb_parent(this);
1339                if (this) {
1340                        if (this->rb_left == &e->rb)
1341                                this->rb_left = NULL;
1342                        else
1343                                this->rb_right = NULL;
1344                }
1345                kfree(e);
1346        }
1347        c->size_tree = RB_ROOT;
1348}
1349
1350/**
1351 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1352 * @c: UBIFS file-system description object
1353 * @key: node key
1354 * @deletion: node is for a deletion
1355 * @new_size: inode size
1356 *
1357 * This function has two purposes:
1358 *     1) to ensure there are no data nodes that fall outside the inode size
1359 *     2) to ensure there are no data nodes for inodes that do not exist
1360 * To accomplish those purposes, a rb-tree is constructed containing an entry
1361 * for each inode number in the journal that has not been deleted, and recording
1362 * the size from the inode node, the maximum size of any data node (also altered
1363 * by truncations) and a flag indicating a inode number for which no inode node
1364 * was present in the journal.
1365 *
1366 * Note that there is still the possibility that there are data nodes that have
1367 * been committed that are beyond the inode size, however the only way to find
1368 * them would be to scan the entire index. Alternatively, some provision could
1369 * be made to record the size of inodes at the start of commit, which would seem
1370 * very cumbersome for a scenario that is quite unlikely and the only negative
1371 * consequence of which is wasted space.
1372 *
1373 * This functions returns %0 on success and a negative error code on failure.
1374 */
1375int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1376                             int deletion, loff_t new_size)
1377{
1378        ino_t inum = key_inum(c, key);
1379        struct size_entry *e;
1380        int err;
1381
1382        switch (key_type(c, key)) {
1383        case UBIFS_INO_KEY:
1384                if (deletion)
1385                        remove_ino(c, inum);
1386                else {
1387                        e = find_ino(c, inum);
1388                        if (e) {
1389                                e->i_size = new_size;
1390                                e->exists = 1;
1391                        } else {
1392                                err = add_ino(c, inum, new_size, 0, 1);
1393                                if (err)
1394                                        return err;
1395                        }
1396                }
1397                break;
1398        case UBIFS_DATA_KEY:
1399                e = find_ino(c, inum);
1400                if (e) {
1401                        if (new_size > e->d_size)
1402                                e->d_size = new_size;
1403                } else {
1404                        err = add_ino(c, inum, 0, new_size, 0);
1405                        if (err)
1406                                return err;
1407                }
1408                break;
1409        case UBIFS_TRUN_KEY:
1410                e = find_ino(c, inum);
1411                if (e)
1412                        e->d_size = new_size;
1413                break;
1414        }
1415        return 0;
1416}
1417
1418/**
1419 * fix_size_in_place - fix inode size in place on flash.
1420 * @c: UBIFS file-system description object
1421 * @e: inode size information for recovery
1422 */
1423static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1424{
1425        struct ubifs_ino_node *ino = c->sbuf;
1426        unsigned char *p;
1427        union ubifs_key key;
1428        int err, lnum, offs, len;
1429        loff_t i_size;
1430        uint32_t crc;
1431
1432        /* Locate the inode node LEB number and offset */
1433        ino_key_init(c, &key, e->inum);
1434        err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1435        if (err)
1436                goto out;
1437        /*
1438         * If the size recorded on the inode node is greater than the size that
1439         * was calculated from nodes in the journal then don't change the inode.
1440         */
1441        i_size = le64_to_cpu(ino->size);
1442        if (i_size >= e->d_size)
1443                return 0;
1444        /* Read the LEB */
1445        err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
1446        if (err)
1447                goto out;
1448        /* Change the size field and recalculate the CRC */
1449        ino = c->sbuf + offs;
1450        ino->size = cpu_to_le64(e->d_size);
1451        len = le32_to_cpu(ino->ch.len);
1452        crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1453        ino->ch.crc = cpu_to_le32(crc);
1454        /* Work out where data in the LEB ends and free space begins */
1455        p = c->sbuf;
1456        len = c->leb_size - 1;
1457        while (p[len] == 0xff)
1458                len -= 1;
1459        len = ALIGN(len + 1, c->min_io_size);
1460        /* Atomically write the fixed LEB back again */
1461        err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1462        if (err)
1463                goto out;
1464        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
1465                  i_size, e->d_size);
1466        return 0;
1467
1468out:
1469        ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1470                   e->inum, e->i_size, e->d_size, err);
1471        return err;
1472}
1473
1474/**
1475 * ubifs_recover_size - recover inode size.
1476 * @c: UBIFS file-system description object
1477 *
1478 * This function attempts to fix inode size discrepancies identified by the
1479 * 'ubifs_recover_size_accum()' function.
1480 *
1481 * This functions returns %0 on success and a negative error code on failure.
1482 */
1483int ubifs_recover_size(struct ubifs_info *c)
1484{
1485        struct rb_node *this = rb_first(&c->size_tree);
1486
1487        while (this) {
1488                struct size_entry *e;
1489                int err;
1490
1491                e = rb_entry(this, struct size_entry, rb);
1492                if (!e->exists) {
1493                        union ubifs_key key;
1494
1495                        ino_key_init(c, &key, e->inum);
1496                        err = ubifs_tnc_lookup(c, &key, c->sbuf);
1497                        if (err && err != -ENOENT)
1498                                return err;
1499                        if (err == -ENOENT) {
1500                                /* Remove data nodes that have no inode */
1501                                dbg_rcvry("removing ino %lu", e->inum);
1502                                err = ubifs_tnc_remove_ino(c, e->inum);
1503                                if (err)
1504                                        return err;
1505                        } else {
1506                                struct ubifs_ino_node *ino = c->sbuf;
1507
1508                                e->exists = 1;
1509                                e->i_size = le64_to_cpu(ino->size);
1510                        }
1511                }
1512                if (e->exists && e->i_size < e->d_size) {
1513                        if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
1514                                /* Fix the inode size and pin it in memory */
1515                                struct inode *inode;
1516
1517                                inode = ubifs_iget(c->vfs_sb, e->inum);
1518                                if (IS_ERR(inode))
1519                                        return PTR_ERR(inode);
1520                                if (inode->i_size < e->d_size) {
1521                                        dbg_rcvry("ino %lu size %lld -> %lld",
1522                                                  e->inum, e->d_size,
1523                                                  inode->i_size);
1524                                        inode->i_size = e->d_size;
1525                                        ubifs_inode(inode)->ui_size = e->d_size;
1526                                        e->inode = inode;
1527                                        this = rb_next(this);
1528                                        continue;
1529                                }
1530                                iput(inode);
1531                        } else {
1532                                /* Fix the size in place */
1533                                err = fix_size_in_place(c, e);
1534                                if (err)
1535                                        return err;
1536                                if (e->inode)
1537                                        iput(e->inode);
1538                        }
1539                }
1540                this = rb_next(this);
1541                rb_erase(&e->rb, &c->size_tree);
1542                kfree(e);
1543        }
1544        return 0;
1545}
1546
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.