linux/fs/ubifs/recovery.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Adrian Hunter
  20 *          Artem Bityutskiy (\xD0\x91\xD0\xB8\xD1\x82\xD1\x8E\xD1\x86\xD0\xBA\xD0\xB8\xD0\xB9 \xD0\x90\xD1\x80\xD1\x82\xD1\x91\xD0\xBC)
  21 */
  22
  23/*
  24 * This file implements functions needed to recover from unclean un-mounts.
  25 * When UBIFS is mounted, it checks a flag on the master node to determine if
  26 * an un-mount was completed successfully. If not, the process of mounting
  27 * incorporates additional checking and fixing of on-flash data structures.
  28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
  29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
  30 * read-only, and the flash is not modified in that case.
  31 *
  32 * The general UBIFS approach to the recovery is that it recovers from
  33 * corruptions which could be caused by power cuts, but it refuses to recover
  34 * from corruption caused by other reasons. And UBIFS tries to distinguish
  35 * between these 2 reasons of corruptions and silently recover in the former
  36 * case and loudly complain in the latter case.
  37 *
  38 * UBIFS writes only to erased LEBs, so it writes only to the flash space
  39 * containing only 0xFFs. UBIFS also always writes strictly from the beginning
  40 * of the LEB to the end. And UBIFS assumes that the underlying flash media
  41 * writes in @c->max_write_size bytes at a time.
  42 *
  43 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
  44 * I/O unit corresponding to offset X to contain corrupted data, all the
  45 * following min. I/O units have to contain empty space (all 0xFFs). If this is
  46 * not true, the corruption cannot be the result of a power cut, and UBIFS
  47 * refuses to mount.
  48 */
  49
  50#include <linux/crc32.h>
  51#include <linux/slab.h>
  52#include "ubifs.h"
  53
  54/**
  55 * is_empty - determine whether a buffer is empty (contains all 0xff).
  56 * @buf: buffer to clean
  57 * @len: length of buffer
  58 *
  59 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
  60 * %0 is returned.
  61 */
  62static int is_empty(void *buf, int len)
  63{
  64        uint8_t *p = buf;
  65        int i;
  66
  67        for (i = 0; i < len; i++)
  68                if (*p++ != 0xff)
  69                        return 0;
  70        return 1;
  71}
  72
  73/**
  74 * first_non_ff - find offset of the first non-0xff byte.
  75 * @buf: buffer to search in
  76 * @len: length of buffer
  77 *
  78 * This function returns offset of the first non-0xff byte in @buf or %-1 if
  79 * the buffer contains only 0xff bytes.
  80 */
  81static int first_non_ff(void *buf, int len)
  82{
  83        uint8_t *p = buf;
  84        int i;
  85
  86        for (i = 0; i < len; i++)
  87                if (*p++ != 0xff)
  88                        return i;
  89        return -1;
  90}
  91
  92/**
  93 * get_master_node - get the last valid master node allowing for corruption.
  94 * @c: UBIFS file-system description object
  95 * @lnum: LEB number
  96 * @pbuf: buffer containing the LEB read, is returned here
  97 * @mst: master node, if found, is returned here
  98 * @cor: corruption, if found, is returned here
  99 *
 100 * This function allocates a buffer, reads the LEB into it, and finds and
 101 * returns the last valid master node allowing for one area of corruption.
 102 * The corrupt area, if there is one, must be consistent with the assumption
 103 * that it is the result of an unclean unmount while the master node was being
 104 * written. Under those circumstances, it is valid to use the previously written
 105 * master node.
 106 *
 107 * This function returns %0 on success and a negative error code on failure.
 108 */
 109static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
 110                           struct ubifs_mst_node **mst, void **cor)
 111{
 112        const int sz = c->mst_node_alsz;
 113        int err, offs, len;
 114        void *sbuf, *buf;
 115
 116        sbuf = vmalloc(c->leb_size);
 117        if (!sbuf)
 118                return -ENOMEM;
 119
 120        err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
 121        if (err && err != -EBADMSG)
 122                goto out_free;
 123
 124        /* Find the first position that is definitely not a node */
 125        offs = 0;
 126        buf = sbuf;
 127        len = c->leb_size;
 128        while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
 129                struct ubifs_ch *ch = buf;
 130
 131                if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
 132                        break;
 133                offs += sz;
 134                buf  += sz;
 135                len  -= sz;
 136        }
 137        /* See if there was a valid master node before that */
 138        if (offs) {
 139                int ret;
 140
 141                offs -= sz;
 142                buf  -= sz;
 143                len  += sz;
 144                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 145                if (ret != SCANNED_A_NODE && offs) {
 146                        /* Could have been corruption so check one place back */
 147                        offs -= sz;
 148                        buf  -= sz;
 149                        len  += sz;
 150                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 151                        if (ret != SCANNED_A_NODE)
 152                                /*
 153                                 * We accept only one area of corruption because
 154                                 * we are assuming that it was caused while
 155                                 * trying to write a master node.
 156                                 */
 157                                goto out_err;
 158                }
 159                if (ret == SCANNED_A_NODE) {
 160                        struct ubifs_ch *ch = buf;
 161
 162                        if (ch->node_type != UBIFS_MST_NODE)
 163                                goto out_err;
 164                        dbg_rcvry("found a master node at %d:%d", lnum, offs);
 165                        *mst = buf;
 166                        offs += sz;
 167                        buf  += sz;
 168                        len  -= sz;
 169                }
 170        }
 171        /* Check for corruption */
 172        if (offs < c->leb_size) {
 173                if (!is_empty(buf, min_t(int, len, sz))) {
 174                        *cor = buf;
 175                        dbg_rcvry("found corruption at %d:%d", lnum, offs);
 176                }
 177                offs += sz;
 178                buf  += sz;
 179                len  -= sz;
 180        }
 181        /* Check remaining empty space */
 182        if (offs < c->leb_size)
 183                if (!is_empty(buf, len))
 184                        goto out_err;
 185        *pbuf = sbuf;
 186        return 0;
 187
 188out_err:
 189        err = -EINVAL;
 190out_free:
 191        vfree(sbuf);
 192        *mst = NULL;
 193        *cor = NULL;
 194        return err;
 195}
 196
 197/**
 198 * write_rcvrd_mst_node - write recovered master node.
 199 * @c: UBIFS file-system description object
 200 * @mst: master node
 201 *
 202 * This function returns %0 on success and a negative error code on failure.
 203 */
 204static int write_rcvrd_mst_node(struct ubifs_info *c,
 205                                struct ubifs_mst_node *mst)
 206{
 207        int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
 208        __le32 save_flags;
 209
 210        dbg_rcvry("recovery");
 211
 212        save_flags = mst->flags;
 213        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 214
 215        ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
 216        err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
 217        if (err)
 218                goto out;
 219        err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
 220        if (err)
 221                goto out;
 222out:
 223        mst->flags = save_flags;
 224        return err;
 225}
 226
 227/**
 228 * ubifs_recover_master_node - recover the master node.
 229 * @c: UBIFS file-system description object
 230 *
 231 * This function recovers the master node from corruption that may occur due to
 232 * an unclean unmount.
 233 *
 234 * This function returns %0 on success and a negative error code on failure.
 235 */
 236int ubifs_recover_master_node(struct ubifs_info *c)
 237{
 238        void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
 239        struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
 240        const int sz = c->mst_node_alsz;
 241        int err, offs1, offs2;
 242
 243        dbg_rcvry("recovery");
 244
 245        err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
 246        if (err)
 247                goto out_free;
 248
 249        err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
 250        if (err)
 251                goto out_free;
 252
 253        if (mst1) {
 254                offs1 = (void *)mst1 - buf1;
 255                if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
 256                    (offs1 == 0 && !cor1)) {
 257                        /*
 258                         * mst1 was written by recovery at offset 0 with no
 259                         * corruption.
 260                         */
 261                        dbg_rcvry("recovery recovery");
 262                        mst = mst1;
 263                } else if (mst2) {
 264                        offs2 = (void *)mst2 - buf2;
 265                        if (offs1 == offs2) {
 266                                /* Same offset, so must be the same */
 267                                if (memcmp((void *)mst1 + UBIFS_CH_SZ,
 268                                           (void *)mst2 + UBIFS_CH_SZ,
 269                                           UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
 270                                        goto out_err;
 271                                mst = mst1;
 272                        } else if (offs2 + sz == offs1) {
 273                                /* 1st LEB was written, 2nd was not */
 274                                if (cor1)
 275                                        goto out_err;
 276                                mst = mst1;
 277                        } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
 278                                /* 1st LEB was unmapped and written, 2nd not */
 279                                if (cor1)
 280                                        goto out_err;
 281                                mst = mst1;
 282                        } else
 283                                goto out_err;
 284                } else {
 285                        /*
 286                         * 2nd LEB was unmapped and about to be written, so
 287                         * there must be only one master node in the first LEB
 288                         * and no corruption.
 289                         */
 290                        if (offs1 != 0 || cor1)
 291                                goto out_err;
 292                        mst = mst1;
 293                }
 294        } else {
 295                if (!mst2)
 296                        goto out_err;
 297                /*
 298                 * 1st LEB was unmapped and about to be written, so there must
 299                 * be no room left in 2nd LEB.
 300                 */
 301                offs2 = (void *)mst2 - buf2;
 302                if (offs2 + sz + sz <= c->leb_size)
 303                        goto out_err;
 304                mst = mst2;
 305        }
 306
 307        ubifs_msg("recovered master node from LEB %d",
 308                  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 309
 310        memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
 311
 312        if (c->ro_mount) {
 313                /* Read-only mode. Keep a copy for switching to rw mode */
 314                c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
 315                if (!c->rcvrd_mst_node) {
 316                        err = -ENOMEM;
 317                        goto out_free;
 318                }
 319                memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
 320
 321                /*
 322                 * We had to recover the master node, which means there was an
 323                 * unclean reboot. However, it is possible that the master node
 324                 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
 325                 * E.g., consider the following chain of events:
 326                 *
 327                 * 1. UBIFS was cleanly unmounted, so the master node is clean
 328                 * 2. UBIFS is being mounted R/W and starts changing the master
 329                 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
 330                 *    so this LEB ends up with some amount of garbage at the
 331                 *    end.
 332                 * 3. UBIFS is being mounted R/O. We reach this place and
 333                 *    recover the master node from the second LEB
 334                 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
 335                 *    because we are being mounted R/O. We have to defer the
 336                 *    operation.
 337                 * 4. However, this master node (@c->mst_node) is marked as
 338                 *    clean (since the step 1). And if we just return, the
 339                 *    mount code will be confused and won't recover the master
 340                 *    node when it is re-mounter R/W later.
 341                 *
 342                 *    Thus, to force the recovery by marking the master node as
 343                 *    dirty.
 344                 */
 345                c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 346        } else {
 347                /* Write the recovered master node */
 348                c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
 349                err = write_rcvrd_mst_node(c, c->mst_node);
 350                if (err)
 351                        goto out_free;
 352        }
 353
 354        vfree(buf2);
 355        vfree(buf1);
 356
 357        return 0;
 358
 359out_err:
 360        err = -EINVAL;
 361out_free:
 362        ubifs_err("failed to recover master node");
 363        if (mst1) {
 364                dbg_err("dumping first master node");
 365                dbg_dump_node(c, mst1);
 366        }
 367        if (mst2) {
 368                dbg_err("dumping second master node");
 369                dbg_dump_node(c, mst2);
 370        }
 371        vfree(buf2);
 372        vfree(buf1);
 373        return err;
 374}
 375
 376/**
 377 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 378 * @c: UBIFS file-system description object
 379 *
 380 * This function writes the master node that was recovered during mounting in
 381 * read-only mode and must now be written because we are remounting rw.
 382 *
 383 * This function returns %0 on success and a negative error code on failure.
 384 */
 385int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
 386{
 387        int err;
 388
 389        if (!c->rcvrd_mst_node)
 390                return 0;
 391        c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 392        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 393        err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
 394        if (err)
 395                return err;
 396        kfree(c->rcvrd_mst_node);
 397        c->rcvrd_mst_node = NULL;
 398        return 0;
 399}
 400
 401/**
 402 * is_last_write - determine if an offset was in the last write to a LEB.
 403 * @c: UBIFS file-system description object
 404 * @buf: buffer to check
 405 * @offs: offset to check
 406 *
 407 * This function returns %1 if @offs was in the last write to the LEB whose data
 408 * is in @buf, otherwise %0 is returned. The determination is made by checking
 409 * for subsequent empty space starting from the next @c->max_write_size
 410 * boundary.
 411 */
 412static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 413{
 414        int empty_offs, check_len;
 415        uint8_t *p;
 416
 417        /*
 418         * Round up to the next @c->max_write_size boundary i.e. @offs is in
 419         * the last wbuf written. After that should be empty space.
 420         */
 421        empty_offs = ALIGN(offs + 1, c->max_write_size);
 422        check_len = c->leb_size - empty_offs;
 423        p = buf + empty_offs - offs;
 424        return is_empty(p, check_len);
 425}
 426
 427/**
 428 * clean_buf - clean the data from an LEB sitting in a buffer.
 429 * @c: UBIFS file-system description object
 430 * @buf: buffer to clean
 431 * @lnum: LEB number to clean
 432 * @offs: offset from which to clean
 433 * @len: length of buffer
 434 *
 435 * This function pads up to the next min_io_size boundary (if there is one) and
 436 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 437 * @c->min_io_size boundary.
 438 */
 439static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 440                      int *offs, int *len)
 441{
 442        int empty_offs, pad_len;
 443
 444        lnum = lnum;
 445        dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 446
 447        ubifs_assert(!(*offs & 7));
 448        empty_offs = ALIGN(*offs, c->min_io_size);
 449        pad_len = empty_offs - *offs;
 450        ubifs_pad(c, *buf, pad_len);
 451        *offs += pad_len;
 452        *buf += pad_len;
 453        *len -= pad_len;
 454        memset(*buf, 0xff, c->leb_size - empty_offs);
 455}
 456
 457/**
 458 * no_more_nodes - determine if there are no more nodes in a buffer.
 459 * @c: UBIFS file-system description object
 460 * @buf: buffer to check
 461 * @len: length of buffer
 462 * @lnum: LEB number of the LEB from which @buf was read
 463 * @offs: offset from which @buf was read
 464 *
 465 * This function ensures that the corrupted node at @offs is the last thing
 466 * written to a LEB. This function returns %1 if more data is not found and
 467 * %0 if more data is found.
 468 */
 469static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 470                        int lnum, int offs)
 471{
 472        struct ubifs_ch *ch = buf;
 473        int skip, dlen = le32_to_cpu(ch->len);
 474
 475        /* Check for empty space after the corrupt node's common header */
 476        skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
 477        if (is_empty(buf + skip, len - skip))
 478                return 1;
 479        /*
 480         * The area after the common header size is not empty, so the common
 481         * header must be intact. Check it.
 482         */
 483        if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
 484                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
 485                return 0;
 486        }
 487        /* Now we know the corrupt node's length we can skip over it */
 488        skip = ALIGN(offs + dlen, c->max_write_size) - offs;
 489        /* After which there should be empty space */
 490        if (is_empty(buf + skip, len - skip))
 491                return 1;
 492        dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
 493        return 0;
 494}
 495
 496/**
 497 * fix_unclean_leb - fix an unclean LEB.
 498 * @c: UBIFS file-system description object
 499 * @sleb: scanned LEB information
 500 * @start: offset where scan started
 501 */
 502static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 503                           int start)
 504{
 505        int lnum = sleb->lnum, endpt = start;
 506
 507        /* Get the end offset of the last node we are keeping */
 508        if (!list_empty(&sleb->nodes)) {
 509                struct ubifs_scan_node *snod;
 510
 511                snod = list_entry(sleb->nodes.prev,
 512                                  struct ubifs_scan_node, list);
 513                endpt = snod->offs + snod->len;
 514        }
 515
 516        if (c->ro_mount && !c->remounting_rw) {
 517                /* Add to recovery list */
 518                struct ubifs_unclean_leb *ucleb;
 519
 520                dbg_rcvry("need to fix LEB %d start %d endpt %d",
 521                          lnum, start, sleb->endpt);
 522                ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
 523                if (!ucleb)
 524                        return -ENOMEM;
 525                ucleb->lnum = lnum;
 526                ucleb->endpt = endpt;
 527                list_add_tail(&ucleb->list, &c->unclean_leb_list);
 528        } else {
 529                /* Write the fixed LEB back to flash */
 530                int err;
 531
 532                dbg_rcvry("fixing LEB %d start %d endpt %d",
 533                          lnum, start, sleb->endpt);
 534                if (endpt == 0) {
 535                        err = ubifs_leb_unmap(c, lnum);
 536                        if (err)
 537                                return err;
 538                } else {
 539                        int len = ALIGN(endpt, c->min_io_size);
 540
 541                        if (start) {
 542                                err = ubi_read(c->ubi, lnum, sleb->buf, 0,
 543                                               start);
 544                                if (err)
 545                                        return err;
 546                        }
 547                        /* Pad to min_io_size */
 548                        if (len > endpt) {
 549                                int pad_len = len - ALIGN(endpt, 8);
 550
 551                                if (pad_len > 0) {
 552                                        void *buf = sleb->buf + len - pad_len;
 553
 554                                        ubifs_pad(c, buf, pad_len);
 555                                }
 556                        }
 557                        err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
 558                                             UBI_UNKNOWN);
 559                        if (err)
 560                                return err;
 561                }
 562        }
 563        return 0;
 564}
 565
 566/**
 567 * drop_incomplete_group - drop nodes from an incomplete group.
 568 * @sleb: scanned LEB information
 569 * @offs: offset of dropped nodes is returned here
 570 *
 571 * This function returns %1 if nodes are dropped and %0 otherwise.
 572 */
 573static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
 574{
 575        int dropped = 0;
 576
 577        while (!list_empty(&sleb->nodes)) {
 578                struct ubifs_scan_node *snod;
 579                struct ubifs_ch *ch;
 580
 581                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 582                                  list);
 583                ch = snod->node;
 584                if (ch->group_type != UBIFS_IN_NODE_GROUP)
 585                        return dropped;
 586                dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
 587                *offs = snod->offs;
 588                list_del(&snod->list);
 589                kfree(snod);
 590                sleb->nodes_cnt -= 1;
 591                dropped = 1;
 592        }
 593        return dropped;
 594}
 595
 596/**
 597 * ubifs_recover_leb - scan and recover a LEB.
 598 * @c: UBIFS file-system description object
 599 * @lnum: LEB number
 600 * @offs: offset
 601 * @sbuf: LEB-sized buffer to use
 602 * @grouped: nodes may be grouped for recovery
 603 *
 604 * This function does a scan of a LEB, but caters for errors that might have
 605 * been caused by the unclean unmount from which we are attempting to recover.
 606 * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
 607 * found, and a negative error code in case of failure.
 608 */
 609struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 610                                         int offs, void *sbuf, int grouped)
 611{
 612        int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
 613        int empty_chkd = 0, start = offs;
 614        struct ubifs_scan_leb *sleb;
 615        void *buf = sbuf + offs;
 616
 617        dbg_rcvry("%d:%d", lnum, offs);
 618
 619        sleb = ubifs_start_scan(c, lnum, offs, sbuf);
 620        if (IS_ERR(sleb))
 621                return sleb;
 622
 623        if (sleb->ecc)
 624                need_clean = 1;
 625
 626        while (len >= 8) {
 627                int ret;
 628
 629                dbg_scan("look at LEB %d:%d (%d bytes left)",
 630                         lnum, offs, len);
 631
 632                cond_resched();
 633
 634                /*
 635                 * Scan quietly until there is an error from which we cannot
 636                 * recover
 637                 */
 638                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 639
 640                if (ret == SCANNED_A_NODE) {
 641                        /* A valid node, and not a padding node */
 642                        struct ubifs_ch *ch = buf;
 643                        int node_len;
 644
 645                        err = ubifs_add_snod(c, sleb, buf, offs);
 646                        if (err)
 647                                goto error;
 648                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 649                        offs += node_len;
 650                        buf += node_len;
 651                        len -= node_len;
 652                        continue;
 653                }
 654
 655                if (ret > 0) {
 656                        /* Padding bytes or a valid padding node */
 657                        offs += ret;
 658                        buf += ret;
 659                        len -= ret;
 660                        continue;
 661                }
 662
 663                if (ret == SCANNED_EMPTY_SPACE) {
 664                        if (!is_empty(buf, len)) {
 665                                if (!is_last_write(c, buf, offs))
 666                                        break;
 667                                clean_buf(c, &buf, lnum, &offs, &len);
 668                                need_clean = 1;
 669                        }
 670                        empty_chkd = 1;
 671                        break;
 672                }
 673
 674                if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
 675                        if (is_last_write(c, buf, offs)) {
 676                                clean_buf(c, &buf, lnum, &offs, &len);
 677                                need_clean = 1;
 678                                empty_chkd = 1;
 679                                break;
 680                        }
 681
 682                if (ret == SCANNED_A_CORRUPT_NODE)
 683                        if (no_more_nodes(c, buf, len, lnum, offs)) {
 684                                clean_buf(c, &buf, lnum, &offs, &len);
 685                                need_clean = 1;
 686                                empty_chkd = 1;
 687                                break;
 688                        }
 689
 690                if (quiet) {
 691                        /* Redo the last scan but noisily */
 692                        quiet = 0;
 693                        continue;
 694                }
 695
 696                switch (ret) {
 697                case SCANNED_GARBAGE:
 698                        dbg_err("garbage");
 699                        goto corrupted;
 700                case SCANNED_A_CORRUPT_NODE:
 701                case SCANNED_A_BAD_PAD_NODE:
 702                        dbg_err("bad node");
 703                        goto corrupted;
 704                default:
 705                        dbg_err("unknown");
 706                        err = -EINVAL;
 707                        goto error;
 708                }
 709        }
 710
 711        if (!empty_chkd && !is_empty(buf, len)) {
 712                if (is_last_write(c, buf, offs)) {
 713                        clean_buf(c, &buf, lnum, &offs, &len);
 714                        need_clean = 1;
 715                } else {
 716                        int corruption = first_non_ff(buf, len);
 717
 718                        /*
 719                         * See header comment for this file for more
 720                         * explanations about the reasons we have this check.
 721                         */
 722                        ubifs_err("corrupt empty space LEB %d:%d, corruption "
 723                                  "starts at %d", lnum, offs, corruption);
 724                        /* Make sure we dump interesting non-0xFF data */
 725                        offs += corruption;
 726                        buf += corruption;
 727                        goto corrupted;
 728                }
 729        }
 730
 731        /* Drop nodes from incomplete group */
 732        if (grouped && drop_incomplete_group(sleb, &offs)) {
 733                buf = sbuf + offs;
 734                len = c->leb_size - offs;
 735                clean_buf(c, &buf, lnum, &offs, &len);
 736                need_clean = 1;
 737        }
 738
 739        if (offs % c->min_io_size) {
 740                clean_buf(c, &buf, lnum, &offs, &len);
 741                need_clean = 1;
 742        }
 743
 744        ubifs_end_scan(c, sleb, lnum, offs);
 745
 746        if (need_clean) {
 747                err = fix_unclean_leb(c, sleb, start);
 748                if (err)
 749                        goto error;
 750        }
 751
 752        return sleb;
 753
 754corrupted:
 755        ubifs_scanned_corruption(c, lnum, offs, buf);
 756        err = -EUCLEAN;
 757error:
 758        ubifs_err("LEB %d scanning failed", lnum);
 759        ubifs_scan_destroy(sleb);
 760        return ERR_PTR(err);
 761}
 762
 763/**
 764 * get_cs_sqnum - get commit start sequence number.
 765 * @c: UBIFS file-system description object
 766 * @lnum: LEB number of commit start node
 767 * @offs: offset of commit start node
 768 * @cs_sqnum: commit start sequence number is returned here
 769 *
 770 * This function returns %0 on success and a negative error code on failure.
 771 */
 772static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 773                        unsigned long long *cs_sqnum)
 774{
 775        struct ubifs_cs_node *cs_node = NULL;
 776        int err, ret;
 777
 778        dbg_rcvry("at %d:%d", lnum, offs);
 779        cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
 780        if (!cs_node)
 781                return -ENOMEM;
 782        if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 783                goto out_err;
 784        err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
 785        if (err && err != -EBADMSG)
 786                goto out_free;
 787        ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 788        if (ret != SCANNED_A_NODE) {
 789                dbg_err("Not a valid node");
 790                goto out_err;
 791        }
 792        if (cs_node->ch.node_type != UBIFS_CS_NODE) {
 793                dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
 794                goto out_err;
 795        }
 796        if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
 797                dbg_err("CS node cmt_no %llu != current cmt_no %llu",
 798                        (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 799                        c->cmt_no);
 800                goto out_err;
 801        }
 802        *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
 803        dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
 804        kfree(cs_node);
 805        return 0;
 806
 807out_err:
 808        err = -EINVAL;
 809out_free:
 810        ubifs_err("failed to get CS sqnum");
 811        kfree(cs_node);
 812        return err;
 813}
 814
 815/**
 816 * ubifs_recover_log_leb - scan and recover a log LEB.
 817 * @c: UBIFS file-system description object
 818 * @lnum: LEB number
 819 * @offs: offset
 820 * @sbuf: LEB-sized buffer to use
 821 *
 822 * This function does a scan of a LEB, but caters for errors that might have
 823 * been caused by unclean reboots from which we are attempting to recover
 824 * (assume that only the last log LEB can be corrupted by an unclean reboot).
 825 *
 826 * This function returns %0 on success and a negative error code on failure.
 827 */
 828struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 829                                             int offs, void *sbuf)
 830{
 831        struct ubifs_scan_leb *sleb;
 832        int next_lnum;
 833
 834        dbg_rcvry("LEB %d", lnum);
 835        next_lnum = lnum + 1;
 836        if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
 837                next_lnum = UBIFS_LOG_LNUM;
 838        if (next_lnum != c->ltail_lnum) {
 839                /*
 840                 * We can only recover at the end of the log, so check that the
 841                 * next log LEB is empty or out of date.
 842                 */
 843                sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
 844                if (IS_ERR(sleb))
 845                        return sleb;
 846                if (sleb->nodes_cnt) {
 847                        struct ubifs_scan_node *snod;
 848                        unsigned long long cs_sqnum = c->cs_sqnum;
 849
 850                        snod = list_entry(sleb->nodes.next,
 851                                          struct ubifs_scan_node, list);
 852                        if (cs_sqnum == 0) {
 853                                int err;
 854
 855                                err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
 856                                if (err) {
 857                                        ubifs_scan_destroy(sleb);
 858                                        return ERR_PTR(err);
 859                                }
 860                        }
 861                        if (snod->sqnum > cs_sqnum) {
 862                                ubifs_err("unrecoverable log corruption "
 863                                          "in LEB %d", lnum);
 864                                ubifs_scan_destroy(sleb);
 865                                return ERR_PTR(-EUCLEAN);
 866                        }
 867                }
 868                ubifs_scan_destroy(sleb);
 869        }
 870        return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
 871}
 872
 873/**
 874 * recover_head - recover a head.
 875 * @c: UBIFS file-system description object
 876 * @lnum: LEB number of head to recover
 877 * @offs: offset of head to recover
 878 * @sbuf: LEB-sized buffer to use
 879 *
 880 * This function ensures that there is no data on the flash at a head location.
 881 *
 882 * This function returns %0 on success and a negative error code on failure.
 883 */
 884static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 885                        void *sbuf)
 886{
 887        int len = c->max_write_size, err;
 888
 889        if (offs + len > c->leb_size)
 890                len = c->leb_size - offs;
 891
 892        if (!len)
 893                return 0;
 894
 895        /* Read at the head location and check it is empty flash */
 896        err = ubi_read(c->ubi, lnum, sbuf, offs, len);
 897        if (err || !is_empty(sbuf, len)) {
 898                dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 899                if (offs == 0)
 900                        return ubifs_leb_unmap(c, lnum);
 901                err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
 902                if (err)
 903                        return err;
 904                return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
 905        }
 906
 907        return 0;
 908}
 909
 910/**
 911 * ubifs_recover_inl_heads - recover index and LPT heads.
 912 * @c: UBIFS file-system description object
 913 * @sbuf: LEB-sized buffer to use
 914 *
 915 * This function ensures that there is no data on the flash at the index and
 916 * LPT head locations.
 917 *
 918 * This deals with the recovery of a half-completed journal commit. UBIFS is
 919 * careful never to overwrite the last version of the index or the LPT. Because
 920 * the index and LPT are wandering trees, data from a half-completed commit will
 921 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
 922 * assumed to be empty and will be unmapped anyway before use, or in the index
 923 * and LPT heads.
 924 *
 925 * This function returns %0 on success and a negative error code on failure.
 926 */
 927int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
 928{
 929        int err;
 930
 931        ubifs_assert(!c->ro_mount || c->remounting_rw);
 932
 933        dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
 934        err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
 935        if (err)
 936                return err;
 937
 938        dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
 939        err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 940        if (err)
 941                return err;
 942
 943        return 0;
 944}
 945
 946/**
 947 *  clean_an_unclean_leb - read and write a LEB to remove corruption.
 948 * @c: UBIFS file-system description object
 949 * @ucleb: unclean LEB information
 950 * @sbuf: LEB-sized buffer to use
 951 *
 952 * This function reads a LEB up to a point pre-determined by the mount recovery,
 953 * checks the nodes, and writes the result back to the flash, thereby cleaning
 954 * off any following corruption, or non-fatal ECC errors.
 955 *
 956 * This function returns %0 on success and a negative error code on failure.
 957 */
 958static int clean_an_unclean_leb(const struct ubifs_info *c,
 959                                struct ubifs_unclean_leb *ucleb, void *sbuf)
 960{
 961        int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
 962        void *buf = sbuf;
 963
 964        dbg_rcvry("LEB %d len %d", lnum, len);
 965
 966        if (len == 0) {
 967                /* Nothing to read, just unmap it */
 968                err = ubifs_leb_unmap(c, lnum);
 969                if (err)
 970                        return err;
 971                return 0;
 972        }
 973
 974        err = ubi_read(c->ubi, lnum, buf, offs, len);
 975        if (err && err != -EBADMSG)
 976                return err;
 977
 978        while (len >= 8) {
 979                int ret;
 980
 981                cond_resched();
 982
 983                /* Scan quietly until there is an error */
 984                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 985
 986                if (ret == SCANNED_A_NODE) {
 987                        /* A valid node, and not a padding node */
 988                        struct ubifs_ch *ch = buf;
 989                        int node_len;
 990
 991                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 992                        offs += node_len;
 993                        buf += node_len;
 994                        len -= node_len;
 995                        continue;
 996                }
 997
 998                if (ret > 0) {
 999                        /* Padding bytes or a valid padding node */
1000                        offs += ret;
1001                        buf += ret;
1002                        len -= ret;
1003                        continue;
1004                }
1005
1006                if (ret == SCANNED_EMPTY_SPACE) {
1007                        ubifs_err("unexpected empty space at %d:%d",
1008                                  lnum, offs);
1009                        return -EUCLEAN;
1010                }
1011
1012                if (quiet) {
1013                        /* Redo the last scan but noisily */
1014                        quiet = 0;
1015                        continue;
1016                }
1017
1018                ubifs_scanned_corruption(c, lnum, offs, buf);
1019                return -EUCLEAN;
1020        }
1021
1022        /* Pad to min_io_size */
1023        len = ALIGN(ucleb->endpt, c->min_io_size);
1024        if (len > ucleb->endpt) {
1025                int pad_len = len - ALIGN(ucleb->endpt, 8);
1026
1027                if (pad_len > 0) {
1028                        buf = c->sbuf + len - pad_len;
1029                        ubifs_pad(c, buf, pad_len);
1030                }
1031        }
1032
1033        /* Write back the LEB atomically */
1034        err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
1035        if (err)
1036                return err;
1037
1038        dbg_rcvry("cleaned LEB %d", lnum);
1039
1040        return 0;
1041}
1042
1043/**
1044 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1045 * @c: UBIFS file-system description object
1046 * @sbuf: LEB-sized buffer to use
1047 *
1048 * This function cleans a LEB identified during recovery that needs to be
1049 * written but was not because UBIFS was mounted read-only. This happens when
1050 * remounting to read-write mode.
1051 *
1052 * This function returns %0 on success and a negative error code on failure.
1053 */
1054int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1055{
1056        dbg_rcvry("recovery");
1057        while (!list_empty(&c->unclean_leb_list)) {
1058                struct ubifs_unclean_leb *ucleb;
1059                int err;
1060
1061                ucleb = list_entry(c->unclean_leb_list.next,
1062                                   struct ubifs_unclean_leb, list);
1063                err = clean_an_unclean_leb(c, ucleb, sbuf);
1064                if (err)
1065                        return err;
1066                list_del(&ucleb->list);
1067                kfree(ucleb);
1068        }
1069        return 0;
1070}
1071
1072/**
1073 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1074 * @c: UBIFS file-system description object
1075 *
1076 * Out-of-place garbage collection requires always one empty LEB with which to
1077 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1078 * written to the master node on unmounting. In the case of an unclean unmount
1079 * the value of gc_lnum recorded in the master node is out of date and cannot
1080 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1081 * However, there may not be enough empty space, in which case it must be
1082 * possible to GC the dirtiest LEB into the GC head LEB.
1083 *
1084 * This function also runs the commit which causes the TNC updates from
1085 * size-recovery and orphans to be written to the flash. That is important to
1086 * ensure correct replay order for subsequent mounts.
1087 *
1088 * This function returns %0 on success and a negative error code on failure.
1089 */
1090int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1091{
1092        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1093        struct ubifs_lprops lp;
1094        int lnum, err;
1095
1096        c->gc_lnum = -1;
1097        if (wbuf->lnum == -1) {
1098                dbg_rcvry("no GC head LEB");
1099                goto find_free;
1100        }
1101        /*
1102         * See whether the used space in the dirtiest LEB fits in the GC head
1103         * LEB.
1104         */
1105        if (wbuf->offs == c->leb_size) {
1106                dbg_rcvry("no room in GC head LEB");
1107                goto find_free;
1108        }
1109        err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1110        if (err) {
1111                /*
1112                 * There are no dirty or empty LEBs subject to here being
1113                 * enough for the index. Try to use
1114                 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1115                 * LEBs (ignoring index requirements). If the index then
1116                 * doesn't have enough LEBs the recovery commit will fail -
1117                 * which is the  same result anyway i.e. recovery fails. So
1118                 * there is no problem ignoring index  requirements and just
1119                 * grabbing a free LEB since we have already established there
1120                 * is not a dirty LEB we could have used instead.
1121                 */
1122                if (err == -ENOSPC) {
1123                        dbg_rcvry("could not find a dirty LEB");
1124                        goto find_free;
1125                }
1126                return err;
1127        }
1128        ubifs_assert(!(lp.flags & LPROPS_INDEX));
1129        lnum = lp.lnum;
1130        if (lp.free + lp.dirty == c->leb_size) {
1131                /* An empty LEB was returned */
1132                if (lp.free != c->leb_size) {
1133                        err = ubifs_change_one_lp(c, lnum, c->leb_size,
1134                                                  0, 0, 0, 0);
1135                        if (err)
1136                                return err;
1137                }
1138                err = ubifs_leb_unmap(c, lnum);
1139                if (err)
1140                        return err;
1141                c->gc_lnum = lnum;
1142                dbg_rcvry("allocated LEB %d for GC", lnum);
1143                /* Run the commit */
1144                dbg_rcvry("committing");
1145                return ubifs_run_commit(c);
1146        }
1147        /*
1148         * There was no empty LEB so the used space in the dirtiest LEB must fit
1149         * in the GC head LEB.
1150         */
1151        if (lp.free + lp.dirty < wbuf->offs) {
1152                dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1153                          lnum, wbuf->lnum, wbuf->offs);
1154                err = ubifs_return_leb(c, lnum);
1155                if (err)
1156                        return err;
1157                goto find_free;
1158        }
1159        /*
1160         * We run the commit before garbage collection otherwise subsequent
1161         * mounts will see the GC and orphan deletion in a different order.
1162         */
1163        dbg_rcvry("committing");
1164        err = ubifs_run_commit(c);
1165        if (err)
1166                return err;
1167        /*
1168         * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
1169         * - use locking to keep 'ubifs_assert()' happy.
1170         */
1171        dbg_rcvry("GC'ing LEB %d", lnum);
1172        mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1173        err = ubifs_garbage_collect_leb(c, &lp);
1174        if (err >= 0) {
1175                int err2 = ubifs_wbuf_sync_nolock(wbuf);
1176
1177                if (err2)
1178                        err = err2;
1179        }
1180        mutex_unlock(&wbuf->io_mutex);
1181        if (err < 0) {
1182                dbg_err("GC failed, error %d", err);
1183                if (err == -EAGAIN)
1184                        err = -EINVAL;
1185                return err;
1186        }
1187        if (err != LEB_RETAINED) {
1188                dbg_err("GC returned %d", err);
1189                return -EINVAL;
1190        }
1191        err = ubifs_leb_unmap(c, c->gc_lnum);
1192        if (err)
1193                return err;
1194        dbg_rcvry("allocated LEB %d for GC", lnum);
1195        return 0;
1196
1197find_free:
1198        /*
1199         * There is no GC head LEB or the free space in the GC head LEB is too
1200         * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1201         * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1202         */
1203        lnum = ubifs_find_free_leb_for_idx(c);
1204        if (lnum < 0) {
1205                dbg_err("could not find an empty LEB");
1206                return lnum;
1207        }
1208        /* And reset the index flag */
1209        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1210                                  LPROPS_INDEX, 0);
1211        if (err)
1212                return err;
1213        c->gc_lnum = lnum;
1214        dbg_rcvry("allocated LEB %d for GC", lnum);
1215        /* Run the commit */
1216        dbg_rcvry("committing");
1217        return ubifs_run_commit(c);
1218}
1219
1220/**
1221 * struct size_entry - inode size information for recovery.
1222 * @rb: link in the RB-tree of sizes
1223 * @inum: inode number
1224 * @i_size: size on inode
1225 * @d_size: maximum size based on data nodes
1226 * @exists: indicates whether the inode exists
1227 * @inode: inode if pinned in memory awaiting rw mode to fix it
1228 */
1229struct size_entry {
1230        struct rb_node rb;
1231        ino_t inum;
1232        loff_t i_size;
1233        loff_t d_size;
1234        int exists;
1235        struct inode *inode;
1236};
1237
1238/**
1239 * add_ino - add an entry to the size tree.
1240 * @c: UBIFS file-system description object
1241 * @inum: inode number
1242 * @i_size: size on inode
1243 * @d_size: maximum size based on data nodes
1244 * @exists: indicates whether the inode exists
1245 */
1246static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1247                   loff_t d_size, int exists)
1248{
1249        struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1250        struct size_entry *e;
1251
1252        while (*p) {
1253                parent = *p;
1254                e = rb_entry(parent, struct size_entry, rb);
1255                if (inum < e->inum)
1256                        p = &(*p)->rb_left;
1257                else
1258                        p = &(*p)->rb_right;
1259        }
1260
1261        e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1262        if (!e)
1263                return -ENOMEM;
1264
1265        e->inum = inum;
1266        e->i_size = i_size;
1267        e->d_size = d_size;
1268        e->exists = exists;
1269
1270        rb_link_node(&e->rb, parent, p);
1271        rb_insert_color(&e->rb, &c->size_tree);
1272
1273        return 0;
1274}
1275
1276/**
1277 * find_ino - find an entry on the size tree.
1278 * @c: UBIFS file-system description object
1279 * @inum: inode number
1280 */
1281static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1282{
1283        struct rb_node *p = c->size_tree.rb_node;
1284        struct size_entry *e;
1285
1286        while (p) {
1287                e = rb_entry(p, struct size_entry, rb);
1288                if (inum < e->inum)
1289                        p = p->rb_left;
1290                else if (inum > e->inum)
1291                        p = p->rb_right;
1292                else
1293                        return e;
1294        }
1295        return NULL;
1296}
1297
1298/**
1299 * remove_ino - remove an entry from the size tree.
1300 * @c: UBIFS file-system description object
1301 * @inum: inode number
1302 */
1303static void remove_ino(struct ubifs_info *c, ino_t inum)
1304{
1305        struct size_entry *e = find_ino(c, inum);
1306
1307        if (!e)
1308                return;
1309        rb_erase(&e->rb, &c->size_tree);
1310        kfree(e);
1311}
1312
1313/**
1314 * ubifs_destroy_size_tree - free resources related to the size tree.
1315 * @c: UBIFS file-system description object
1316 */
1317void ubifs_destroy_size_tree(struct ubifs_info *c)
1318{
1319        struct rb_node *this = c->size_tree.rb_node;
1320        struct size_entry *e;
1321
1322        while (this) {
1323                if (this->rb_left) {
1324                        this = this->rb_left;
1325                        continue;
1326                } else if (this->rb_right) {
1327                        this = this->rb_right;
1328                        continue;
1329                }
1330                e = rb_entry(this, struct size_entry, rb);
1331                if (e->inode)
1332                        iput(e->inode);
1333                this = rb_parent(this);
1334                if (this) {
1335                        if (this->rb_left == &e->rb)
1336                                this->rb_left = NULL;
1337                        else
1338                                this->rb_right = NULL;
1339                }
1340                kfree(e);
1341        }
1342        c->size_tree = RB_ROOT;
1343}
1344
1345/**
1346 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1347 * @c: UBIFS file-system description object
1348 * @key: node key
1349 * @deletion: node is for a deletion
1350 * @new_size: inode size
1351 *
1352 * This function has two purposes:
1353 *     1) to ensure there are no data nodes that fall outside the inode size
1354 *     2) to ensure there are no data nodes for inodes that do not exist
1355 * To accomplish those purposes, a rb-tree is constructed containing an entry
1356 * for each inode number in the journal that has not been deleted, and recording
1357 * the size from the inode node, the maximum size of any data node (also altered
1358 * by truncations) and a flag indicating a inode number for which no inode node
1359 * was present in the journal.
1360 *
1361 * Note that there is still the possibility that there are data nodes that have
1362 * been committed that are beyond the inode size, however the only way to find
1363 * them would be to scan the entire index. Alternatively, some provision could
1364 * be made to record the size of inodes at the start of commit, which would seem
1365 * very cumbersome for a scenario that is quite unlikely and the only negative
1366 * consequence of which is wasted space.
1367 *
1368 * This functions returns %0 on success and a negative error code on failure.
1369 */
1370int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1371                             int deletion, loff_t new_size)
1372{
1373        ino_t inum = key_inum(c, key);
1374        struct size_entry *e;
1375        int err;
1376
1377        switch (key_type(c, key)) {
1378        case UBIFS_INO_KEY:
1379                if (deletion)
1380                        remove_ino(c, inum);
1381                else {
1382                        e = find_ino(c, inum);
1383                        if (e) {
1384                                e->i_size = new_size;
1385                                e->exists = 1;
1386                        } else {
1387                                err = add_ino(c, inum, new_size, 0, 1);
1388                                if (err)
1389                                        return err;
1390                        }
1391                }
1392                break;
1393        case UBIFS_DATA_KEY:
1394                e = find_ino(c, inum);
1395                if (e) {
1396                        if (new_size > e->d_size)
1397                                e->d_size = new_size;
1398                } else {
1399                        err = add_ino(c, inum, 0, new_size, 0);
1400                        if (err)
1401                                return err;
1402                }
1403                break;
1404        case UBIFS_TRUN_KEY:
1405                e = find_ino(c, inum);
1406                if (e)
1407                        e->d_size = new_size;
1408                break;
1409        }
1410        return 0;
1411}
1412
1413/**
1414 * fix_size_in_place - fix inode size in place on flash.
1415 * @c: UBIFS file-system description object
1416 * @e: inode size information for recovery
1417 */
1418static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1419{
1420        struct ubifs_ino_node *ino = c->sbuf;
1421        unsigned char *p;
1422        union ubifs_key key;
1423        int err, lnum, offs, len;
1424        loff_t i_size;
1425        uint32_t crc;
1426
1427        /* Locate the inode node LEB number and offset */
1428        ino_key_init(c, &key, e->inum);
1429        err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1430        if (err)
1431                goto out;
1432        /*
1433         * If the size recorded on the inode node is greater than the size that
1434         * was calculated from nodes in the journal then don't change the inode.
1435         */
1436        i_size = le64_to_cpu(ino->size);
1437        if (i_size >= e->d_size)
1438                return 0;
1439        /* Read the LEB */
1440        err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
1441        if (err)
1442                goto out;
1443        /* Change the size field and recalculate the CRC */
1444        ino = c->sbuf + offs;
1445        ino->size = cpu_to_le64(e->d_size);
1446        len = le32_to_cpu(ino->ch.len);
1447        crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1448        ino->ch.crc = cpu_to_le32(crc);
1449        /* Work out where data in the LEB ends and free space begins */
1450        p = c->sbuf;
1451        len = c->leb_size - 1;
1452        while (p[len] == 0xff)
1453                len -= 1;
1454        len = ALIGN(len + 1, c->min_io_size);
1455        /* Atomically write the fixed LEB back again */
1456        err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1457        if (err)
1458                goto out;
1459        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
1460                  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1461        return 0;
1462
1463out:
1464        ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1465                   (unsigned long)e->inum, e->i_size, e->d_size, err);
1466        return err;
1467}
1468
1469/**
1470 * ubifs_recover_size - recover inode size.
1471 * @c: UBIFS file-system description object
1472 *
1473 * This function attempts to fix inode size discrepancies identified by the
1474 * 'ubifs_recover_size_accum()' function.
1475 *
1476 * This functions returns %0 on success and a negative error code on failure.
1477 */
1478int ubifs_recover_size(struct ubifs_info *c)
1479{
1480        struct rb_node *this = rb_first(&c->size_tree);
1481
1482        while (this) {
1483                struct size_entry *e;
1484                int err;
1485
1486                e = rb_entry(this, struct size_entry, rb);
1487                if (!e->exists) {
1488                        union ubifs_key key;
1489
1490                        ino_key_init(c, &key, e->inum);
1491                        err = ubifs_tnc_lookup(c, &key, c->sbuf);
1492                        if (err && err != -ENOENT)
1493                                return err;
1494                        if (err == -ENOENT) {
1495                                /* Remove data nodes that have no inode */
1496                                dbg_rcvry("removing ino %lu",
1497                                          (unsigned long)e->inum);
1498                                err = ubifs_tnc_remove_ino(c, e->inum);
1499                                if (err)
1500                                        return err;
1501                        } else {
1502                                struct ubifs_ino_node *ino = c->sbuf;
1503
1504                                e->exists = 1;
1505                                e->i_size = le64_to_cpu(ino->size);
1506                        }
1507                }
1508                if (e->exists && e->i_size < e->d_size) {
1509                        if (!e->inode && c->ro_mount) {
1510                                /* Fix the inode size and pin it in memory */
1511                                struct inode *inode;
1512
1513                                inode = ubifs_iget(c->vfs_sb, e->inum);
1514                                if (IS_ERR(inode))
1515                                        return PTR_ERR(inode);
1516                                if (inode->i_size < e->d_size) {
1517                                        dbg_rcvry("ino %lu size %lld -> %lld",
1518                                                  (unsigned long)e->inum,
1519                                                  e->d_size, inode->i_size);
1520                                        inode->i_size = e->d_size;
1521                                        ubifs_inode(inode)->ui_size = e->d_size;
1522                                        e->inode = inode;
1523                                        this = rb_next(this);
1524                                        continue;
1525                                }
1526                                iput(inode);
1527                        } else {
1528                                /* Fix the size in place */
1529                                err = fix_size_in_place(c, e);
1530                                if (err)
1531                                        return err;
1532                                if (e->inode)
1533                                        iput(e->inode);
1534                        }
1535                }
1536                this = rb_next(this);
1537                rb_erase(&e->rb, &c->size_tree);
1538                kfree(e);
1539        }
1540        return 0;
1541}
1542