linux/fs/ubifs/recovery.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Adrian Hunter
  20 *          Artem Bityutskiy (\xD0\x91\xD0\xB8\xD1\x82\xD1\x8E\xD1\x86\xD0\xBA\xD0\xB8\xD0\xB9 \xD0\x90\xD1\x80\xD1\x82\xD1\x91\xD0\xBC)
  21 */
  22
  23/*
  24 * This file implements functions needed to recover from unclean un-mounts.
  25 * When UBIFS is mounted, it checks a flag on the master node to determine if
  26 * an un-mount was completed successfully. If not, the process of mounting
  27 * incorporates additional checking and fixing of on-flash data structures.
  28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
  29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
  30 * read-only, and the flash is not modified in that case.
  31 */
  32
  33#include <linux/crc32.h>
  34#include <linux/slab.h>
  35#include "ubifs.h"
  36
  37/**
  38 * is_empty - determine whether a buffer is empty (contains all 0xff).
  39 * @buf: buffer to clean
  40 * @len: length of buffer
  41 *
  42 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
  43 * %0 is returned.
  44 */
  45static int is_empty(void *buf, int len)
  46{
  47        uint8_t *p = buf;
  48        int i;
  49
  50        for (i = 0; i < len; i++)
  51                if (*p++ != 0xff)
  52                        return 0;
  53        return 1;
  54}
  55
  56/**
  57 * first_non_ff - find offset of the first non-0xff byte.
  58 * @buf: buffer to search in
  59 * @len: length of buffer
  60 *
  61 * This function returns offset of the first non-0xff byte in @buf or %-1 if
  62 * the buffer contains only 0xff bytes.
  63 */
  64static int first_non_ff(void *buf, int len)
  65{
  66        uint8_t *p = buf;
  67        int i;
  68
  69        for (i = 0; i < len; i++)
  70                if (*p++ != 0xff)
  71                        return i;
  72        return -1;
  73}
  74
  75/**
  76 * get_master_node - get the last valid master node allowing for corruption.
  77 * @c: UBIFS file-system description object
  78 * @lnum: LEB number
  79 * @pbuf: buffer containing the LEB read, is returned here
  80 * @mst: master node, if found, is returned here
  81 * @cor: corruption, if found, is returned here
  82 *
  83 * This function allocates a buffer, reads the LEB into it, and finds and
  84 * returns the last valid master node allowing for one area of corruption.
  85 * The corrupt area, if there is one, must be consistent with the assumption
  86 * that it is the result of an unclean unmount while the master node was being
  87 * written. Under those circumstances, it is valid to use the previously written
  88 * master node.
  89 *
  90 * This function returns %0 on success and a negative error code on failure.
  91 */
  92static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
  93                           struct ubifs_mst_node **mst, void **cor)
  94{
  95        const int sz = c->mst_node_alsz;
  96        int err, offs, len;
  97        void *sbuf, *buf;
  98
  99        sbuf = vmalloc(c->leb_size);
 100        if (!sbuf)
 101                return -ENOMEM;
 102
 103        err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
 104        if (err && err != -EBADMSG)
 105                goto out_free;
 106
 107        /* Find the first position that is definitely not a node */
 108        offs = 0;
 109        buf = sbuf;
 110        len = c->leb_size;
 111        while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
 112                struct ubifs_ch *ch = buf;
 113
 114                if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
 115                        break;
 116                offs += sz;
 117                buf  += sz;
 118                len  -= sz;
 119        }
 120        /* See if there was a valid master node before that */
 121        if (offs) {
 122                int ret;
 123
 124                offs -= sz;
 125                buf  -= sz;
 126                len  += sz;
 127                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 128                if (ret != SCANNED_A_NODE && offs) {
 129                        /* Could have been corruption so check one place back */
 130                        offs -= sz;
 131                        buf  -= sz;
 132                        len  += sz;
 133                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 134                        if (ret != SCANNED_A_NODE)
 135                                /*
 136                                 * We accept only one area of corruption because
 137                                 * we are assuming that it was caused while
 138                                 * trying to write a master node.
 139                                 */
 140                                goto out_err;
 141                }
 142                if (ret == SCANNED_A_NODE) {
 143                        struct ubifs_ch *ch = buf;
 144
 145                        if (ch->node_type != UBIFS_MST_NODE)
 146                                goto out_err;
 147                        dbg_rcvry("found a master node at %d:%d", lnum, offs);
 148                        *mst = buf;
 149                        offs += sz;
 150                        buf  += sz;
 151                        len  -= sz;
 152                }
 153        }
 154        /* Check for corruption */
 155        if (offs < c->leb_size) {
 156                if (!is_empty(buf, min_t(int, len, sz))) {
 157                        *cor = buf;
 158                        dbg_rcvry("found corruption at %d:%d", lnum, offs);
 159                }
 160                offs += sz;
 161                buf  += sz;
 162                len  -= sz;
 163        }
 164        /* Check remaining empty space */
 165        if (offs < c->leb_size)
 166                if (!is_empty(buf, len))
 167                        goto out_err;
 168        *pbuf = sbuf;
 169        return 0;
 170
 171out_err:
 172        err = -EINVAL;
 173out_free:
 174        vfree(sbuf);
 175        *mst = NULL;
 176        *cor = NULL;
 177        return err;
 178}
 179
 180/**
 181 * write_rcvrd_mst_node - write recovered master node.
 182 * @c: UBIFS file-system description object
 183 * @mst: master node
 184 *
 185 * This function returns %0 on success and a negative error code on failure.
 186 */
 187static int write_rcvrd_mst_node(struct ubifs_info *c,
 188                                struct ubifs_mst_node *mst)
 189{
 190        int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
 191        __le32 save_flags;
 192
 193        dbg_rcvry("recovery");
 194
 195        save_flags = mst->flags;
 196        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 197
 198        ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
 199        err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
 200        if (err)
 201                goto out;
 202        err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
 203        if (err)
 204                goto out;
 205out:
 206        mst->flags = save_flags;
 207        return err;
 208}
 209
 210/**
 211 * ubifs_recover_master_node - recover the master node.
 212 * @c: UBIFS file-system description object
 213 *
 214 * This function recovers the master node from corruption that may occur due to
 215 * an unclean unmount.
 216 *
 217 * This function returns %0 on success and a negative error code on failure.
 218 */
 219int ubifs_recover_master_node(struct ubifs_info *c)
 220{
 221        void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
 222        struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
 223        const int sz = c->mst_node_alsz;
 224        int err, offs1, offs2;
 225
 226        dbg_rcvry("recovery");
 227
 228        err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
 229        if (err)
 230                goto out_free;
 231
 232        err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
 233        if (err)
 234                goto out_free;
 235
 236        if (mst1) {
 237                offs1 = (void *)mst1 - buf1;
 238                if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
 239                    (offs1 == 0 && !cor1)) {
 240                        /*
 241                         * mst1 was written by recovery at offset 0 with no
 242                         * corruption.
 243                         */
 244                        dbg_rcvry("recovery recovery");
 245                        mst = mst1;
 246                } else if (mst2) {
 247                        offs2 = (void *)mst2 - buf2;
 248                        if (offs1 == offs2) {
 249                                /* Same offset, so must be the same */
 250                                if (memcmp((void *)mst1 + UBIFS_CH_SZ,
 251                                           (void *)mst2 + UBIFS_CH_SZ,
 252                                           UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
 253                                        goto out_err;
 254                                mst = mst1;
 255                        } else if (offs2 + sz == offs1) {
 256                                /* 1st LEB was written, 2nd was not */
 257                                if (cor1)
 258                                        goto out_err;
 259                                mst = mst1;
 260                        } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
 261                                /* 1st LEB was unmapped and written, 2nd not */
 262                                if (cor1)
 263                                        goto out_err;
 264                                mst = mst1;
 265                        } else
 266                                goto out_err;
 267                } else {
 268                        /*
 269                         * 2nd LEB was unmapped and about to be written, so
 270                         * there must be only one master node in the first LEB
 271                         * and no corruption.
 272                         */
 273                        if (offs1 != 0 || cor1)
 274                                goto out_err;
 275                        mst = mst1;
 276                }
 277        } else {
 278                if (!mst2)
 279                        goto out_err;
 280                /*
 281                 * 1st LEB was unmapped and about to be written, so there must
 282                 * be no room left in 2nd LEB.
 283                 */
 284                offs2 = (void *)mst2 - buf2;
 285                if (offs2 + sz + sz <= c->leb_size)
 286                        goto out_err;
 287                mst = mst2;
 288        }
 289
 290        ubifs_msg("recovered master node from LEB %d",
 291                  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 292
 293        memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
 294
 295        if (c->ro_mount) {
 296                /* Read-only mode. Keep a copy for switching to rw mode */
 297                c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
 298                if (!c->rcvrd_mst_node) {
 299                        err = -ENOMEM;
 300                        goto out_free;
 301                }
 302                memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
 303
 304                /*
 305                 * We had to recover the master node, which means there was an
 306                 * unclean reboot. However, it is possible that the master node
 307                 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
 308                 * E.g., consider the following chain of events:
 309                 *
 310                 * 1. UBIFS was cleanly unmounted, so the master node is clean
 311                 * 2. UBIFS is being mounted R/W and starts changing the master
 312                 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
 313                 *    so this LEB ends up with some amount of garbage at the
 314                 *    end.
 315                 * 3. UBIFS is being mounted R/O. We reach this place and
 316                 *    recover the master node from the second LEB
 317                 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
 318                 *    because we are being mounted R/O. We have to defer the
 319                 *    operation.
 320                 * 4. However, this master node (@c->mst_node) is marked as
 321                 *    clean (since the step 1). And if we just return, the
 322                 *    mount code will be confused and won't recover the master
 323                 *    node when it is re-mounter R/W later.
 324                 *
 325                 *    Thus, to force the recovery by marking the master node as
 326                 *    dirty.
 327                 */
 328                c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 329        } else {
 330                /* Write the recovered master node */
 331                c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
 332                err = write_rcvrd_mst_node(c, c->mst_node);
 333                if (err)
 334                        goto out_free;
 335        }
 336
 337        vfree(buf2);
 338        vfree(buf1);
 339
 340        return 0;
 341
 342out_err:
 343        err = -EINVAL;
 344out_free:
 345        ubifs_err("failed to recover master node");
 346        if (mst1) {
 347                dbg_err("dumping first master node");
 348                dbg_dump_node(c, mst1);
 349        }
 350        if (mst2) {
 351                dbg_err("dumping second master node");
 352                dbg_dump_node(c, mst2);
 353        }
 354        vfree(buf2);
 355        vfree(buf1);
 356        return err;
 357}
 358
 359/**
 360 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 361 * @c: UBIFS file-system description object
 362 *
 363 * This function writes the master node that was recovered during mounting in
 364 * read-only mode and must now be written because we are remounting rw.
 365 *
 366 * This function returns %0 on success and a negative error code on failure.
 367 */
 368int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
 369{
 370        int err;
 371
 372        if (!c->rcvrd_mst_node)
 373                return 0;
 374        c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 375        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 376        err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
 377        if (err)
 378                return err;
 379        kfree(c->rcvrd_mst_node);
 380        c->rcvrd_mst_node = NULL;
 381        return 0;
 382}
 383
 384/**
 385 * is_last_write - determine if an offset was in the last write to a LEB.
 386 * @c: UBIFS file-system description object
 387 * @buf: buffer to check
 388 * @offs: offset to check
 389 *
 390 * This function returns %1 if @offs was in the last write to the LEB whose data
 391 * is in @buf, otherwise %0 is returned.  The determination is made by checking
 392 * for subsequent empty space starting from the next @c->min_io_size boundary.
 393 */
 394static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 395{
 396        int empty_offs, check_len;
 397        uint8_t *p;
 398
 399        /*
 400         * Round up to the next @c->min_io_size boundary i.e. @offs is in the
 401         * last wbuf written. After that should be empty space.
 402         */
 403        empty_offs = ALIGN(offs + 1, c->min_io_size);
 404        check_len = c->leb_size - empty_offs;
 405        p = buf + empty_offs - offs;
 406        return is_empty(p, check_len);
 407}
 408
 409/**
 410 * clean_buf - clean the data from an LEB sitting in a buffer.
 411 * @c: UBIFS file-system description object
 412 * @buf: buffer to clean
 413 * @lnum: LEB number to clean
 414 * @offs: offset from which to clean
 415 * @len: length of buffer
 416 *
 417 * This function pads up to the next min_io_size boundary (if there is one) and
 418 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 419 * @c->min_io_size boundary.
 420 */
 421static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 422                      int *offs, int *len)
 423{
 424        int empty_offs, pad_len;
 425
 426        lnum = lnum;
 427        dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 428
 429        ubifs_assert(!(*offs & 7));
 430        empty_offs = ALIGN(*offs, c->min_io_size);
 431        pad_len = empty_offs - *offs;
 432        ubifs_pad(c, *buf, pad_len);
 433        *offs += pad_len;
 434        *buf += pad_len;
 435        *len -= pad_len;
 436        memset(*buf, 0xff, c->leb_size - empty_offs);
 437}
 438
 439/**
 440 * no_more_nodes - determine if there are no more nodes in a buffer.
 441 * @c: UBIFS file-system description object
 442 * @buf: buffer to check
 443 * @len: length of buffer
 444 * @lnum: LEB number of the LEB from which @buf was read
 445 * @offs: offset from which @buf was read
 446 *
 447 * This function ensures that the corrupted node at @offs is the last thing
 448 * written to a LEB. This function returns %1 if more data is not found and
 449 * %0 if more data is found.
 450 */
 451static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 452                        int lnum, int offs)
 453{
 454        struct ubifs_ch *ch = buf;
 455        int skip, dlen = le32_to_cpu(ch->len);
 456
 457        /* Check for empty space after the corrupt node's common header */
 458        skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
 459        if (is_empty(buf + skip, len - skip))
 460                return 1;
 461        /*
 462         * The area after the common header size is not empty, so the common
 463         * header must be intact. Check it.
 464         */
 465        if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
 466                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
 467                return 0;
 468        }
 469        /* Now we know the corrupt node's length we can skip over it */
 470        skip = ALIGN(offs + dlen, c->min_io_size) - offs;
 471        /* After which there should be empty space */
 472        if (is_empty(buf + skip, len - skip))
 473                return 1;
 474        dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
 475        return 0;
 476}
 477
 478/**
 479 * fix_unclean_leb - fix an unclean LEB.
 480 * @c: UBIFS file-system description object
 481 * @sleb: scanned LEB information
 482 * @start: offset where scan started
 483 */
 484static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 485                           int start)
 486{
 487        int lnum = sleb->lnum, endpt = start;
 488
 489        /* Get the end offset of the last node we are keeping */
 490        if (!list_empty(&sleb->nodes)) {
 491                struct ubifs_scan_node *snod;
 492
 493                snod = list_entry(sleb->nodes.prev,
 494                                  struct ubifs_scan_node, list);
 495                endpt = snod->offs + snod->len;
 496        }
 497
 498        if (c->ro_mount && !c->remounting_rw) {
 499                /* Add to recovery list */
 500                struct ubifs_unclean_leb *ucleb;
 501
 502                dbg_rcvry("need to fix LEB %d start %d endpt %d",
 503                          lnum, start, sleb->endpt);
 504                ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
 505                if (!ucleb)
 506                        return -ENOMEM;
 507                ucleb->lnum = lnum;
 508                ucleb->endpt = endpt;
 509                list_add_tail(&ucleb->list, &c->unclean_leb_list);
 510        } else {
 511                /* Write the fixed LEB back to flash */
 512                int err;
 513
 514                dbg_rcvry("fixing LEB %d start %d endpt %d",
 515                          lnum, start, sleb->endpt);
 516                if (endpt == 0) {
 517                        err = ubifs_leb_unmap(c, lnum);
 518                        if (err)
 519                                return err;
 520                } else {
 521                        int len = ALIGN(endpt, c->min_io_size);
 522
 523                        if (start) {
 524                                err = ubi_read(c->ubi, lnum, sleb->buf, 0,
 525                                               start);
 526                                if (err)
 527                                        return err;
 528                        }
 529                        /* Pad to min_io_size */
 530                        if (len > endpt) {
 531                                int pad_len = len - ALIGN(endpt, 8);
 532
 533                                if (pad_len > 0) {
 534                                        void *buf = sleb->buf + len - pad_len;
 535
 536                                        ubifs_pad(c, buf, pad_len);
 537                                }
 538                        }
 539                        err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
 540                                             UBI_UNKNOWN);
 541                        if (err)
 542                                return err;
 543                }
 544        }
 545        return 0;
 546}
 547
 548/**
 549 * drop_incomplete_group - drop nodes from an incomplete group.
 550 * @sleb: scanned LEB information
 551 * @offs: offset of dropped nodes is returned here
 552 *
 553 * This function returns %1 if nodes are dropped and %0 otherwise.
 554 */
 555static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
 556{
 557        int dropped = 0;
 558
 559        while (!list_empty(&sleb->nodes)) {
 560                struct ubifs_scan_node *snod;
 561                struct ubifs_ch *ch;
 562
 563                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 564                                  list);
 565                ch = snod->node;
 566                if (ch->group_type != UBIFS_IN_NODE_GROUP)
 567                        return dropped;
 568                dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
 569                *offs = snod->offs;
 570                list_del(&snod->list);
 571                kfree(snod);
 572                sleb->nodes_cnt -= 1;
 573                dropped = 1;
 574        }
 575        return dropped;
 576}
 577
 578/**
 579 * ubifs_recover_leb - scan and recover a LEB.
 580 * @c: UBIFS file-system description object
 581 * @lnum: LEB number
 582 * @offs: offset
 583 * @sbuf: LEB-sized buffer to use
 584 * @grouped: nodes may be grouped for recovery
 585 *
 586 * This function does a scan of a LEB, but caters for errors that might have
 587 * been caused by the unclean unmount from which we are attempting to recover.
 588 * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
 589 * found, and a negative error code in case of failure.
 590 */
 591struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 592                                         int offs, void *sbuf, int grouped)
 593{
 594        int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
 595        int empty_chkd = 0, start = offs;
 596        struct ubifs_scan_leb *sleb;
 597        void *buf = sbuf + offs;
 598
 599        dbg_rcvry("%d:%d", lnum, offs);
 600
 601        sleb = ubifs_start_scan(c, lnum, offs, sbuf);
 602        if (IS_ERR(sleb))
 603                return sleb;
 604
 605        if (sleb->ecc)
 606                need_clean = 1;
 607
 608        while (len >= 8) {
 609                int ret;
 610
 611                dbg_scan("look at LEB %d:%d (%d bytes left)",
 612                         lnum, offs, len);
 613
 614                cond_resched();
 615
 616                /*
 617                 * Scan quietly until there is an error from which we cannot
 618                 * recover
 619                 */
 620                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 621
 622                if (ret == SCANNED_A_NODE) {
 623                        /* A valid node, and not a padding node */
 624                        struct ubifs_ch *ch = buf;
 625                        int node_len;
 626
 627                        err = ubifs_add_snod(c, sleb, buf, offs);
 628                        if (err)
 629                                goto error;
 630                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 631                        offs += node_len;
 632                        buf += node_len;
 633                        len -= node_len;
 634                        continue;
 635                }
 636
 637                if (ret > 0) {
 638                        /* Padding bytes or a valid padding node */
 639                        offs += ret;
 640                        buf += ret;
 641                        len -= ret;
 642                        continue;
 643                }
 644
 645                if (ret == SCANNED_EMPTY_SPACE) {
 646                        if (!is_empty(buf, len)) {
 647                                if (!is_last_write(c, buf, offs))
 648                                        break;
 649                                clean_buf(c, &buf, lnum, &offs, &len);
 650                                need_clean = 1;
 651                        }
 652                        empty_chkd = 1;
 653                        break;
 654                }
 655
 656                if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
 657                        if (is_last_write(c, buf, offs)) {
 658                                clean_buf(c, &buf, lnum, &offs, &len);
 659                                need_clean = 1;
 660                                empty_chkd = 1;
 661                                break;
 662                        }
 663
 664                if (ret == SCANNED_A_CORRUPT_NODE)
 665                        if (no_more_nodes(c, buf, len, lnum, offs)) {
 666                                clean_buf(c, &buf, lnum, &offs, &len);
 667                                need_clean = 1;
 668                                empty_chkd = 1;
 669                                break;
 670                        }
 671
 672                if (quiet) {
 673                        /* Redo the last scan but noisily */
 674                        quiet = 0;
 675                        continue;
 676                }
 677
 678                switch (ret) {
 679                case SCANNED_GARBAGE:
 680                        dbg_err("garbage");
 681                        goto corrupted;
 682                case SCANNED_A_CORRUPT_NODE:
 683                case SCANNED_A_BAD_PAD_NODE:
 684                        dbg_err("bad node");
 685                        goto corrupted;
 686                default:
 687                        dbg_err("unknown");
 688                        err = -EINVAL;
 689                        goto error;
 690                }
 691        }
 692
 693        if (!empty_chkd && !is_empty(buf, len)) {
 694                if (is_last_write(c, buf, offs)) {
 695                        clean_buf(c, &buf, lnum, &offs, &len);
 696                        need_clean = 1;
 697                } else {
 698                        int corruption = first_non_ff(buf, len);
 699
 700                        ubifs_err("corrupt empty space LEB %d:%d, corruption "
 701                                  "starts at %d", lnum, offs, corruption);
 702                        /* Make sure we dump interesting non-0xFF data */
 703                        offs = corruption;
 704                        buf += corruption;
 705                        goto corrupted;
 706                }
 707        }
 708
 709        /* Drop nodes from incomplete group */
 710        if (grouped && drop_incomplete_group(sleb, &offs)) {
 711                buf = sbuf + offs;
 712                len = c->leb_size - offs;
 713                clean_buf(c, &buf, lnum, &offs, &len);
 714                need_clean = 1;
 715        }
 716
 717        if (offs % c->min_io_size) {
 718                clean_buf(c, &buf, lnum, &offs, &len);
 719                need_clean = 1;
 720        }
 721
 722        ubifs_end_scan(c, sleb, lnum, offs);
 723
 724        if (need_clean) {
 725                err = fix_unclean_leb(c, sleb, start);
 726                if (err)
 727                        goto error;
 728        }
 729
 730        return sleb;
 731
 732corrupted:
 733        ubifs_scanned_corruption(c, lnum, offs, buf);
 734        err = -EUCLEAN;
 735error:
 736        ubifs_err("LEB %d scanning failed", lnum);
 737        ubifs_scan_destroy(sleb);
 738        return ERR_PTR(err);
 739}
 740
 741/**
 742 * get_cs_sqnum - get commit start sequence number.
 743 * @c: UBIFS file-system description object
 744 * @lnum: LEB number of commit start node
 745 * @offs: offset of commit start node
 746 * @cs_sqnum: commit start sequence number is returned here
 747 *
 748 * This function returns %0 on success and a negative error code on failure.
 749 */
 750static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 751                        unsigned long long *cs_sqnum)
 752{
 753        struct ubifs_cs_node *cs_node = NULL;
 754        int err, ret;
 755
 756        dbg_rcvry("at %d:%d", lnum, offs);
 757        cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
 758        if (!cs_node)
 759                return -ENOMEM;
 760        if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 761                goto out_err;
 762        err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
 763        if (err && err != -EBADMSG)
 764                goto out_free;
 765        ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 766        if (ret != SCANNED_A_NODE) {
 767                dbg_err("Not a valid node");
 768                goto out_err;
 769        }
 770        if (cs_node->ch.node_type != UBIFS_CS_NODE) {
 771                dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
 772                goto out_err;
 773        }
 774        if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
 775                dbg_err("CS node cmt_no %llu != current cmt_no %llu",
 776                        (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 777                        c->cmt_no);
 778                goto out_err;
 779        }
 780        *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
 781        dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
 782        kfree(cs_node);
 783        return 0;
 784
 785out_err:
 786        err = -EINVAL;
 787out_free:
 788        ubifs_err("failed to get CS sqnum");
 789        kfree(cs_node);
 790        return err;
 791}
 792
 793/**
 794 * ubifs_recover_log_leb - scan and recover a log LEB.
 795 * @c: UBIFS file-system description object
 796 * @lnum: LEB number
 797 * @offs: offset
 798 * @sbuf: LEB-sized buffer to use
 799 *
 800 * This function does a scan of a LEB, but caters for errors that might have
 801 * been caused by unclean reboots from which we are attempting to recover
 802 * (assume that only the last log LEB can be corrupted by an unclean reboot).
 803 *
 804 * This function returns %0 on success and a negative error code on failure.
 805 */
 806struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 807                                             int offs, void *sbuf)
 808{
 809        struct ubifs_scan_leb *sleb;
 810        int next_lnum;
 811
 812        dbg_rcvry("LEB %d", lnum);
 813        next_lnum = lnum + 1;
 814        if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
 815                next_lnum = UBIFS_LOG_LNUM;
 816        if (next_lnum != c->ltail_lnum) {
 817                /*
 818                 * We can only recover at the end of the log, so check that the
 819                 * next log LEB is empty or out of date.
 820                 */
 821                sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
 822                if (IS_ERR(sleb))
 823                        return sleb;
 824                if (sleb->nodes_cnt) {
 825                        struct ubifs_scan_node *snod;
 826                        unsigned long long cs_sqnum = c->cs_sqnum;
 827
 828                        snod = list_entry(sleb->nodes.next,
 829                                          struct ubifs_scan_node, list);
 830                        if (cs_sqnum == 0) {
 831                                int err;
 832
 833                                err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
 834                                if (err) {
 835                                        ubifs_scan_destroy(sleb);
 836                                        return ERR_PTR(err);
 837                                }
 838                        }
 839                        if (snod->sqnum > cs_sqnum) {
 840                                ubifs_err("unrecoverable log corruption "
 841                                          "in LEB %d", lnum);
 842                                ubifs_scan_destroy(sleb);
 843                                return ERR_PTR(-EUCLEAN);
 844                        }
 845                }
 846                ubifs_scan_destroy(sleb);
 847        }
 848        return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
 849}
 850
 851/**
 852 * recover_head - recover a head.
 853 * @c: UBIFS file-system description object
 854 * @lnum: LEB number of head to recover
 855 * @offs: offset of head to recover
 856 * @sbuf: LEB-sized buffer to use
 857 *
 858 * This function ensures that there is no data on the flash at a head location.
 859 *
 860 * This function returns %0 on success and a negative error code on failure.
 861 */
 862static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 863                        void *sbuf)
 864{
 865        int len, err;
 866
 867        if (c->min_io_size > 1)
 868                len = c->min_io_size;
 869        else
 870                len = 512;
 871        if (offs + len > c->leb_size)
 872                len = c->leb_size - offs;
 873
 874        if (!len)
 875                return 0;
 876
 877        /* Read at the head location and check it is empty flash */
 878        err = ubi_read(c->ubi, lnum, sbuf, offs, len);
 879        if (err || !is_empty(sbuf, len)) {
 880                dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 881                if (offs == 0)
 882                        return ubifs_leb_unmap(c, lnum);
 883                err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
 884                if (err)
 885                        return err;
 886                return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
 887        }
 888
 889        return 0;
 890}
 891
 892/**
 893 * ubifs_recover_inl_heads - recover index and LPT heads.
 894 * @c: UBIFS file-system description object
 895 * @sbuf: LEB-sized buffer to use
 896 *
 897 * This function ensures that there is no data on the flash at the index and
 898 * LPT head locations.
 899 *
 900 * This deals with the recovery of a half-completed journal commit. UBIFS is
 901 * careful never to overwrite the last version of the index or the LPT. Because
 902 * the index and LPT are wandering trees, data from a half-completed commit will
 903 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
 904 * assumed to be empty and will be unmapped anyway before use, or in the index
 905 * and LPT heads.
 906 *
 907 * This function returns %0 on success and a negative error code on failure.
 908 */
 909int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
 910{
 911        int err;
 912
 913        ubifs_assert(!c->ro_mount || c->remounting_rw);
 914
 915        dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
 916        err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
 917        if (err)
 918                return err;
 919
 920        dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
 921        err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 922        if (err)
 923                return err;
 924
 925        return 0;
 926}
 927
 928/**
 929 *  clean_an_unclean_leb - read and write a LEB to remove corruption.
 930 * @c: UBIFS file-system description object
 931 * @ucleb: unclean LEB information
 932 * @sbuf: LEB-sized buffer to use
 933 *
 934 * This function reads a LEB up to a point pre-determined by the mount recovery,
 935 * checks the nodes, and writes the result back to the flash, thereby cleaning
 936 * off any following corruption, or non-fatal ECC errors.
 937 *
 938 * This function returns %0 on success and a negative error code on failure.
 939 */
 940static int clean_an_unclean_leb(const struct ubifs_info *c,
 941                                struct ubifs_unclean_leb *ucleb, void *sbuf)
 942{
 943        int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
 944        void *buf = sbuf;
 945
 946        dbg_rcvry("LEB %d len %d", lnum, len);
 947
 948        if (len == 0) {
 949                /* Nothing to read, just unmap it */
 950                err = ubifs_leb_unmap(c, lnum);
 951                if (err)
 952                        return err;
 953                return 0;
 954        }
 955
 956        err = ubi_read(c->ubi, lnum, buf, offs, len);
 957        if (err && err != -EBADMSG)
 958                return err;
 959
 960        while (len >= 8) {
 961                int ret;
 962
 963                cond_resched();
 964
 965                /* Scan quietly until there is an error */
 966                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 967
 968                if (ret == SCANNED_A_NODE) {
 969                        /* A valid node, and not a padding node */
 970                        struct ubifs_ch *ch = buf;
 971                        int node_len;
 972
 973                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 974                        offs += node_len;
 975                        buf += node_len;
 976                        len -= node_len;
 977                        continue;
 978                }
 979
 980                if (ret > 0) {
 981                        /* Padding bytes or a valid padding node */
 982                        offs += ret;
 983                        buf += ret;
 984                        len -= ret;
 985                        continue;
 986                }
 987
 988                if (ret == SCANNED_EMPTY_SPACE) {
 989                        ubifs_err("unexpected empty space at %d:%d",
 990                                  lnum, offs);
 991                        return -EUCLEAN;
 992                }
 993
 994                if (quiet) {
 995                        /* Redo the last scan but noisily */
 996                        quiet = 0;
 997                        continue;
 998                }
 999
1000                ubifs_scanned_corruption(c, lnum, offs, buf);
1001                return -EUCLEAN;
1002        }
1003
1004        /* Pad to min_io_size */
1005        len = ALIGN(ucleb->endpt, c->min_io_size);
1006        if (len > ucleb->endpt) {
1007                int pad_len = len - ALIGN(ucleb->endpt, 8);
1008
1009                if (pad_len > 0) {
1010                        buf = c->sbuf + len - pad_len;
1011                        ubifs_pad(c, buf, pad_len);
1012                }
1013        }
1014
1015        /* Write back the LEB atomically */
1016        err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
1017        if (err)
1018                return err;
1019
1020        dbg_rcvry("cleaned LEB %d", lnum);
1021
1022        return 0;
1023}
1024
1025/**
1026 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1027 * @c: UBIFS file-system description object
1028 * @sbuf: LEB-sized buffer to use
1029 *
1030 * This function cleans a LEB identified during recovery that needs to be
1031 * written but was not because UBIFS was mounted read-only. This happens when
1032 * remounting to read-write mode.
1033 *
1034 * This function returns %0 on success and a negative error code on failure.
1035 */
1036int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1037{
1038        dbg_rcvry("recovery");
1039        while (!list_empty(&c->unclean_leb_list)) {
1040                struct ubifs_unclean_leb *ucleb;
1041                int err;
1042
1043                ucleb = list_entry(c->unclean_leb_list.next,
1044                                   struct ubifs_unclean_leb, list);
1045                err = clean_an_unclean_leb(c, ucleb, sbuf);
1046                if (err)
1047                        return err;
1048                list_del(&ucleb->list);
1049                kfree(ucleb);
1050        }
1051        return 0;
1052}
1053
1054/**
1055 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1056 * @c: UBIFS file-system description object
1057 *
1058 * Out-of-place garbage collection requires always one empty LEB with which to
1059 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1060 * written to the master node on unmounting. In the case of an unclean unmount
1061 * the value of gc_lnum recorded in the master node is out of date and cannot
1062 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1063 * However, there may not be enough empty space, in which case it must be
1064 * possible to GC the dirtiest LEB into the GC head LEB.
1065 *
1066 * This function also runs the commit which causes the TNC updates from
1067 * size-recovery and orphans to be written to the flash. That is important to
1068 * ensure correct replay order for subsequent mounts.
1069 *
1070 * This function returns %0 on success and a negative error code on failure.
1071 */
1072int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1073{
1074        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1075        struct ubifs_lprops lp;
1076        int lnum, err;
1077
1078        c->gc_lnum = -1;
1079        if (wbuf->lnum == -1) {
1080                dbg_rcvry("no GC head LEB");
1081                goto find_free;
1082        }
1083        /*
1084         * See whether the used space in the dirtiest LEB fits in the GC head
1085         * LEB.
1086         */
1087        if (wbuf->offs == c->leb_size) {
1088                dbg_rcvry("no room in GC head LEB");
1089                goto find_free;
1090        }
1091        err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1092        if (err) {
1093                /*
1094                 * There are no dirty or empty LEBs subject to here being
1095                 * enough for the index. Try to use
1096                 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1097                 * LEBs (ignoring index requirements). If the index then
1098                 * doesn't have enough LEBs the recovery commit will fail -
1099                 * which is the  same result anyway i.e. recovery fails. So
1100                 * there is no problem ignoring index  requirements and just
1101                 * grabbing a free LEB since we have already established there
1102                 * is not a dirty LEB we could have used instead.
1103                 */
1104                if (err == -ENOSPC) {
1105                        dbg_rcvry("could not find a dirty LEB");
1106                        goto find_free;
1107                }
1108                return err;
1109        }
1110        ubifs_assert(!(lp.flags & LPROPS_INDEX));
1111        lnum = lp.lnum;
1112        if (lp.free + lp.dirty == c->leb_size) {
1113                /* An empty LEB was returned */
1114                if (lp.free != c->leb_size) {
1115                        err = ubifs_change_one_lp(c, lnum, c->leb_size,
1116                                                  0, 0, 0, 0);
1117                        if (err)
1118                                return err;
1119                }
1120                err = ubifs_leb_unmap(c, lnum);
1121                if (err)
1122                        return err;
1123                c->gc_lnum = lnum;
1124                dbg_rcvry("allocated LEB %d for GC", lnum);
1125                /* Run the commit */
1126                dbg_rcvry("committing");
1127                return ubifs_run_commit(c);
1128        }
1129        /*
1130         * There was no empty LEB so the used space in the dirtiest LEB must fit
1131         * in the GC head LEB.
1132         */
1133        if (lp.free + lp.dirty < wbuf->offs) {
1134                dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1135                          lnum, wbuf->lnum, wbuf->offs);
1136                err = ubifs_return_leb(c, lnum);
1137                if (err)
1138                        return err;
1139                goto find_free;
1140        }
1141        /*
1142         * We run the commit before garbage collection otherwise subsequent
1143         * mounts will see the GC and orphan deletion in a different order.
1144         */
1145        dbg_rcvry("committing");
1146        err = ubifs_run_commit(c);
1147        if (err)
1148                return err;
1149        /*
1150         * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
1151         * - use locking to keep 'ubifs_assert()' happy.
1152         */
1153        dbg_rcvry("GC'ing LEB %d", lnum);
1154        mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1155        err = ubifs_garbage_collect_leb(c, &lp);
1156        if (err >= 0) {
1157                int err2 = ubifs_wbuf_sync_nolock(wbuf);
1158
1159                if (err2)
1160                        err = err2;
1161        }
1162        mutex_unlock(&wbuf->io_mutex);
1163        if (err < 0) {
1164                dbg_err("GC failed, error %d", err);
1165                if (err == -EAGAIN)
1166                        err = -EINVAL;
1167                return err;
1168        }
1169        if (err != LEB_RETAINED) {
1170                dbg_err("GC returned %d", err);
1171                return -EINVAL;
1172        }
1173        err = ubifs_leb_unmap(c, c->gc_lnum);
1174        if (err)
1175                return err;
1176        dbg_rcvry("allocated LEB %d for GC", lnum);
1177        return 0;
1178
1179find_free:
1180        /*
1181         * There is no GC head LEB or the free space in the GC head LEB is too
1182         * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1183         * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1184         */
1185        lnum = ubifs_find_free_leb_for_idx(c);
1186        if (lnum < 0) {
1187                dbg_err("could not find an empty LEB");
1188                return lnum;
1189        }
1190        /* And reset the index flag */
1191        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1192                                  LPROPS_INDEX, 0);
1193        if (err)
1194                return err;
1195        c->gc_lnum = lnum;
1196        dbg_rcvry("allocated LEB %d for GC", lnum);
1197        /* Run the commit */
1198        dbg_rcvry("committing");
1199        return ubifs_run_commit(c);
1200}
1201
1202/**
1203 * struct size_entry - inode size information for recovery.
1204 * @rb: link in the RB-tree of sizes
1205 * @inum: inode number
1206 * @i_size: size on inode
1207 * @d_size: maximum size based on data nodes
1208 * @exists: indicates whether the inode exists
1209 * @inode: inode if pinned in memory awaiting rw mode to fix it
1210 */
1211struct size_entry {
1212        struct rb_node rb;
1213        ino_t inum;
1214        loff_t i_size;
1215        loff_t d_size;
1216        int exists;
1217        struct inode *inode;
1218};
1219
1220/**
1221 * add_ino - add an entry to the size tree.
1222 * @c: UBIFS file-system description object
1223 * @inum: inode number
1224 * @i_size: size on inode
1225 * @d_size: maximum size based on data nodes
1226 * @exists: indicates whether the inode exists
1227 */
1228static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1229                   loff_t d_size, int exists)
1230{
1231        struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1232        struct size_entry *e;
1233
1234        while (*p) {
1235                parent = *p;
1236                e = rb_entry(parent, struct size_entry, rb);
1237                if (inum < e->inum)
1238                        p = &(*p)->rb_left;
1239                else
1240                        p = &(*p)->rb_right;
1241        }
1242
1243        e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1244        if (!e)
1245                return -ENOMEM;
1246
1247        e->inum = inum;
1248        e->i_size = i_size;
1249        e->d_size = d_size;
1250        e->exists = exists;
1251
1252        rb_link_node(&e->rb, parent, p);
1253        rb_insert_color(&e->rb, &c->size_tree);
1254
1255        return 0;
1256}
1257
1258/**
1259 * find_ino - find an entry on the size tree.
1260 * @c: UBIFS file-system description object
1261 * @inum: inode number
1262 */
1263static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1264{
1265        struct rb_node *p = c->size_tree.rb_node;
1266        struct size_entry *e;
1267
1268        while (p) {
1269                e = rb_entry(p, struct size_entry, rb);
1270                if (inum < e->inum)
1271                        p = p->rb_left;
1272                else if (inum > e->inum)
1273                        p = p->rb_right;
1274                else
1275                        return e;
1276        }
1277        return NULL;
1278}
1279
1280/**
1281 * remove_ino - remove an entry from the size tree.
1282 * @c: UBIFS file-system description object
1283 * @inum: inode number
1284 */
1285static void remove_ino(struct ubifs_info *c, ino_t inum)
1286{
1287        struct size_entry *e = find_ino(c, inum);
1288
1289        if (!e)
1290                return;
1291        rb_erase(&e->rb, &c->size_tree);
1292        kfree(e);
1293}
1294
1295/**
1296 * ubifs_destroy_size_tree - free resources related to the size tree.
1297 * @c: UBIFS file-system description object
1298 */
1299void ubifs_destroy_size_tree(struct ubifs_info *c)
1300{
1301        struct rb_node *this = c->size_tree.rb_node;
1302        struct size_entry *e;
1303
1304        while (this) {
1305                if (this->rb_left) {
1306                        this = this->rb_left;
1307                        continue;
1308                } else if (this->rb_right) {
1309                        this = this->rb_right;
1310                        continue;
1311                }
1312                e = rb_entry(this, struct size_entry, rb);
1313                if (e->inode)
1314                        iput(e->inode);
1315                this = rb_parent(this);
1316                if (this) {
1317                        if (this->rb_left == &e->rb)
1318                                this->rb_left = NULL;
1319                        else
1320                                this->rb_right = NULL;
1321                }
1322                kfree(e);
1323        }
1324        c->size_tree = RB_ROOT;
1325}
1326
1327/**
1328 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1329 * @c: UBIFS file-system description object
1330 * @key: node key
1331 * @deletion: node is for a deletion
1332 * @new_size: inode size
1333 *
1334 * This function has two purposes:
1335 *     1) to ensure there are no data nodes that fall outside the inode size
1336 *     2) to ensure there are no data nodes for inodes that do not exist
1337 * To accomplish those purposes, a rb-tree is constructed containing an entry
1338 * for each inode number in the journal that has not been deleted, and recording
1339 * the size from the inode node, the maximum size of any data node (also altered
1340 * by truncations) and a flag indicating a inode number for which no inode node
1341 * was present in the journal.
1342 *
1343 * Note that there is still the possibility that there are data nodes that have
1344 * been committed that are beyond the inode size, however the only way to find
1345 * them would be to scan the entire index. Alternatively, some provision could
1346 * be made to record the size of inodes at the start of commit, which would seem
1347 * very cumbersome for a scenario that is quite unlikely and the only negative
1348 * consequence of which is wasted space.
1349 *
1350 * This functions returns %0 on success and a negative error code on failure.
1351 */
1352int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1353                             int deletion, loff_t new_size)
1354{
1355        ino_t inum = key_inum(c, key);
1356        struct size_entry *e;
1357        int err;
1358
1359        switch (key_type(c, key)) {
1360        case UBIFS_INO_KEY:
1361                if (deletion)
1362                        remove_ino(c, inum);
1363                else {
1364                        e = find_ino(c, inum);
1365                        if (e) {
1366                                e->i_size = new_size;
1367                                e->exists = 1;
1368                        } else {
1369                                err = add_ino(c, inum, new_size, 0, 1);
1370                                if (err)
1371                                        return err;
1372                        }
1373                }
1374                break;
1375        case UBIFS_DATA_KEY:
1376                e = find_ino(c, inum);
1377                if (e) {
1378                        if (new_size > e->d_size)
1379                                e->d_size = new_size;
1380                } else {
1381                        err = add_ino(c, inum, 0, new_size, 0);
1382                        if (err)
1383                                return err;
1384                }
1385                break;
1386        case UBIFS_TRUN_KEY:
1387                e = find_ino(c, inum);
1388                if (e)
1389                        e->d_size = new_size;
1390                break;
1391        }
1392        return 0;
1393}
1394
1395/**
1396 * fix_size_in_place - fix inode size in place on flash.
1397 * @c: UBIFS file-system description object
1398 * @e: inode size information for recovery
1399 */
1400static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1401{
1402        struct ubifs_ino_node *ino = c->sbuf;
1403        unsigned char *p;
1404        union ubifs_key key;
1405        int err, lnum, offs, len;
1406        loff_t i_size;
1407        uint32_t crc;
1408
1409        /* Locate the inode node LEB number and offset */
1410        ino_key_init(c, &key, e->inum);
1411        err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1412        if (err)
1413                goto out;
1414        /*
1415         * If the size recorded on the inode node is greater than the size that
1416         * was calculated from nodes in the journal then don't change the inode.
1417         */
1418        i_size = le64_to_cpu(ino->size);
1419        if (i_size >= e->d_size)
1420                return 0;
1421        /* Read the LEB */
1422        err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
1423        if (err)
1424                goto out;
1425        /* Change the size field and recalculate the CRC */
1426        ino = c->sbuf + offs;
1427        ino->size = cpu_to_le64(e->d_size);
1428        len = le32_to_cpu(ino->ch.len);
1429        crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1430        ino->ch.crc = cpu_to_le32(crc);
1431        /* Work out where data in the LEB ends and free space begins */
1432        p = c->sbuf;
1433        len = c->leb_size - 1;
1434        while (p[len] == 0xff)
1435                len -= 1;
1436        len = ALIGN(len + 1, c->min_io_size);
1437        /* Atomically write the fixed LEB back again */
1438        err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1439        if (err)
1440                goto out;
1441        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
1442                  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1443        return 0;
1444
1445out:
1446        ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1447                   (unsigned long)e->inum, e->i_size, e->d_size, err);
1448        return err;
1449}
1450
1451/**
1452 * ubifs_recover_size - recover inode size.
1453 * @c: UBIFS file-system description object
1454 *
1455 * This function attempts to fix inode size discrepancies identified by the
1456 * 'ubifs_recover_size_accum()' function.
1457 *
1458 * This functions returns %0 on success and a negative error code on failure.
1459 */
1460int ubifs_recover_size(struct ubifs_info *c)
1461{
1462        struct rb_node *this = rb_first(&c->size_tree);
1463
1464        while (this) {
1465                struct size_entry *e;
1466                int err;
1467
1468                e = rb_entry(this, struct size_entry, rb);
1469                if (!e->exists) {
1470                        union ubifs_key key;
1471
1472                        ino_key_init(c, &key, e->inum);
1473                        err = ubifs_tnc_lookup(c, &key, c->sbuf);
1474                        if (err && err != -ENOENT)
1475                                return err;
1476                        if (err == -ENOENT) {
1477                                /* Remove data nodes that have no inode */
1478                                dbg_rcvry("removing ino %lu",
1479                                          (unsigned long)e->inum);
1480                                err = ubifs_tnc_remove_ino(c, e->inum);
1481                                if (err)
1482                                        return err;
1483                        } else {
1484                                struct ubifs_ino_node *ino = c->sbuf;
1485
1486                                e->exists = 1;
1487                                e->i_size = le64_to_cpu(ino->size);
1488                        }
1489                }
1490                if (e->exists && e->i_size < e->d_size) {
1491                        if (!e->inode && c->ro_mount) {
1492                                /* Fix the inode size and pin it in memory */
1493                                struct inode *inode;
1494
1495                                inode = ubifs_iget(c->vfs_sb, e->inum);
1496                                if (IS_ERR(inode))
1497                                        return PTR_ERR(inode);
1498                                if (inode->i_size < e->d_size) {
1499                                        dbg_rcvry("ino %lu size %lld -> %lld",
1500                                                  (unsigned long)e->inum,
1501                                                  e->d_size, inode->i_size);
1502                                        inode->i_size = e->d_size;
1503                                        ubifs_inode(inode)->ui_size = e->d_size;
1504                                        e->inode = inode;
1505                                        this = rb_next(this);
1506                                        continue;
1507                                }
1508                                iput(inode);
1509                        } else {
1510                                /* Fix the size in place */
1511                                err = fix_size_in_place(c, e);
1512                                if (err)
1513                                        return err;
1514                                if (e->inode)
1515                                        iput(e->inode);
1516                        }
1517                }
1518                this = rb_next(this);
1519                rb_erase(&e->rb, &c->size_tree);
1520                kfree(e);
1521        }
1522        return 0;
1523}
1524