linux/fs/jffs2/gc.c
<<
>>
Prefs
   1/*
   2 * JFFS2 -- Journalling Flash File System, Version 2.
   3 *
   4 * Copyright © 2001-2007 Red Hat, Inc.
   5 *
   6 * Created by David Woodhouse <dwmw2@infradead.org>
   7 *
   8 * For licensing information, see the file 'LICENCE' in this directory.
   9 *
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/mtd/mtd.h>
  14#include <linux/slab.h>
  15#include <linux/pagemap.h>
  16#include <linux/crc32.h>
  17#include <linux/compiler.h>
  18#include <linux/stat.h>
  19#include "nodelist.h"
  20#include "compr.h"
  21
  22static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  23                                          struct jffs2_inode_cache *ic,
  24                                          struct jffs2_raw_node_ref *raw);
  25static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  26                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  27static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  28                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  29static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  30                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  31static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  32                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  33                                      uint32_t start, uint32_t end);
  34static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  35                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  36                                       uint32_t start, uint32_t end);
  37static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  38                               struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  39
  40/* Called with erase_completion_lock held */
  41static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  42{
  43        struct jffs2_eraseblock *ret;
  44        struct list_head *nextlist = NULL;
  45        int n = jiffies % 128;
  46
  47        /* Pick an eraseblock to garbage collect next. This is where we'll
  48           put the clever wear-levelling algorithms. Eventually.  */
  49        /* We possibly want to favour the dirtier blocks more when the
  50           number of free blocks is low. */
  51again:
  52        if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  53                D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
  54                nextlist = &c->bad_used_list;
  55        } else if (n < 50 && !list_empty(&c->erasable_list)) {
  56                /* Note that most of them will have gone directly to be erased.
  57                   So don't favour the erasable_list _too_ much. */
  58                D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
  59                nextlist = &c->erasable_list;
  60        } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  61                /* Most of the time, pick one off the very_dirty list */
  62                D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
  63                nextlist = &c->very_dirty_list;
  64        } else if (n < 126 && !list_empty(&c->dirty_list)) {
  65                D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
  66                nextlist = &c->dirty_list;
  67        } else if (!list_empty(&c->clean_list)) {
  68                D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
  69                nextlist = &c->clean_list;
  70        } else if (!list_empty(&c->dirty_list)) {
  71                D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
  72
  73                nextlist = &c->dirty_list;
  74        } else if (!list_empty(&c->very_dirty_list)) {
  75                D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
  76                nextlist = &c->very_dirty_list;
  77        } else if (!list_empty(&c->erasable_list)) {
  78                D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
  79
  80                nextlist = &c->erasable_list;
  81        } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
  82                /* There are blocks are wating for the wbuf sync */
  83                D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
  84                spin_unlock(&c->erase_completion_lock);
  85                jffs2_flush_wbuf_pad(c);
  86                spin_lock(&c->erase_completion_lock);
  87                goto again;
  88        } else {
  89                /* Eep. All were empty */
  90                D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
  91                return NULL;
  92        }
  93
  94        ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  95        list_del(&ret->list);
  96        c->gcblock = ret;
  97        ret->gc_node = ret->first_node;
  98        if (!ret->gc_node) {
  99                printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
 100                BUG();
 101        }
 102
 103        /* Have we accidentally picked a clean block with wasted space ? */
 104        if (ret->wasted_size) {
 105                D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
 106                ret->dirty_size += ret->wasted_size;
 107                c->wasted_size -= ret->wasted_size;
 108                c->dirty_size += ret->wasted_size;
 109                ret->wasted_size = 0;
 110        }
 111
 112        return ret;
 113}
 114
 115/* jffs2_garbage_collect_pass
 116 * Make a single attempt to progress GC. Move one node, and possibly
 117 * start erasing one eraseblock.
 118 */
 119int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 120{
 121        struct jffs2_inode_info *f;
 122        struct jffs2_inode_cache *ic;
 123        struct jffs2_eraseblock *jeb;
 124        struct jffs2_raw_node_ref *raw;
 125        uint32_t gcblock_dirty;
 126        int ret = 0, inum, nlink;
 127        int xattr = 0;
 128
 129        if (down_interruptible(&c->alloc_sem))
 130                return -EINTR;
 131
 132        for (;;) {
 133                spin_lock(&c->erase_completion_lock);
 134                if (!c->unchecked_size)
 135                        break;
 136
 137                /* We can't start doing GC yet. We haven't finished checking
 138                   the node CRCs etc. Do it now. */
 139
 140                /* checked_ino is protected by the alloc_sem */
 141                if (c->checked_ino > c->highest_ino && xattr) {
 142                        printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
 143                               c->unchecked_size);
 144                        jffs2_dbg_dump_block_lists_nolock(c);
 145                        spin_unlock(&c->erase_completion_lock);
 146                        up(&c->alloc_sem);
 147                        return -ENOSPC;
 148                }
 149
 150                spin_unlock(&c->erase_completion_lock);
 151
 152                if (!xattr)
 153                        xattr = jffs2_verify_xattr(c);
 154
 155                spin_lock(&c->inocache_lock);
 156
 157                ic = jffs2_get_ino_cache(c, c->checked_ino++);
 158
 159                if (!ic) {
 160                        spin_unlock(&c->inocache_lock);
 161                        continue;
 162                }
 163
 164                if (!ic->nlink) {
 165                        D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
 166                                  ic->ino));
 167                        spin_unlock(&c->inocache_lock);
 168                        jffs2_xattr_delete_inode(c, ic);
 169                        continue;
 170                }
 171                switch(ic->state) {
 172                case INO_STATE_CHECKEDABSENT:
 173                case INO_STATE_PRESENT:
 174                        D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
 175                        spin_unlock(&c->inocache_lock);
 176                        continue;
 177
 178                case INO_STATE_GC:
 179                case INO_STATE_CHECKING:
 180                        printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
 181                        spin_unlock(&c->inocache_lock);
 182                        BUG();
 183
 184                case INO_STATE_READING:
 185                        /* We need to wait for it to finish, lest we move on
 186                           and trigger the BUG() above while we haven't yet
 187                           finished checking all its nodes */
 188                        D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
 189                        /* We need to come back again for the _same_ inode. We've
 190                         made no progress in this case, but that should be OK */
 191                        c->checked_ino--;
 192
 193                        up(&c->alloc_sem);
 194                        sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 195                        return 0;
 196
 197                default:
 198                        BUG();
 199
 200                case INO_STATE_UNCHECKED:
 201                        ;
 202                }
 203                ic->state = INO_STATE_CHECKING;
 204                spin_unlock(&c->inocache_lock);
 205
 206                D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
 207
 208                ret = jffs2_do_crccheck_inode(c, ic);
 209                if (ret)
 210                        printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
 211
 212                jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 213                up(&c->alloc_sem);
 214                return ret;
 215        }
 216
 217        /* First, work out which block we're garbage-collecting */
 218        jeb = c->gcblock;
 219
 220        if (!jeb)
 221                jeb = jffs2_find_gc_block(c);
 222
 223        if (!jeb) {
 224                D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
 225                spin_unlock(&c->erase_completion_lock);
 226                up(&c->alloc_sem);
 227                return -EIO;
 228        }
 229
 230        D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
 231        D1(if (c->nextblock)
 232           printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 233
 234        if (!jeb->used_size) {
 235                up(&c->alloc_sem);
 236                goto eraseit;
 237        }
 238
 239        raw = jeb->gc_node;
 240        gcblock_dirty = jeb->dirty_size;
 241
 242        while(ref_obsolete(raw)) {
 243                D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
 244                raw = ref_next(raw);
 245                if (unlikely(!raw)) {
 246                        printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
 247                        printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 248                               jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
 249                        jeb->gc_node = raw;
 250                        spin_unlock(&c->erase_completion_lock);
 251                        up(&c->alloc_sem);
 252                        BUG();
 253                }
 254        }
 255        jeb->gc_node = raw;
 256
 257        D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
 258
 259        if (!raw->next_in_ino) {
 260                /* Inode-less node. Clean marker, snapshot or something like that */
 261                spin_unlock(&c->erase_completion_lock);
 262                if (ref_flags(raw) == REF_PRISTINE) {
 263                        /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 264                        jffs2_garbage_collect_pristine(c, NULL, raw);
 265                } else {
 266                        /* Just mark it obsolete */
 267                        jffs2_mark_node_obsolete(c, raw);
 268                }
 269                up(&c->alloc_sem);
 270                goto eraseit_lock;
 271        }
 272
 273        ic = jffs2_raw_ref_to_ic(raw);
 274
 275#ifdef CONFIG_JFFS2_FS_XATTR
 276        /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
 277         * We can decide whether this node is inode or xattr by ic->class.     */
 278        if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 279            || ic->class == RAWNODE_CLASS_XATTR_REF) {
 280                spin_unlock(&c->erase_completion_lock);
 281
 282                if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 283                        ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 284                } else {
 285                        ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 286                }
 287                goto test_gcnode;
 288        }
 289#endif
 290
 291        /* We need to hold the inocache. Either the erase_completion_lock or
 292           the inocache_lock are sufficient; we trade down since the inocache_lock
 293           causes less contention. */
 294        spin_lock(&c->inocache_lock);
 295
 296        spin_unlock(&c->erase_completion_lock);
 297
 298        D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
 299
 300        /* Three possibilities:
 301           1. Inode is already in-core. We must iget it and do proper
 302              updating to its fragtree, etc.
 303           2. Inode is not in-core, node is REF_PRISTINE. We lock the
 304              inocache to prevent a read_inode(), copy the node intact.
 305           3. Inode is not in-core, node is not pristine. We must iget()
 306              and take the slow path.
 307        */
 308
 309        switch(ic->state) {
 310        case INO_STATE_CHECKEDABSENT:
 311                /* It's been checked, but it's not currently in-core.
 312                   We can just copy any pristine nodes, but have
 313                   to prevent anyone else from doing read_inode() while
 314                   we're at it, so we set the state accordingly */
 315                if (ref_flags(raw) == REF_PRISTINE)
 316                        ic->state = INO_STATE_GC;
 317                else {
 318                        D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 319                                  ic->ino));
 320                }
 321                break;
 322
 323        case INO_STATE_PRESENT:
 324                /* It's in-core. GC must iget() it. */
 325                break;
 326
 327        case INO_STATE_UNCHECKED:
 328        case INO_STATE_CHECKING:
 329        case INO_STATE_GC:
 330                /* Should never happen. We should have finished checking
 331                   by the time we actually start doing any GC, and since
 332                   we're holding the alloc_sem, no other garbage collection
 333                   can happen.
 334                */
 335                printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 336                       ic->ino, ic->state);
 337                up(&c->alloc_sem);
 338                spin_unlock(&c->inocache_lock);
 339                BUG();
 340
 341        case INO_STATE_READING:
 342                /* Someone's currently trying to read it. We must wait for
 343                   them to finish and then go through the full iget() route
 344                   to do the GC. However, sometimes read_inode() needs to get
 345                   the alloc_sem() (for marking nodes invalid) so we must
 346                   drop the alloc_sem before sleeping. */
 347
 348                up(&c->alloc_sem);
 349                D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
 350                          ic->ino, ic->state));
 351                sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 352                /* And because we dropped the alloc_sem we must start again from the
 353                   beginning. Ponder chance of livelock here -- we're returning success
 354                   without actually making any progress.
 355
 356                   Q: What are the chances that the inode is back in INO_STATE_READING
 357                   again by the time we next enter this function? And that this happens
 358                   enough times to cause a real delay?
 359
 360                   A: Small enough that I don't care :)
 361                */
 362                return 0;
 363        }
 364
 365        /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 366           node intact, and we don't have to muck about with the fragtree etc.
 367           because we know it's not in-core. If it _was_ in-core, we go through
 368           all the iget() crap anyway */
 369
 370        if (ic->state == INO_STATE_GC) {
 371                spin_unlock(&c->inocache_lock);
 372
 373                ret = jffs2_garbage_collect_pristine(c, ic, raw);
 374
 375                spin_lock(&c->inocache_lock);
 376                ic->state = INO_STATE_CHECKEDABSENT;
 377                wake_up(&c->inocache_wq);
 378
 379                if (ret != -EBADFD) {
 380                        spin_unlock(&c->inocache_lock);
 381                        goto test_gcnode;
 382                }
 383
 384                /* Fall through if it wanted us to, with inocache_lock held */
 385        }
 386
 387        /* Prevent the fairly unlikely race where the gcblock is
 388           entirely obsoleted by the final close of a file which had
 389           the only valid nodes in the block, followed by erasure,
 390           followed by freeing of the ic because the erased block(s)
 391           held _all_ the nodes of that inode.... never been seen but
 392           it's vaguely possible. */
 393
 394        inum = ic->ino;
 395        nlink = ic->nlink;
 396        spin_unlock(&c->inocache_lock);
 397
 398        f = jffs2_gc_fetch_inode(c, inum, nlink);
 399        if (IS_ERR(f)) {
 400                ret = PTR_ERR(f);
 401                goto release_sem;
 402        }
 403        if (!f) {
 404                ret = 0;
 405                goto release_sem;
 406        }
 407
 408        ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 409
 410        jffs2_gc_release_inode(c, f);
 411
 412 test_gcnode:
 413        if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 414                /* Eep. This really should never happen. GC is broken */
 415                printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
 416                ret = -ENOSPC;
 417        }
 418 release_sem:
 419        up(&c->alloc_sem);
 420
 421 eraseit_lock:
 422        /* If we've finished this block, start it erasing */
 423        spin_lock(&c->erase_completion_lock);
 424
 425 eraseit:
 426        if (c->gcblock && !c->gcblock->used_size) {
 427                D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
 428                /* We're GC'ing an empty block? */
 429                list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 430                c->gcblock = NULL;
 431                c->nr_erasing_blocks++;
 432                jffs2_erase_pending_trigger(c);
 433        }
 434        spin_unlock(&c->erase_completion_lock);
 435
 436        return ret;
 437}
 438
 439static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 440                                      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 441{
 442        struct jffs2_node_frag *frag;
 443        struct jffs2_full_dnode *fn = NULL;
 444        struct jffs2_full_dirent *fd;
 445        uint32_t start = 0, end = 0, nrfrags = 0;
 446        int ret = 0;
 447
 448        down(&f->sem);
 449
 450        /* Now we have the lock for this inode. Check that it's still the one at the head
 451           of the list. */
 452
 453        spin_lock(&c->erase_completion_lock);
 454
 455        if (c->gcblock != jeb) {
 456                spin_unlock(&c->erase_completion_lock);
 457                D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
 458                goto upnout;
 459        }
 460        if (ref_obsolete(raw)) {
 461                spin_unlock(&c->erase_completion_lock);
 462                D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
 463                /* They'll call again */
 464                goto upnout;
 465        }
 466        spin_unlock(&c->erase_completion_lock);
 467
 468        /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 469        if (f->metadata && f->metadata->raw == raw) {
 470                fn = f->metadata;
 471                ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 472                goto upnout;
 473        }
 474
 475        /* FIXME. Read node and do lookup? */
 476        for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 477                if (frag->node && frag->node->raw == raw) {
 478                        fn = frag->node;
 479                        end = frag->ofs + frag->size;
 480                        if (!nrfrags++)
 481                                start = frag->ofs;
 482                        if (nrfrags == frag->node->frags)
 483                                break; /* We've found them all */
 484                }
 485        }
 486        if (fn) {
 487                if (ref_flags(raw) == REF_PRISTINE) {
 488                        ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 489                        if (!ret) {
 490                                /* Urgh. Return it sensibly. */
 491                                frag->node->raw = f->inocache->nodes;
 492                        }
 493                        if (ret != -EBADFD)
 494                                goto upnout;
 495                }
 496                /* We found a datanode. Do the GC */
 497                if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
 498                        /* It crosses a page boundary. Therefore, it must be a hole. */
 499                        ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 500                } else {
 501                        /* It could still be a hole. But we GC the page this way anyway */
 502                        ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 503                }
 504                goto upnout;
 505        }
 506
 507        /* Wasn't a dnode. Try dirent */
 508        for (fd = f->dents; fd; fd=fd->next) {
 509                if (fd->raw == raw)
 510                        break;
 511        }
 512
 513        if (fd && fd->ino) {
 514                ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 515        } else if (fd) {
 516                ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 517        } else {
 518                printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 519                       ref_offset(raw), f->inocache->ino);
 520                if (ref_obsolete(raw)) {
 521                        printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
 522                } else {
 523                        jffs2_dbg_dump_node(c, ref_offset(raw));
 524                        BUG();
 525                }
 526        }
 527 upnout:
 528        up(&f->sem);
 529
 530        return ret;
 531}
 532
 533static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 534                                          struct jffs2_inode_cache *ic,
 535                                          struct jffs2_raw_node_ref *raw)
 536{
 537        union jffs2_node_union *node;
 538        size_t retlen;
 539        int ret;
 540        uint32_t phys_ofs, alloclen;
 541        uint32_t crc, rawlen;
 542        int retried = 0;
 543
 544        D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
 545
 546        alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 547
 548        /* Ask for a small amount of space (or the totlen if smaller) because we
 549           don't want to force wastage of the end of a block if splitting would
 550           work. */
 551        if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 552                alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 553
 554        ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 555        /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 556
 557        if (ret)
 558                return ret;
 559
 560        if (alloclen < rawlen) {
 561                /* Doesn't fit untouched. We'll go the old route and split it */
 562                return -EBADFD;
 563        }
 564
 565        node = kmalloc(rawlen, GFP_KERNEL);
 566        if (!node)
 567                return -ENOMEM;
 568
 569        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 570        if (!ret && retlen != rawlen)
 571                ret = -EIO;
 572        if (ret)
 573                goto out_node;
 574
 575        crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 576        if (je32_to_cpu(node->u.hdr_crc) != crc) {
 577                printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 578                       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 579                goto bail;
 580        }
 581
 582        switch(je16_to_cpu(node->u.nodetype)) {
 583        case JFFS2_NODETYPE_INODE:
 584                crc = crc32(0, node, sizeof(node->i)-8);
 585                if (je32_to_cpu(node->i.node_crc) != crc) {
 586                        printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 587                               ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
 588                        goto bail;
 589                }
 590
 591                if (je32_to_cpu(node->i.dsize)) {
 592                        crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 593                        if (je32_to_cpu(node->i.data_crc) != crc) {
 594                                printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 595                                       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
 596                                goto bail;
 597                        }
 598                }
 599                break;
 600
 601        case JFFS2_NODETYPE_DIRENT:
 602                crc = crc32(0, node, sizeof(node->d)-8);
 603                if (je32_to_cpu(node->d.node_crc) != crc) {
 604                        printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 605                               ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
 606                        goto bail;
 607                }
 608
 609                if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 610                        printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
 611                        goto bail;
 612                }
 613
 614                if (node->d.nsize) {
 615                        crc = crc32(0, node->d.name, node->d.nsize);
 616                        if (je32_to_cpu(node->d.name_crc) != crc) {
 617                                printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 618                                       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
 619                                goto bail;
 620                        }
 621                }
 622                break;
 623        default:
 624                /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 625                if (ic) {
 626                        printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 627                               ref_offset(raw), je16_to_cpu(node->u.nodetype));
 628                        goto bail;
 629                }
 630        }
 631
 632        /* OK, all the CRCs are good; this node can just be copied as-is. */
 633 retry:
 634        phys_ofs = write_ofs(c);
 635
 636        ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 637
 638        if (ret || (retlen != rawlen)) {
 639                printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 640                       rawlen, phys_ofs, ret, retlen);
 641                if (retlen) {
 642                        jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 643                } else {
 644                        printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
 645                }
 646                if (!retried) {
 647                        /* Try to reallocate space and retry */
 648                        uint32_t dummy;
 649                        struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 650
 651                        retried = 1;
 652
 653                        D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
 654
 655                        jffs2_dbg_acct_sanity_check(c,jeb);
 656                        jffs2_dbg_acct_paranoia_check(c, jeb);
 657
 658                        ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
 659                                                /* this is not the exact summary size of it,
 660                                                        it is only an upper estimation */
 661
 662                        if (!ret) {
 663                                D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
 664
 665                                jffs2_dbg_acct_sanity_check(c,jeb);
 666                                jffs2_dbg_acct_paranoia_check(c, jeb);
 667
 668                                goto retry;
 669                        }
 670                        D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
 671                }
 672
 673                if (!ret)
 674                        ret = -EIO;
 675                goto out_node;
 676        }
 677        jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 678
 679        jffs2_mark_node_obsolete(c, raw);
 680        D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
 681
 682 out_node:
 683        kfree(node);
 684        return ret;
 685 bail:
 686        ret = -EBADFD;
 687        goto out_node;
 688}
 689
 690static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 691                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 692{
 693        struct jffs2_full_dnode *new_fn;
 694        struct jffs2_raw_inode ri;
 695        struct jffs2_node_frag *last_frag;
 696        union jffs2_device_node dev;
 697        char *mdata = NULL, mdatalen = 0;
 698        uint32_t alloclen, ilen;
 699        int ret;
 700
 701        if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 702            S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 703                /* For these, we don't actually need to read the old node */
 704                mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 705                mdata = (char *)&dev;
 706                D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
 707        } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 708                mdatalen = fn->size;
 709                mdata = kmalloc(fn->size, GFP_KERNEL);
 710                if (!mdata) {
 711                        printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 712                        return -ENOMEM;
 713                }
 714                ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 715                if (ret) {
 716                        printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
 717                        kfree(mdata);
 718                        return ret;
 719                }
 720                D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
 721
 722        }
 723
 724        ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 725                                JFFS2_SUMMARY_INODE_SIZE);
 726        if (ret) {
 727                printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 728                       sizeof(ri)+ mdatalen, ret);
 729                goto out;
 730        }
 731
 732        last_frag = frag_last(&f->fragtree);
 733        if (last_frag)
 734                /* Fetch the inode length from the fragtree rather then
 735                 * from i_size since i_size may have not been updated yet */
 736                ilen = last_frag->ofs + last_frag->size;
 737        else
 738                ilen = JFFS2_F_I_SIZE(f);
 739
 740        memset(&ri, 0, sizeof(ri));
 741        ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 742        ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 743        ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 744        ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 745
 746        ri.ino = cpu_to_je32(f->inocache->ino);
 747        ri.version = cpu_to_je32(++f->highest_version);
 748        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 749        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 750        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 751        ri.isize = cpu_to_je32(ilen);
 752        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 753        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 754        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 755        ri.offset = cpu_to_je32(0);
 756        ri.csize = cpu_to_je32(mdatalen);
 757        ri.dsize = cpu_to_je32(mdatalen);
 758        ri.compr = JFFS2_COMPR_NONE;
 759        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 760        ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 761
 762        new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 763
 764        if (IS_ERR(new_fn)) {
 765                printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 766                ret = PTR_ERR(new_fn);
 767                goto out;
 768        }
 769        jffs2_mark_node_obsolete(c, fn->raw);
 770        jffs2_free_full_dnode(fn);
 771        f->metadata = new_fn;
 772 out:
 773        if (S_ISLNK(JFFS2_F_I_MODE(f)))
 774                kfree(mdata);
 775        return ret;
 776}
 777
 778static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 779                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 780{
 781        struct jffs2_full_dirent *new_fd;
 782        struct jffs2_raw_dirent rd;
 783        uint32_t alloclen;
 784        int ret;
 785
 786        rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 787        rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 788        rd.nsize = strlen(fd->name);
 789        rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 790        rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 791
 792        rd.pino = cpu_to_je32(f->inocache->ino);
 793        rd.version = cpu_to_je32(++f->highest_version);
 794        rd.ino = cpu_to_je32(fd->ino);
 795        /* If the times on this inode were set by explicit utime() they can be different,
 796           so refrain from splatting them. */
 797        if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 798                rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 799        else
 800                rd.mctime = cpu_to_je32(0);
 801        rd.type = fd->type;
 802        rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 803        rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 804
 805        ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 806                                JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 807        if (ret) {
 808                printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 809                       sizeof(rd)+rd.nsize, ret);
 810                return ret;
 811        }
 812        new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 813
 814        if (IS_ERR(new_fd)) {
 815                printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
 816                return PTR_ERR(new_fd);
 817        }
 818        jffs2_add_fd_to_list(c, new_fd, &f->dents);
 819        return 0;
 820}
 821
 822static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 823                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 824{
 825        struct jffs2_full_dirent **fdp = &f->dents;
 826        int found = 0;
 827
 828        /* On a medium where we can't actually mark nodes obsolete
 829           pernamently, such as NAND flash, we need to work out
 830           whether this deletion dirent is still needed to actively
 831           delete a 'real' dirent with the same name that's still
 832           somewhere else on the flash. */
 833        if (!jffs2_can_mark_obsolete(c)) {
 834                struct jffs2_raw_dirent *rd;
 835                struct jffs2_raw_node_ref *raw;
 836                int ret;
 837                size_t retlen;
 838                int name_len = strlen(fd->name);
 839                uint32_t name_crc = crc32(0, fd->name, name_len);
 840                uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 841
 842                rd = kmalloc(rawlen, GFP_KERNEL);
 843                if (!rd)
 844                        return -ENOMEM;
 845
 846                /* Prevent the erase code from nicking the obsolete node refs while
 847                   we're looking at them. I really don't like this extra lock but
 848                   can't see any alternative. Suggestions on a postcard to... */
 849                down(&c->erase_free_sem);
 850
 851                for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 852
 853                        cond_resched();
 854
 855                        /* We only care about obsolete ones */
 856                        if (!(ref_obsolete(raw)))
 857                                continue;
 858
 859                        /* Any dirent with the same name is going to have the same length... */
 860                        if (ref_totlen(c, NULL, raw) != rawlen)
 861                                continue;
 862
 863                        /* Doesn't matter if there's one in the same erase block. We're going to
 864                           delete it too at the same time. */
 865                        if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 866                                continue;
 867
 868                        D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
 869
 870                        /* This is an obsolete node belonging to the same directory, and it's of the right
 871                           length. We need to take a closer look...*/
 872                        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 873                        if (ret) {
 874                                printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
 875                                /* If we can't read it, we don't need to continue to obsolete it. Continue */
 876                                continue;
 877                        }
 878                        if (retlen != rawlen) {
 879                                printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 880                                       retlen, rawlen, ref_offset(raw));
 881                                continue;
 882                        }
 883
 884                        if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 885                                continue;
 886
 887                        /* If the name CRC doesn't match, skip */
 888                        if (je32_to_cpu(rd->name_crc) != name_crc)
 889                                continue;
 890
 891                        /* If the name length doesn't match, or it's another deletion dirent, skip */
 892                        if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 893                                continue;
 894
 895                        /* OK, check the actual name now */
 896                        if (memcmp(rd->name, fd->name, name_len))
 897                                continue;
 898
 899                        /* OK. The name really does match. There really is still an older node on
 900                           the flash which our deletion dirent obsoletes. So we have to write out
 901                           a new deletion dirent to replace it */
 902                        up(&c->erase_free_sem);
 903
 904                        D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 905                                  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
 906                        kfree(rd);
 907
 908                        return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 909                }
 910
 911                up(&c->erase_free_sem);
 912                kfree(rd);
 913        }
 914
 915        /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
 916           we should update the metadata node with those times accordingly */
 917
 918        /* No need for it any more. Just mark it obsolete and remove it from the list */
 919        while (*fdp) {
 920                if ((*fdp) == fd) {
 921                        found = 1;
 922                        *fdp = fd->next;
 923                        break;
 924                }
 925                fdp = &(*fdp)->next;
 926        }
 927        if (!found) {
 928                printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
 929        }
 930        jffs2_mark_node_obsolete(c, fd->raw);
 931        jffs2_free_full_dirent(fd);
 932        return 0;
 933}
 934
 935static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 936                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 937                                      uint32_t start, uint32_t end)
 938{
 939        struct jffs2_raw_inode ri;
 940        struct jffs2_node_frag *frag;
 941        struct jffs2_full_dnode *new_fn;
 942        uint32_t alloclen, ilen;
 943        int ret;
 944
 945        D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
 946                  f->inocache->ino, start, end));
 947
 948        memset(&ri, 0, sizeof(ri));
 949
 950        if(fn->frags > 1) {
 951                size_t readlen;
 952                uint32_t crc;
 953                /* It's partially obsoleted by a later write. So we have to
 954                   write it out again with the _same_ version as before */
 955                ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
 956                if (readlen != sizeof(ri) || ret) {
 957                        printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
 958                        goto fill;
 959                }
 960                if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
 961                        printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
 962                               ref_offset(fn->raw),
 963                               je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
 964                        return -EIO;
 965                }
 966                if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
 967                        printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
 968                               ref_offset(fn->raw),
 969                               je32_to_cpu(ri.totlen), sizeof(ri));
 970                        return -EIO;
 971                }
 972                crc = crc32(0, &ri, sizeof(ri)-8);
 973                if (crc != je32_to_cpu(ri.node_crc)) {
 974                        printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
 975                               ref_offset(fn->raw),
 976                               je32_to_cpu(ri.node_crc), crc);
 977                        /* FIXME: We could possibly deal with this by writing new holes for each frag */
 978                        printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
 979                               start, end, f->inocache->ino);
 980                        goto fill;
 981                }
 982                if (ri.compr != JFFS2_COMPR_ZERO) {
 983                        printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
 984                        printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
 985                               start, end, f->inocache->ino);
 986                        goto fill;
 987                }
 988        } else {
 989        fill:
 990                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 991                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 992                ri.totlen = cpu_to_je32(sizeof(ri));
 993                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 994
 995                ri.ino = cpu_to_je32(f->inocache->ino);
 996                ri.version = cpu_to_je32(++f->highest_version);
 997                ri.offset = cpu_to_je32(start);
 998                ri.dsize = cpu_to_je32(end - start);
 999                ri.csize = cpu_to_je32(0);
1000                ri.compr = JFFS2_COMPR_ZERO;
1001        }
1002
1003        frag = frag_last(&f->fragtree);
1004        if (frag)
1005                /* Fetch the inode length from the fragtree rather then
1006                 * from i_size since i_size may have not been updated yet */
1007                ilen = frag->ofs + frag->size;
1008        else
1009                ilen = JFFS2_F_I_SIZE(f);
1010
1011        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1012        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1013        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1014        ri.isize = cpu_to_je32(ilen);
1015        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1016        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1017        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1018        ri.data_crc = cpu_to_je32(0);
1019        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1020
1021        ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1022                                     JFFS2_SUMMARY_INODE_SIZE);
1023        if (ret) {
1024                printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1025                       sizeof(ri), ret);
1026                return ret;
1027        }
1028        new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1029
1030        if (IS_ERR(new_fn)) {
1031                printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1032                return PTR_ERR(new_fn);
1033        }
1034        if (je32_to_cpu(ri.version) == f->highest_version) {
1035                jffs2_add_full_dnode_to_inode(c, f, new_fn);
1036                if (f->metadata) {
1037                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1038                        jffs2_free_full_dnode(f->metadata);
1039                        f->metadata = NULL;
1040                }
1041                return 0;
1042        }
1043
1044        /*
1045         * We should only get here in the case where the node we are
1046         * replacing had more than one frag, so we kept the same version
1047         * number as before. (Except in case of error -- see 'goto fill;'
1048         * above.)
1049         */
1050        D1(if(unlikely(fn->frags <= 1)) {
1051                printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1052                       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1053                       je32_to_cpu(ri.ino));
1054        });
1055
1056        /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1057        mark_ref_normal(new_fn->raw);
1058
1059        for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1060             frag; frag = frag_next(frag)) {
1061                if (frag->ofs > fn->size + fn->ofs)
1062                        break;
1063                if (frag->node == fn) {
1064                        frag->node = new_fn;
1065                        new_fn->frags++;
1066                        fn->frags--;
1067                }
1068        }
1069        if (fn->frags) {
1070                printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1071                BUG();
1072        }
1073        if (!new_fn->frags) {
1074                printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1075                BUG();
1076        }
1077
1078        jffs2_mark_node_obsolete(c, fn->raw);
1079        jffs2_free_full_dnode(fn);
1080
1081        return 0;
1082}
1083
1084static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1085                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1086                                       uint32_t start, uint32_t end)
1087{
1088        struct jffs2_full_dnode *new_fn;
1089        struct jffs2_raw_inode ri;
1090        uint32_t alloclen, offset, orig_end, orig_start;
1091        int ret = 0;
1092        unsigned char *comprbuf = NULL, *writebuf;
1093        unsigned long pg;
1094        unsigned char *pg_ptr;
1095
1096        memset(&ri, 0, sizeof(ri));
1097
1098        D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1099                  f->inocache->ino, start, end));
1100
1101        orig_end = end;
1102        orig_start = start;
1103
1104        if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1105                /* Attempt to do some merging. But only expand to cover logically
1106                   adjacent frags if the block containing them is already considered
1107                   to be dirty. Otherwise we end up with GC just going round in
1108                   circles dirtying the nodes it already wrote out, especially
1109                   on NAND where we have small eraseblocks and hence a much higher
1110                   chance of nodes having to be split to cross boundaries. */
1111
1112                struct jffs2_node_frag *frag;
1113                uint32_t min, max;
1114
1115                min = start & ~(PAGE_CACHE_SIZE-1);
1116                max = min + PAGE_CACHE_SIZE;
1117
1118                frag = jffs2_lookup_node_frag(&f->fragtree, start);
1119
1120                /* BUG_ON(!frag) but that'll happen anyway... */
1121
1122                BUG_ON(frag->ofs != start);
1123
1124                /* First grow down... */
1125                while((frag = frag_prev(frag)) && frag->ofs >= min) {
1126
1127                        /* If the previous frag doesn't even reach the beginning, there's
1128                           excessive fragmentation. Just merge. */
1129                        if (frag->ofs > min) {
1130                                D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1131                                          frag->ofs, frag->ofs+frag->size));
1132                                start = frag->ofs;
1133                                continue;
1134                        }
1135                        /* OK. This frag holds the first byte of the page. */
1136                        if (!frag->node || !frag->node->raw) {
1137                                D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1138                                          frag->ofs, frag->ofs+frag->size));
1139                                break;
1140                        } else {
1141
1142                                /* OK, it's a frag which extends to the beginning of the page. Does it live
1143                                   in a block which is still considered clean? If so, don't obsolete it.
1144                                   If not, cover it anyway. */
1145
1146                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1147                                struct jffs2_eraseblock *jeb;
1148
1149                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1150
1151                                if (jeb == c->gcblock) {
1152                                        D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1153                                                  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1154                                        start = frag->ofs;
1155                                        break;
1156                                }
1157                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1158                                        D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1159                                                  frag->ofs, frag->ofs+frag->size, jeb->offset));
1160                                        break;
1161                                }
1162
1163                                D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1164                                                  frag->ofs, frag->ofs+frag->size, jeb->offset));
1165                                start = frag->ofs;
1166                                break;
1167                        }
1168                }
1169
1170                /* ... then up */
1171
1172                /* Find last frag which is actually part of the node we're to GC. */
1173                frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1174
1175                while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1176
1177                        /* If the previous frag doesn't even reach the beginning, there's lots
1178                           of fragmentation. Just merge. */
1179                        if (frag->ofs+frag->size < max) {
1180                                D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1181                                          frag->ofs, frag->ofs+frag->size));
1182                                end = frag->ofs + frag->size;
1183                                continue;
1184                        }
1185
1186                        if (!frag->node || !frag->node->raw) {
1187                                D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1188                                          frag->ofs, frag->ofs+frag->size));
1189                                break;
1190                        } else {
1191
1192                                /* OK, it's a frag which extends to the beginning of the page. Does it live
1193                                   in a block which is still considered clean? If so, don't obsolete it.
1194                                   If not, cover it anyway. */
1195
1196                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1197                                struct jffs2_eraseblock *jeb;
1198
1199                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1200
1201                                if (jeb == c->gcblock) {
1202                                        D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1203                                                  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1204                                        end = frag->ofs + frag->size;
1205                                        break;
1206                                }
1207                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1208                                        D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1209                                                  frag->ofs, frag->ofs+frag->size, jeb->offset));
1210                                        break;
1211                                }
1212
1213                                D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1214                                                  frag->ofs, frag->ofs+frag->size, jeb->offset));
1215                                end = frag->ofs + frag->size;
1216                                break;
1217                        }
1218                }
1219                D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1220                          orig_start, orig_end, start, end));
1221
1222                D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1223                BUG_ON(end < orig_end);
1224                BUG_ON(start > orig_start);
1225        }
1226
1227        /* First, use readpage() to read the appropriate page into the page cache */
1228        /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1229         *    triggered garbage collection in the first place?
1230         * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1231         *    page OK. We'll actually write it out again in commit_write, which is a little
1232         *    suboptimal, but at least we're correct.
1233         */
1234        pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1235
1236        if (IS_ERR(pg_ptr)) {
1237                printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1238                return PTR_ERR(pg_ptr);
1239        }
1240
1241        offset = start;
1242        while(offset < orig_end) {
1243                uint32_t datalen;
1244                uint32_t cdatalen;
1245                uint16_t comprtype = JFFS2_COMPR_NONE;
1246
1247                ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1248                                        &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1249
1250                if (ret) {
1251                        printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1252                               sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1253                        break;
1254                }
1255                cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1256                datalen = end - offset;
1257
1258                writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1259
1260                comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1261
1262                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1263                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1264                ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1265                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1266
1267                ri.ino = cpu_to_je32(f->inocache->ino);
1268                ri.version = cpu_to_je32(++f->highest_version);
1269                ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1270                ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1271                ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1272                ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1273                ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1274                ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1275                ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1276                ri.offset = cpu_to_je32(offset);
1277                ri.csize = cpu_to_je32(cdatalen);
1278                ri.dsize = cpu_to_je32(datalen);
1279                ri.compr = comprtype & 0xff;
1280                ri.usercompr = (comprtype >> 8) & 0xff;
1281                ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1282                ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1283
1284                new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1285
1286                jffs2_free_comprbuf(comprbuf, writebuf);
1287
1288                if (IS_ERR(new_fn)) {
1289                        printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1290                        ret = PTR_ERR(new_fn);
1291                        break;
1292                }
1293                ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1294                offset += datalen;
1295                if (f->metadata) {
1296                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1297                        jffs2_free_full_dnode(f->metadata);
1298                        f->metadata = NULL;
1299                }
1300        }
1301
1302        jffs2_gc_release_page(c, pg_ptr, &pg);
1303        return ret;
1304}
1305
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.