linux/fs/jbd2/checkpoint.c
/*
 * linux/fs/jbd2/checkpoint.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999 Red Hat Software --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Checkpoint routines for the generic filesystem journaling code.
 * Part of the ext2fs journaling system.
 *
 * Checkpointing is the process of ensuring that a section of the log is
 * committed fully to disk, so that that portion of the log can be
 * reused.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <trace/events/jbd2.h>

/*
 * Unlink a buffer from a transaction checkpoint list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink_first(struct journal_head *jh)
{
        transaction_t *transaction = jh->b_cp_transaction;

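        /*
         * The checkpoint lists are circular and doubly linked through
         * b_cpnext/b_cpprev.  Unlink jh and fix up the list head: if jh was
         * the only element, the first assignment below leaves the head
         * pointing back at jh, and the second test then empties the list.
         */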
        jh->b_cpnext->b_cpprev = jh->b_cpprev;
        jh->b_cpprev->b_cpnext = jh->b_cpnext;
        if (transaction->t_checkpoint_list == jh) {
                transaction->t_checkpoint_list = jh->b_cpnext;
                if (transaction->t_checkpoint_list == jh)
                        transaction->t_checkpoint_list = NULL;
        }
}

/*
 * Unlink a buffer from a transaction checkpoint(io) list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink(struct journal_head *jh)
{
        transaction_t *transaction = jh->b_cp_transaction;

        __buffer_unlink_first(jh);
        if (transaction->t_checkpoint_io_list == jh) {
                transaction->t_checkpoint_io_list = jh->b_cpnext;
                if (transaction->t_checkpoint_io_list == jh)
                        transaction->t_checkpoint_io_list = NULL;
        }
}

/*
 * Move a buffer from the checkpoint list to the checkpoint io list
 *
 * Called with j_list_lock held
 */
static inline void __buffer_relink_io(struct journal_head *jh)
{
        transaction_t *transaction = jh->b_cp_transaction;

        __buffer_unlink_first(jh);

        if (!transaction->t_checkpoint_io_list) {
                jh->b_cpnext = jh->b_cpprev = jh;
        } else {
                jh->b_cpnext = transaction->t_checkpoint_io_list;
                jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
                jh->b_cpprev->b_cpnext = jh;
                jh->b_cpnext->b_cpprev = jh;
        }
        transaction->t_checkpoint_io_list = jh;
}
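
/*
 * A checkpointing transaction keeps two such circular lists of journal
 * heads: t_checkpoint_list holds buffers that still have to be written back
 * to the filesystem, while t_checkpoint_io_list holds buffers whose
 * writeback has already been submitted and only needs to be waited for.
 * __buffer_relink_io() above moves a buffer from the first list to the
 * second once its write has been queued.
 */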

/*
 * Try to release a checkpointed buffer from its transaction.
 * Returns 1 if we released it and 2 if we also released the
 * whole transaction.
 *
 * Requires j_list_lock
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __try_to_free_cp_buf(struct journal_head *jh)
{
        int ret = 0;
        struct buffer_head *bh = jh2bh(jh);

        if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
            !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
                /*
                 * Get our reference so that bh cannot be freed before
                 * we unlock it
                 */
                get_bh(bh);
                JBUFFER_TRACE(jh, "remove from checkpoint list");
                ret = __jbd2_journal_remove_checkpoint(jh) + 1;
                jbd_unlock_bh_state(bh);
                BUFFER_TRACE(bh, "release");
                __brelse(bh);
        } else {
                jbd_unlock_bh_state(bh);
        }
        return ret;
}

/*
 * __jbd2_log_wait_for_space: wait until there is space in the journal.
 *
 * Called under j_state_lock *only*.  It will be unlocked if we have to wait
 * for a checkpoint to free up some space in the log.
 */
void __jbd2_log_wait_for_space(journal_t *journal)
{
        int nblocks, space_left;
        /* assert_spin_locked(&journal->j_state_lock); */

        nblocks = jbd_space_needed(journal);
        while (__jbd2_log_space_left(journal) < nblocks) {
                if (journal->j_flags & JBD2_ABORT)
                        return;
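                /*
                 * Checkpointing does buffer I/O and may block, and
                 * mutex_lock() itself may sleep, so j_state_lock has to be
                 * dropped before we take j_checkpoint_mutex.
                 */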
                write_unlock(&journal->j_state_lock);
                mutex_lock(&journal->j_checkpoint_mutex);

                /*
                 * Test again, another process may have checkpointed while we
                 * were waiting for the checkpoint lock. If there are no
                 * transactions ready to be checkpointed, try to recover
                 * journal space by calling cleanup_journal_tail(), and if
                 * that doesn't work, by waiting for the currently committing
                 * transaction to complete.  If there is absolutely no way
                 * to make progress, this is either a BUG or corrupted
                 * filesystem, so abort the journal and leave a stack
                 * trace for forensic evidence.
                 */
                write_lock(&journal->j_state_lock);
                spin_lock(&journal->j_list_lock);
                nblocks = jbd_space_needed(journal);
                space_left = __jbd2_log_space_left(journal);
                if (space_left < nblocks) {
                        int chkpt = journal->j_checkpoint_transactions != NULL;
                        tid_t tid = 0;

                        if (journal->j_committing_transaction)
                                tid = journal->j_committing_transaction->t_tid;
                        spin_unlock(&journal->j_list_lock);
                        write_unlock(&journal->j_state_lock);
                        if (chkpt) {
                                jbd2_log_do_checkpoint(journal);
                        } else if (jbd2_cleanup_journal_tail(journal) == 0) {
                                /* We were able to recover space; yay! */
                                ;
                        } else if (tid) {
                                jbd2_log_wait_commit(journal, tid);
                        } else {
                                printk(KERN_ERR "%s: needed %d blocks and "
                                       "only had %d space available\n",
                                       __func__, nblocks, space_left);
                                printk(KERN_ERR "%s: no way to get more "
                                       "journal space in %s\n", __func__,
                                       journal->j_devname);
                                WARN_ON(1);
                                jbd2_journal_abort(journal, 0);
                        }
                        write_lock(&journal->j_state_lock);
                } else {
                        spin_unlock(&journal->j_list_lock);
                }
                mutex_unlock(&journal->j_checkpoint_mutex);
        }
}

/*
 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
 * The caller must restart a list walk.  Wait for someone else to run
 * jbd_unlock_bh_state().
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
        __releases(journal->j_list_lock)
{
        get_bh(bh);
        spin_unlock(&journal->j_list_lock);
        jbd_lock_bh_state(bh);
        jbd_unlock_bh_state(bh);
        put_bh(bh);
}

/*
 * Clean up transaction's list of buffers submitted for io.
 * We wait for any pending IO to complete and remove any clean
 * buffers. Note that we take the buffers in the opposite ordering
 * from the one in which they were submitted for IO.
 *
 * Return 0 on success, and return <0 if some buffers have failed
 * to be written out.
 *
 * Called with j_list_lock held.
 */
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
        struct journal_head *jh;
        struct buffer_head *bh;
        tid_t this_tid;
        int released = 0;
        int ret = 0;

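        /*
         * Remember which transaction (by tid) we are working on: j_list_lock
         * is dropped repeatedly below, and by the time we get back to the
         * restart check another transaction may have taken this one's place
         * at the head of the checkpoint list.
         */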
        this_tid = transaction->t_tid;
restart:
        /* Did somebody clean up the transaction in the meanwhile? */
        if (journal->j_checkpoint_transactions != transaction ||
                        transaction->t_tid != this_tid)
                return ret;
        while (!released && transaction->t_checkpoint_io_list) {
                jh = transaction->t_checkpoint_io_list;
                bh = jh2bh(jh);
                if (!jbd_trylock_bh_state(bh)) {
                        jbd_sync_bh(journal, bh);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                get_bh(bh);
                if (buffer_locked(bh)) {
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        wait_on_buffer(bh);
                        /* the journal_head may have gone by now */
                        BUFFER_TRACE(bh, "brelse");
                        __brelse(bh);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                if (unlikely(buffer_write_io_error(bh)))
                        ret = -EIO;

                /*
                 * Now in whatever state the buffer currently is, we know that
                 * it has been written out and so we can drop it from the list
                 */
                released = __jbd2_journal_remove_checkpoint(jh);
                jbd_unlock_bh_state(bh);
                __brelse(bh);
        }

        return ret;
}

static void
__flush_batch(journal_t *journal, int *batch_count)
{
        int i;
        struct blk_plug plug;

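        /*
         * Plug the queue while the whole batch is submitted so that the
         * block layer can merge adjacent buffers into fewer, larger requests
         * before they go out to the device.
         */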
        blk_start_plug(&plug);
        for (i = 0; i < *batch_count; i++)
                write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
        blk_finish_plug(&plug);

        for (i = 0; i < *batch_count; i++) {
                struct buffer_head *bh = journal->j_chkpt_bhs[i];
                clear_buffer_jwrite(bh);
                BUFFER_TRACE(bh, "brelse");
                __brelse(bh);
        }
        *batch_count = 0;
}

/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.  Return <0 if the buffer has failed to
 * be written out.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
                            int *batch_count, transaction_t *transaction)
{
        struct buffer_head *bh = jh2bh(jh);
        int ret = 0;

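        /*
         * Four cases: a locked buffer is waited on; a buffer still attached
         * to a running or committing transaction forces us to wait for that
         * commit; a clean buffer is removed from the checkpoint list on the
         * spot; and a dirty buffer is queued in j_chkpt_bhs[] for batched
         * writeback, flushed once JBD2_NR_BATCH entries have accumulated.
         */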
        if (buffer_locked(bh)) {
                get_bh(bh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                wait_on_buffer(bh);
                /* the journal_head may have gone by now */
                BUFFER_TRACE(bh, "brelse");
                __brelse(bh);
                ret = 1;
        } else if (jh->b_transaction != NULL) {
                transaction_t *t = jh->b_transaction;
                tid_t tid = t->t_tid;

                transaction->t_chp_stats.cs_forced_to_close++;
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                if (unlikely(journal->j_flags & JBD2_UNMOUNT))
                        /*
                         * The journal thread is dead; so starting and
                         * waiting for a commit to finish will cause
                         * us to wait for a _very_ long time.
                         */
                        printk(KERN_ERR "JBD2: %s: "
                               "Waiting for Godot: block %llu\n",
                               journal->j_devname,
                               (unsigned long long) bh->b_blocknr);
                jbd2_log_start_commit(journal, tid);
                jbd2_log_wait_commit(journal, tid);
                ret = 1;
        } else if (!buffer_dirty(bh)) {
                ret = 1;
                if (unlikely(buffer_write_io_error(bh)))
                        ret = -EIO;
                get_bh(bh);
                J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                BUFFER_TRACE(bh, "remove from checkpoint");
                __jbd2_journal_remove_checkpoint(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                __brelse(bh);
        } else {
                /*
                 * Important: we are about to write the buffer, and
                 * possibly block, while still holding the journal lock.
                 * We cannot afford to let the transaction logic start
                 * messing around with this buffer before we write it to
                 * disk, as that would break recoverability.
                 */
                BUFFER_TRACE(bh, "queue");
                get_bh(bh);
                J_ASSERT_BH(bh, !buffer_jwrite(bh));
                set_buffer_jwrite(bh);
                journal->j_chkpt_bhs[*batch_count] = bh;
                __buffer_relink_io(jh);
                jbd_unlock_bh_state(bh);
                transaction->t_chp_stats.cs_written++;
                (*batch_count)++;
                if (*batch_count == JBD2_NR_BATCH) {
                        spin_unlock(&journal->j_list_lock);
                        __flush_batch(journal, batch_count);
                        ret = 1;
                }
        }
        return ret;
}

/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
        transaction_t *transaction;
        tid_t this_tid;
        int result;

        jbd_debug(1, "Start checkpoint\n");

        /*
         * First thing: if there are any transactions in the log which
         * don't need checkpointing, just eliminate them from the
         * journal straight away.
         */
        result = jbd2_cleanup_journal_tail(journal);
        trace_jbd2_checkpoint(journal, result);
        jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
        if (result <= 0)
                return result;

        /*
         * OK, we need to start writing disk blocks.  Take one transaction
         * and write it.
         */
        result = 0;
        spin_lock(&journal->j_list_lock);
        if (!journal->j_checkpoint_transactions)
                goto out;
        transaction = journal->j_checkpoint_transactions;
        if (transaction->t_chp_stats.cs_chp_time == 0)
                transaction->t_chp_stats.cs_chp_time = jiffies;
        this_tid = transaction->t_tid;
restart:
        /*
         * If someone cleaned up this transaction while we slept, we're
         * done (maybe it's a new transaction, but it fell at the same
         * address).
         */
        if (journal->j_checkpoint_transactions == transaction &&
                        transaction->t_tid == this_tid) {
                int batch_count = 0;
                struct journal_head *jh;
                int retry = 0, err;

                while (!retry && transaction->t_checkpoint_list) {
                        struct buffer_head *bh;

                        jh = transaction->t_checkpoint_list;
                        bh = jh2bh(jh);
                        if (!jbd_trylock_bh_state(bh)) {
                                jbd_sync_bh(journal, bh);
                                retry = 1;
                                break;
                        }
                        retry = __process_buffer(journal, jh, &batch_count,
                                                 transaction);
                        if (retry < 0 && !result)
                                result = retry;
                        if (!retry && (need_resched() ||
                                spin_needbreak(&journal->j_list_lock))) {
                                spin_unlock(&journal->j_list_lock);
                                retry = 1;
                                break;
                        }
                }

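                /*
                 * Submit anything left over in the batch.  __flush_batch()
                 * may block, so it cannot run under j_list_lock; if we still
                 * hold the lock (retry == 0), drop it here and let the retry
                 * path below re-take it.
                 */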
                if (batch_count) {
                        if (!retry) {
                                spin_unlock(&journal->j_list_lock);
                                retry = 1;
                        }
                        __flush_batch(journal, &batch_count);
                }

                if (retry) {
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                /*
                 * Now we have cleaned up the first transaction's checkpoint
                 * list. Let's clean up the second one (the list of buffers
                 * we have just submitted for IO).
                 */
                err = __wait_cp_io(journal, transaction);
                if (!result)
                        result = err;
        }
out:
        spin_unlock(&journal->j_list_lock);
        if (result < 0)
                jbd2_journal_abort(journal, result);
        else
                result = jbd2_cleanup_journal_tail(journal);

        return (result < 0) ? result : 0;
}

/*
 * Check the list of checkpoint transactions for the journal to see if
 * we have already got rid of any since the last update of the log tail
 * in the journal superblock.  If so, we can instantly roll the
 * superblock forward to remove those transactions from the log.
 *
 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 *
 * Called with the journal lock held.
 *
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
 * even in abort state, but we must not update the super block if
 * checkpointing may have failed.  Otherwise, we would lose some metadata
 * buffers which should be written-back to the filesystem.
 */

int jbd2_cleanup_journal_tail(journal_t *journal)
{
        transaction_t * transaction;
        tid_t           first_tid;
        unsigned long   blocknr, freed;

        if (is_journal_aborted(journal))
                return 1;

        /* OK, work out the oldest transaction remaining in the log, and
         * the log block it starts at.
         *
         * If the log is now empty, we need to work out which is the
         * next transaction ID we will write, and where it will
         * start. */

        write_lock(&journal->j_state_lock);
        spin_lock(&journal->j_list_lock);
        transaction = journal->j_checkpoint_transactions;
        if (transaction) {
                first_tid = transaction->t_tid;
                blocknr = transaction->t_log_start;
        } else if ((transaction = journal->j_committing_transaction) != NULL) {
                first_tid = transaction->t_tid;
                blocknr = transaction->t_log_start;
        } else if ((transaction = journal->j_running_transaction) != NULL) {
                first_tid = transaction->t_tid;
                blocknr = journal->j_head;
        } else {
                first_tid = journal->j_transaction_sequence;
                blocknr = journal->j_head;
        }
        spin_unlock(&journal->j_list_lock);
        J_ASSERT(blocknr != 0);

        /* If the oldest pinned transaction is at the tail of the log
           already then there's not much we can do right now. */
        if (journal->j_tail_sequence == first_tid) {
                write_unlock(&journal->j_state_lock);
                return 1;
        }

        /* OK, update the superblock to recover the freed space.
         * Physical blocks come first: have we wrapped beyond the end of
         * the log?  */
        freed = blocknr - journal->j_tail;
        if (blocknr < journal->j_tail)
                freed = freed + journal->j_last - journal->j_first;
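        /*
         * Example (assuming the usable journal blocks span
         * j_first .. j_last - 1): with j_first == 1, j_last == 1000,
         * j_tail == 900 and blocknr == 100, the tail has wrapped, and we
         * free the 100 blocks 900..999 plus the 99 blocks 1..99, i.e.
         * freed = (100 - 900) + (1000 - 1) = 199.
         */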

        trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
        jbd_debug(1,
                  "Cleaning journal tail from %d to %d (offset %lu), "
                  "freeing %lu\n",
                  journal->j_tail_sequence, first_tid, blocknr, freed);

        journal->j_free += freed;
        journal->j_tail_sequence = first_tid;
        journal->j_tail = blocknr;
        write_unlock(&journal->j_state_lock);

        /*
         * If there is an external journal, we need to make sure that
         * any data blocks that were recently written out --- perhaps
         * by jbd2_log_do_checkpoint() --- are flushed out before we
         * drop the transactions from the external journal.  It's
         * unlikely this will be necessary, especially with an
         * appropriately sized journal, but we need this to guarantee
         * correctness.  Fortunately jbd2_cleanup_journal_tail()
         * doesn't get called all that often.
         */
        if ((journal->j_fs_dev != journal->j_dev) &&
            (journal->j_flags & JBD2_BARRIER))
                blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
        if (!(journal->j_flags & JBD2_ABORT))
                jbd2_journal_update_superblock(journal, 1);
        return 0;
}


/* Checkpoint list management */

/*
 * journal_clean_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list and
 * release them.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 * Returns number of buffers reaped (for debug)
 */

static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
{
        struct journal_head *last_jh;
        struct journal_head *next_jh = jh;
        int ret, freed = 0;

        *released = 0;
        if (!jh)
                return 0;

        last_jh = jh->b_cpprev;
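        /*
         * The list is circular, so the head's b_cpprev is its last element.
         * Grab it now as a termination marker; it is only ever compared
         * against, never dereferenced, so the walk terminates correctly even
         * if that buffer is released along the way.
         */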
        do {
                jh = next_jh;
                next_jh = jh->b_cpnext;
                /* Use trylock because of the ranking */
                if (jbd_trylock_bh_state(jh2bh(jh))) {
                        ret = __try_to_free_cp_buf(jh);
                        if (ret) {
                                freed++;
                                if (ret == 2) {
                                        *released = 1;
                                        return freed;
                                }
                        }
                }
                /*
                 * This function only frees up some memory
                 * if possible so we don't have an obligation
                 * to finish processing. Bail out if preemption
                 * requested:
                 */
                if (need_resched())
                        return freed;
        } while (jh != last_jh);

        return freed;
}

/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 * Returns number of buffers reaped (for debug)
 */

int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
{
        transaction_t *transaction, *last_transaction, *next_transaction;
        int ret = 0;
        int released;

        transaction = journal->j_checkpoint_transactions;
        if (!transaction)
                goto out;

        last_transaction = transaction->t_cpprev;
        next_transaction = transaction;
        do {
                transaction = next_transaction;
                next_transaction = transaction->t_cpnext;
                ret += journal_clean_one_cp_list(transaction->
                                t_checkpoint_list, &released);
                /*
                 * This function only frees up some memory if possible so we
                 * don't have an obligation to finish processing. Bail out if
                 * preemption requested:
                 */
                if (need_resched())
                        goto out;
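                /*
                 * If the whole transaction was released while cleaning
                 * t_checkpoint_list, __jbd2_journal_remove_checkpoint() has
                 * already dropped and freed it, so its io list must not be
                 * touched; move on using the next pointer saved above.
                 */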
                if (released)
                        continue;
                /*
                 * It is essential that we are as careful as in the case of
                 * t_checkpoint_list with removing the buffer from the list as
                 * we can possibly see not yet submitted buffers on io_list
                 */
                ret += journal_clean_one_cp_list(transaction->
                                t_checkpoint_io_list, &released);
                if (need_resched())
                        goto out;
        } while (transaction != last_transaction);
out:
        return ret;
}

/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk.  To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with j_list_lock held.
 * This function is called with jbd_lock_bh_state(jh2bh(jh))
 */

int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
        struct transaction_chp_stats_s *stats;
        transaction_t *transaction;
        journal_t *journal;
        int ret = 0;

        JBUFFER_TRACE(jh, "entry");

        if ((transaction = jh->b_cp_transaction) == NULL) {
                JBUFFER_TRACE(jh, "not on transaction");
                goto out;
        }
        journal = transaction->t_journal;

        JBUFFER_TRACE(jh, "removing from transaction");
        __buffer_unlink(jh);
        jh->b_cp_transaction = NULL;
        jbd2_journal_put_journal_head(jh);
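        /*
         * That put balances the reference taken in
         * __jbd2_journal_insert_checkpoint(); as the comment above warns,
         * jh (and possibly its buffer) may be freed from this point on.
         */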

        if (transaction->t_checkpoint_list != NULL ||
            transaction->t_checkpoint_io_list != NULL)
                goto out;

        /*
         * There is one special case to worry about: if we have just pulled the
         * buffer off a running or committing transaction's checkpoint list,
         * then even if the checkpoint list is empty, the transaction obviously
         * cannot be dropped!
         *
         * The locking here around t_state is a bit sleazy.
         * See the comment at the end of jbd2_journal_commit_transaction().
         */
        if (transaction->t_state != T_FINISHED)
                goto out;

        /* OK, that was the last buffer for the transaction: we can now
           safely remove this transaction from the log */
        stats = &transaction->t_chp_stats;
        if (stats->cs_chp_time)
                stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
                                                    jiffies);
        trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
                                    transaction->t_tid, stats);

        __jbd2_journal_drop_transaction(journal, transaction);
        kfree(transaction);

        /* Just in case anybody was waiting for more transactions to be
           checkpointed... */
        wake_up(&journal->j_wait_logspace);
        ret = 1;
out:
        return ret;
}

/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
 * the log.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
                               transaction_t *transaction)
{
        JBUFFER_TRACE(jh, "entry");
        J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
        J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

        /* Get reference for checkpointing transaction */
        jbd2_journal_grab_journal_head(jh2bh(jh));
        jh->b_cp_transaction = transaction;

        if (!transaction->t_checkpoint_list) {
                jh->b_cpnext = jh->b_cpprev = jh;
        } else {
                jh->b_cpnext = transaction->t_checkpoint_list;
                jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
                jh->b_cpprev->b_cpnext = jh;
                jh->b_cpnext->b_cpprev = jh;
        }
        transaction->t_checkpoint_list = jh;
}

/*
 * We've finished with this transaction structure: adios...
 *
 * The transaction must have no links except for the checkpoint by this
 * point.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */

void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
        assert_spin_locked(&journal->j_list_lock);
        if (transaction->t_cpnext) {
                transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
                transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
                if (journal->j_checkpoint_transactions == transaction)
                        journal->j_checkpoint_transactions =
                                transaction->t_cpnext;
                if (journal->j_checkpoint_transactions == transaction)
                        journal->j_checkpoint_transactions = NULL;
        }

        J_ASSERT(transaction->t_state == T_FINISHED);
        J_ASSERT(transaction->t_buffers == NULL);
        J_ASSERT(transaction->t_forget == NULL);
        J_ASSERT(transaction->t_iobuf_list == NULL);
        J_ASSERT(transaction->t_shadow_list == NULL);
        J_ASSERT(transaction->t_log_list == NULL);
        J_ASSERT(transaction->t_checkpoint_list == NULL);
        J_ASSERT(transaction->t_checkpoint_io_list == NULL);
        J_ASSERT(atomic_read(&transaction->t_updates) == 0);
        J_ASSERT(journal->j_committing_transaction != transaction);
        J_ASSERT(journal->j_running_transaction != transaction);

        jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
 802