linux/fs/btrfs/transaction.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"

#define BTRFS_ROOT_TRANS_TAG 0

void put_transaction(struct btrfs_transaction *transaction)
{
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(transaction->delayed_refs.root.rb_node);
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
}
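
/*
 * Refcount sketch (illustrative, not code from this file): every
 * atomic_inc() of ->use_count must be paired with a put_transaction().
 * A hypothetical reader that needs the transaction to stay alive after
 * dropping trans_lock follows the same pattern wait_current_trans()
 * uses below:
 *
 *      spin_lock(&fs_info->trans_lock);
 *      cur_trans = fs_info->running_transaction;
 *      if (cur_trans)
 *              atomic_inc(&cur_trans->use_count);
 *      spin_unlock(&fs_info->trans_lock);
 *      ...use cur_trans...
 *      put_transaction(cur_trans);
 */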

static noinline void switch_commit_root(struct btrfs_root *root)
{
        free_extent_buffer(root->commit_root);
        root->commit_root = btrfs_root_node(root);
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *fs_info = root->fs_info;

        spin_lock(&fs_info->trans_lock);
loop:
        /* The file system has been taken offline. No new transactions. */
        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
                spin_unlock(&fs_info->trans_lock);
                return -EROFS;
        }

        if (fs_info->trans_no_join) {
                if (!nofail) {
                        spin_unlock(&fs_info->trans_lock);
                        return -EBUSY;
                }
        }

        cur_trans = fs_info->running_transaction;
        if (cur_trans) {
                if (cur_trans->aborted) {
                        spin_unlock(&fs_info->trans_lock);
                        return cur_trans->aborted;
                }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
                spin_unlock(&fs_info->trans_lock);
                return 0;
        }
        spin_unlock(&fs_info->trans_lock);

        cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
        if (!cur_trans)
                return -ENOMEM;

        spin_lock(&fs_info->trans_lock);
        if (fs_info->running_transaction) {
                /*
                 * someone started a transaction after we unlocked.  Make sure
                 * to redo the trans_no_join checks above
                 */
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                cur_trans = fs_info->running_transaction;
                goto loop;
        } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
                spin_unlock(&fs_info->trans_lock);
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                return -EROFS;
        }

        atomic_set(&cur_trans->num_writers, 1);
        cur_trans->num_joined = 0;
        init_waitqueue_head(&cur_trans->writer_wait);
        init_waitqueue_head(&cur_trans->commit_wait);
        cur_trans->in_commit = 0;
        cur_trans->blocked = 0;
        /*
         * One for this trans handle, one so it will live on until we
         * commit the transaction.
         */
        atomic_set(&cur_trans->use_count, 2);
        cur_trans->commit_done = 0;
        cur_trans->start_time = get_seconds();

        cur_trans->delayed_refs.root = RB_ROOT;
        cur_trans->delayed_refs.num_entries = 0;
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;

        /*
         * although the tree mod log is per file system and not per transaction,
         * the log must never go across transaction boundaries.
         */
        smp_mb();
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when "
                        "creating a fresh transaction\n");
                WARN_ON(1);
        }
        if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) {
                printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
                        "creating a fresh transaction\n");
                WARN_ON(1);
        }
        atomic_set(&fs_info->tree_mod_seq, 0);

        spin_lock_init(&cur_trans->commit_lock);
        spin_lock_init(&cur_trans->delayed_refs.lock);

        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
        fs_info->generation++;
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
        cur_trans->aborted = 0;
        spin_unlock(&fs_info->trans_lock);

        return 0;
}
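
/*
 * A note on the allocation dance in join_transaction() above: the
 * transaction struct is allocated with trans_lock dropped (the slab
 * allocation may sleep), then the lock is retaken and
 * fs_info->running_transaction is rechecked.  A loser of that race
 * frees its copy and jumps back to "loop" so the error-state and
 * trans_no_join checks are redone against the winning transaction.
 */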

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static int record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);

                /*
                 * see below for in_trans_setup usage rules
                 * we have the reloc mutex held now, so there
                 * is only one writer in this function
                 */
                root->in_trans_setup = 1;

                /* make sure readers find in_trans_setup before
                 * they find our root->last_trans update
                 */
                smp_wmb();

                spin_lock(&root->fs_info->fs_roots_radix_lock);
                if (root->last_trans == trans->transid) {
                        spin_unlock(&root->fs_info->fs_roots_radix_lock);
                        return 0;
                }
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
                spin_unlock(&root->fs_info->fs_roots_radix_lock);
                root->last_trans = trans->transid;

                /* this is pretty tricky.  We don't want to
                 * take the relocation lock in btrfs_record_root_in_trans
                 * unless we're really doing the first setup for this root in
                 * this transaction.
                 *
                 * Normally we'd use root->last_trans as a flag to decide
                 * if we want to take the expensive mutex.
                 *
                 * But, we have to set root->last_trans before we
                 * init the relocation root, otherwise, we trip over warnings
                 * in ctree.c.  The solution used here is to flag ourselves
                 * with root->in_trans_setup.  When this is 1, we're still
                 * fixing up the reloc trees and everyone must wait.
                 *
                 * When this is zero, they can trust root->last_trans and fly
                 * through btrfs_record_root_in_trans without having to take the
                 * lock.  smp_wmb() makes sure that all the writes above are
                 * done before we pop in the zero below
                 */
                btrfs_init_reloc_root(trans, root);
                smp_wmb();
                root->in_trans_setup = 0;
        }
        return 0;
}


int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        if (!root->ref_cows)
                return 0;

        /*
         * see record_root_in_trans for comments about in_trans_setup usage
         * and barriers
         */
        smp_rmb();
        if (root->last_trans == trans->transid &&
            !root->in_trans_setup)
                return 0;

        mutex_lock(&root->fs_info->reloc_mutex);
        record_root_in_trans(trans, root);
        mutex_unlock(&root->fs_info->reloc_mutex);

        return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans;

        spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                atomic_inc(&cur_trans->use_count);
                spin_unlock(&root->fs_info->trans_lock);

                wait_event(root->fs_info->transaction_wait,
                           !cur_trans->blocked);
                put_transaction(cur_trans);
        } else {
                spin_unlock(&root->fs_info->trans_lock);
        }
}

enum btrfs_trans_type {
        TRANS_START,
        TRANS_JOIN,
        TRANS_USERSPACE,
        TRANS_JOIN_NOLOCK,
};

static int may_wait_transaction(struct btrfs_root *root, int type)
{
        if (root->fs_info->log_root_recovering)
                return 0;

        if (type == TRANS_USERSPACE)
                return 1;

        if (type == TRANS_START &&
            !atomic_read(&root->fs_info->open_ioctl_trans))
                return 1;

        return 0;
}

static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                                                    u64 num_items, int type)
{
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
        u64 num_bytes = 0;
        int ret;
        u64 qgroup_reserved = 0;

        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
                return ERR_PTR(-EROFS);

        if (current->journal_info) {
                WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
                h = current->journal_info;
                h->use_count++;
                h->orig_rsv = h->block_rsv;
                h->block_rsv = NULL;
                goto got_it;
        }

        /*
         * Do the reservation before we join the transaction so we can do all
         * the appropriate flushing if need be.
         */
        if (num_items > 0 && root != root->fs_info->chunk_root) {
                if (root->fs_info->quota_enabled &&
                    is_fstree(root->root_key.objectid)) {
                        qgroup_reserved = num_items * root->leafsize;
                        ret = btrfs_qgroup_reserve(root, qgroup_reserved);
                        if (ret)
                                return ERR_PTR(ret);
                }

                num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
                ret = btrfs_block_rsv_add(root,
                                          &root->fs_info->trans_block_rsv,
                                          num_bytes);
                if (ret)
                        return ERR_PTR(ret);
        }
again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
                return ERR_PTR(-ENOMEM);

        sb_start_intwrite(root->fs_info->sb);

        if (may_wait_transaction(root, type))
                wait_current_trans(root);

        do {
                ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
                if (ret == -EBUSY)
                        wait_current_trans(root);
        } while (ret == -EBUSY);

        if (ret < 0) {
                sb_end_intwrite(root->fs_info->sb);
                kmem_cache_free(btrfs_trans_handle_cachep, h);
                return ERR_PTR(ret);
        }

        cur_trans = root->fs_info->running_transaction;

        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
        h->blocks_used = 0;
        h->bytes_reserved = 0;
        h->root = root;
        h->delayed_ref_updates = 0;
        h->use_count = 1;
        h->adding_csums = 0;
        h->block_rsv = NULL;
        h->orig_rsv = NULL;
        h->aborted = 0;
        h->qgroup_reserved = qgroup_reserved;
        h->delayed_ref_elem.seq = 0;
        INIT_LIST_HEAD(&h->qgroup_ref_list);

        smp_mb();
        if (cur_trans->blocked && may_wait_transaction(root, type)) {
                btrfs_commit_transaction(h, root);
                goto again;
        }

        if (num_bytes) {
                trace_btrfs_space_reservation(root->fs_info, "transaction",
                                              h->transid, num_bytes, 1);
                h->block_rsv = &root->fs_info->trans_block_rsv;
                h->bytes_reserved = num_bytes;
        }

got_it:
        btrfs_record_root_in_trans(h, root);

        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
        return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items)
{
        return start_transaction(root, num_items, TRANS_START);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
        return start_transaction(root, 0, TRANS_USERSPACE);
}
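
/*
 * Illustrative caller (a sketch, not code from this file): the usual
 * pairing of start and end around a tree modification.  num_items is
 * the worst-case number of items the caller will insert or update, so
 * that the reservation taken in start_transaction() covers the COW
 * cost:
 *
 *      struct btrfs_trans_handle *trans;
 *
 *      trans = btrfs_start_transaction(root, 1);
 *      if (IS_ERR(trans))
 *              return PTR_ERR(trans);
 *      ...modify the tree...
 *      return btrfs_end_transaction(trans, root);
 */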

/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
{
        wait_event(commit->commit_wait, commit->commit_done);
}

int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
        struct btrfs_transaction *cur_trans = NULL, *t;
        int ret;

        ret = 0;
        if (transid) {
                if (transid <= root->fs_info->last_trans_committed)
                        goto out;

                /* find specified transaction */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry(t, &root->fs_info->trans_list, list) {
                        if (t->transid == transid) {
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                break;
                        }
                        if (t->transid > transid)
                                break;
                }
                spin_unlock(&root->fs_info->trans_lock);
                ret = -EINVAL;
                if (!cur_trans)
                        goto out;  /* bad transid */
        } else {
                /* find newest transaction that is committing | committed */
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
                                            list) {
                        if (t->in_commit) {
                                if (t->commit_done)
                                        break;
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
                                break;
                        }
                }
                spin_unlock(&root->fs_info->trans_lock);
                if (!cur_trans)
                        goto out;  /* nothing committing|committed */
        }

        wait_for_commit(root, cur_trans);

        put_transaction(cur_trans);
        ret = 0;
out:
        return ret;
}
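
/*
 * Typical use of btrfs_wait_for_commit() (hypothetical caller): stash
 * the transid of a change, end the transaction, and wait for that
 * commit to hit disk later without forcing a new commit:
 *
 *      u64 transid = trans->transid;
 *
 *      btrfs_end_transaction(trans, root);
 *      ...
 *      ret = btrfs_wait_for_commit(root, transid);
 *
 * A transid of zero waits on whatever transaction, if any, is
 * currently committing.
 */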

void btrfs_throttle(struct btrfs_root *root)
{
        if (!atomic_read(&root->fs_info->open_ioctl_trans))
                wait_current_trans(root);
}

static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
{
        int ret;

        ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
        return ret ? 1 : 0;
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        int updates;
        int err;

        smp_mb();
        if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
                return 1;

        updates = trans->delayed_ref_updates;
        trans->delayed_ref_updates = 0;
        if (updates) {
                err = btrfs_run_delayed_refs(trans, root, updates);
                if (err) /* Error code will also eval true */
                        return err;
        }

        return should_end_transaction(trans, root);
}
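
/*
 * Illustrative loop (a sketch assuming a long-running operation such
 * as a large truncate): callers performing many tree operations poll
 * btrfs_should_end_transaction() and cycle their handle when it says
 * to stop, so no single handle pins the transaction for too long:
 *
 *      while (more_work) {
 *              ...do a bounded chunk of work...
 *              if (btrfs_should_end_transaction(trans, root)) {
 *                      btrfs_end_transaction(trans, root);
 *                      trans = btrfs_start_transaction(root, num_items);
 *                      if (IS_ERR(trans))
 *                              return PTR_ERR(trans);
 *              }
 *      }
 */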

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, int throttle, int lock)
{
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
        int count = 0;
        int err = 0;

        if (--trans->use_count) {
                trans->block_rsv = trans->orig_rsv;
                return 0;
        }

        /*
         * do the qgroup accounting as early as possible
         */
        err = btrfs_delayed_refs_qgroup_accounting(trans, info);

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;
        /*
         * the same root has to be passed to start_transaction and
         * end_transaction. Subvolume quota depends on this.
         */
        WARN_ON(trans->root != root);

        if (trans->qgroup_reserved) {
                btrfs_qgroup_free(root, trans->qgroup_reserved);
                trans->qgroup_reserved = 0;
        }

        while (count < 2) {
                unsigned long cur = trans->delayed_ref_updates;
                trans->delayed_ref_updates = 0;
                if (cur &&
                    trans->transaction->delayed_refs.num_heads_ready > 64) {
                        trans->delayed_ref_updates = 0;
                        btrfs_run_delayed_refs(trans, root, cur);
                } else {
                        break;
                }
                count++;
        }
        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        sb_end_intwrite(root->fs_info->sb);

        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root)) {
                trans->transaction->blocked = 1;
                smp_wmb();
        }

        if (lock && cur_trans->blocked && !cur_trans->in_commit) {
                if (throttle) {
                        /*
                         * We may race with somebody else here so end up having
                         * to call end_transaction on ourselves again, so inc
                         * our use_count.
                         */
                        trans->use_count++;
                        return btrfs_commit_transaction(trans, root);
                } else {
                        wake_up_process(info->transaction_kthread);
                }
        }

        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
        atomic_dec(&cur_trans->num_writers);

        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        if (throttle)
                btrfs_run_delayed_iputs(root);

        if (trans->aborted ||
            root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
                err = -EIO;
        }
        assert_qgroups_uptodate(trans);

        memset(trans, 0, sizeof(*trans));
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
        return err;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        int ret;

        ret = __btrfs_end_transaction(trans, root, 0, 1);
        if (ret)
                return ret;
        return 0;
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
{
        int ret;

        ret = __btrfs_end_transaction(trans, root, 1, 1);
        if (ret)
                return ret;
        return 0;
}

int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root)
{
        int ret;

        ret = __btrfs_end_transaction(trans, root, 0, 0);
        if (ret)
                return ret;
        return 0;
}

int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 1, 1);
}
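
/*
 * Nesting note for the end_transaction family: start_transaction()
 * bumps h->use_count when current->journal_info already carries a
 * handle, so a nested start/end pair only restores the original
 * block_rsv; just the outermost __btrfs_end_transaction() call drops
 * the writer count and frees the handle.
 */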

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
                               struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      mark)) {
                convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark,
                                   GFP_NOFS);
                err = filemap_fdatawrite_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
                              struct extent_io_tree *dirty_pages, int mark)
{
        int err = 0;
        int werr = 0;
        struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
        u64 start = 0;
        u64 end;

        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      EXTENT_NEED_WAIT)) {
                clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS);
                err = filemap_fdatawait_range(mapping, start, end);
                if (err)
                        werr = err;
                cond_resched();
                start = end + 1;
        }
        if (err)
                werr = err;
        return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark)
{
        int ret;
        int ret2;

        ret = btrfs_write_marked_extents(root, dirty_pages, mark);
        ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

        if (ret)
                return ret;
        if (ret2)
                return ret2;
        return 0;
}
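
/*
 * Usage note: the transaction commit path hands these helpers the
 * transaction's dirty_pages tree with the EXTENT_DIRTY mark (see
 * btrfs_write_and_wait_transaction() below); the tree-log code uses
 * the same helpers against its own dirty pages tree.
 */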

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root)
{
        if (!trans || !trans->transaction) {
                struct inode *btree_inode;
                btree_inode = root->fs_info->btree_inode;
                return filemap_write_and_wait(btree_inode->i_mapping);
        }
        return btrfs_write_and_wait_marked_extents(root,
                                           &trans->transaction->dirty_pages,
                                           EXTENT_DIRTY);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        int ret;
        u64 old_root_bytenr;
        u64 old_root_used;
        struct btrfs_root *tree_root = root->fs_info->tree_root;

        old_root_used = btrfs_root_used(&root->root_item);
        btrfs_write_dirty_block_groups(trans, root);

        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
                    old_root_used == btrfs_root_used(&root->root_item))
                        break;

                btrfs_set_root_node(&root->root_item, root->node);
                ret = btrfs_update_root(trans, tree_root,
                                        &root->root_key,
                                        &root->root_item);
                if (ret)
                        return ret;

                old_root_used = btrfs_root_used(&root->root_item);
                ret = btrfs_write_dirty_block_groups(trans, root);
                if (ret)
                        return ret;
        }

        if (root != root->fs_info->extent_root)
                switch_commit_root(root);

        return 0;
}

/*
 * update all the cowonly tree roots on disk
 *
 * The error handling in this function may not be obvious. Any of the
 * failures will cause the file system to go offline. We still need
 * to clean up the delayed refs.
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        eb = btrfs_lock_root_node(fs_info->tree_root);
        ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
                              0, &eb);
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        if (ret)
                return ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;

        ret = btrfs_run_dev_stats(trans, root->fs_info);
        BUG_ON(ret);

        ret = btrfs_run_qgroups(trans, root->fs_info);
        BUG_ON(ret);

        /* run_qgroups might have added some more refs */
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);

        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);

                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
        }

        down_write(&fs_info->extent_commit_sem);
        switch_commit_root(fs_info->extent_root);
        up_write(&fs_info->extent_commit_sem);

        return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
        spin_lock(&root->fs_info->trans_lock);
        list_add(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
        return 0;
}
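
/*
 * The dead_roots list built above is drained by
 * btrfs_clean_old_snapshots() at the bottom of this file, typically
 * from the cleaner kthread, so deleting a snapshot only schedules the
 * work here; the space is reclaimed later.
 */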

/*
 * update all the fs tree roots on disk (as opposed to the cowonly
 * roots handled above)
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
{
        struct btrfs_root *gang[8];
        struct btrfs_fs_info *fs_info = root->fs_info;
        int i;
        int ret;
        int err = 0;

        spin_lock(&fs_info->fs_roots_radix_lock);
        while (1) {
                ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
                                                 (void **)gang, 0,
                                                 ARRAY_SIZE(gang),
                                                 BTRFS_ROOT_TRANS_TAG);
                if (ret == 0)
                        break;
                for (i = 0; i < ret; i++) {
                        root = gang[i];
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
                        spin_unlock(&fs_info->fs_roots_radix_lock);

                        btrfs_free_log(trans, root);
                        btrfs_update_reloc_root(trans, root);
                        btrfs_orphan_commit_root(trans, root);

                        btrfs_save_ino_cache(root, trans);

                        /* see comments in should_cow_block() */
                        root->force_cow = 0;
                        smp_wmb();

                        if (root->commit_root != root->node) {
                                mutex_lock(&root->fs_commit_mutex);
                                switch_commit_root(root);
                                btrfs_unpin_free_ino(root);
                                mutex_unlock(&root->fs_commit_mutex);

                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }

                        err = btrfs_update_root(trans, fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
                        spin_lock(&fs_info->fs_roots_radix_lock);
                        if (err)
                                break;
                }
        }
        spin_unlock(&fs_info->fs_roots_radix_lock);
        return err;
}

/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_trans_handle *trans;
        int ret;
        unsigned long nr;

        if (xchg(&root->defrag_running, 1))
                return 0;

        while (1) {
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);

                ret = btrfs_defrag_leaves(trans, root, cacheonly);

                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();

                if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
                        break;
        }
        root->defrag_running = 0;
        return ret;
}
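
/*
 * Illustrative call (a sketch of the defrag ioctl path, with error
 * handling elided): a full defrag passes cacheonly == 0, and the
 * xchg() of ->defrag_running above makes a concurrent second call
 * return immediately:
 *
 *      ret = btrfs_defrag_root(root, 0);
 *      if (!ret)
 *              ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
 */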

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   struct btrfs_pending_snapshot *pending)
{
        struct btrfs_key key;
        struct btrfs_root_item *new_root_item;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root = pending->root;
        struct btrfs_root *parent_root;
        struct btrfs_block_rsv *rsv;
        struct inode *parent_inode;
        struct dentry *parent;
        struct dentry *dentry;
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        struct timespec cur_time = CURRENT_TIME;
        int ret;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
        u64 root_flags;
        uuid_le new_uuid;

        rsv = trans->block_rsv;

        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
                ret = pending->error = -ENOMEM;
                goto fail;
        }

        ret = btrfs_find_free_objectid(tree_root, &objectid);
        if (ret) {
                pending->error = ret;
                goto fail;
        }

        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);

        if (to_reserve > 0) {
                ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
                                                  to_reserve);
                if (ret) {
                        pending->error = ret;
                        goto fail;
                }
        }

        ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
                                   objectid, pending->inherit);
        kfree(pending->inherit);
        if (ret) {
                pending->error = ret;
                goto fail;
        }

        key.objectid = objectid;
        key.offset = (u64)-1;
        key.type = BTRFS_ROOT_ITEM_KEY;

        trans->block_rsv = &pending->block_rsv;

        dentry = pending->dentry;
        parent = dget_parent(dentry);
        parent_inode = parent->d_inode;
        parent_root = BTRFS_I(parent_inode)->root;
        record_root_in_trans(trans, parent_root);

        /*
         * insert the directory item
         */
        ret = btrfs_set_inode_index(parent_inode, &index);
        BUG_ON(ret); /* -ENOMEM */
        ret = btrfs_insert_dir_item(trans, parent_root,
                                dentry->d_name.name, dentry->d_name.len,
                                parent_inode, &key,
                                BTRFS_FT_DIR, index);
        if (ret == -EEXIST) {
                pending->error = -EEXIST;
                dput(parent);
                goto fail;
        } else if (ret) {
                goto abort_trans_dput;
        }

        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
        parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        if (ret)
                goto abort_trans_dput;

        /*
         * pull in the delayed directory update
         * and the delayed inode item
         * otherwise we corrupt the FS during
         * snapshot
         */
        ret = btrfs_run_delayed_items(trans, root);
        if (ret) { /* Transaction aborted */
                dput(parent);
                goto fail;
        }

        record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);

        root_flags = btrfs_root_flags(new_root_item);
        if (pending->readonly)
                root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
        else
                root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
        btrfs_set_root_flags(new_root_item, root_flags);

        btrfs_set_root_generation_v2(new_root_item,
                        trans->transid);
        uuid_le_gen(&new_uuid);
        memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
        memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                        BTRFS_UUID_SIZE);
        new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
        new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);
        memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
        memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
        btrfs_set_root_stransid(new_root_item, 0);
        btrfs_set_root_rtransid(new_root_item, 0);

        old = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
        if (ret) {
                btrfs_tree_unlock(old);
                free_extent_buffer(old);
                goto abort_trans_dput;
        }

        btrfs_set_lock_blocking(old);

        ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
        /* clean up in any case */
        btrfs_tree_unlock(old);
        free_extent_buffer(old);
        if (ret)
                goto abort_trans_dput;

        /* see comments in should_cow_block() */
        root->force_cow = 1;
        smp_wmb();

        btrfs_set_root_node(new_root_item, tmp);
        /* record when the snapshot was created in key.offset */
        key.offset = trans->transid;
        ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
        btrfs_tree_unlock(tmp);
        free_extent_buffer(tmp);
        if (ret)
                goto abort_trans_dput;

        /*
         * insert root back/forward references
         */
        ret = btrfs_add_root_ref(trans, tree_root, objectid,
                                 parent_root->root_key.objectid,
                                 btrfs_ino(parent_inode), index,
                                 dentry->d_name.name, dentry->d_name.len);
        dput(parent);
        if (ret)
                goto fail;

        key.offset = (u64)-1;
        pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
        if (IS_ERR(pending->snap)) {
                ret = PTR_ERR(pending->snap);
                goto abort_trans;
        }

        ret = btrfs_reloc_post_snapshot(trans, pending);
        if (ret)
                goto abort_trans;
        ret = 0;
fail:
        kfree(new_root_item);
        trans->block_rsv = rsv;
        btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
        return ret;

abort_trans_dput:
        dput(parent);
abort_trans:
        btrfs_abort_transaction(trans, root, ret);
        goto fail;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
                                             struct btrfs_fs_info *fs_info)
{
        struct btrfs_pending_snapshot *pending;
        struct list_head *head = &trans->transaction->pending_snapshots;

        list_for_each_entry(pending, head, list)
                create_pending_snapshot(trans, fs_info, pending);
        return 0;
}
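
/*
 * Note that create_pending_snapshot() reports per-snapshot failures
 * through pending->error rather than through the return value above.
 * The ioctl that queued the snapshot checks it once the commit
 * finishes, roughly (a sketch of the consumer side):
 *
 *      ret = pending->error;
 *      if (!ret)
 *              ...look up pending->snap and instantiate the dentry...
 */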

static void update_super_roots(struct btrfs_root *root)
{
        struct btrfs_root_item *root_item;
        struct btrfs_super_block *super;

        super = root->fs_info->super_copy;

        root_item = &root->fs_info->chunk_root->root_item;
        super->chunk_root = root_item->bytenr;
        super->chunk_root_generation = root_item->generation;
        super->chunk_root_level = root_item->level;

        root_item = &root->fs_info->tree_root->root_item;
        super->root = root_item->bytenr;
        super->generation = root_item->generation;
        super->root_level = root_item->level;
        if (btrfs_test_opt(root, SPACE_CACHE))
                super->cache_generation = root_item->generation;
}

int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
        int ret = 0;

        spin_lock(&info->trans_lock);
        if (info->running_transaction)
                ret = info->running_transaction->in_commit;
        spin_unlock(&info->trans_lock);
        return ret;
}

int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
        int ret = 0;

        spin_lock(&info->trans_lock);
        if (info->running_transaction)
                ret = info->running_transaction->blocked;
        spin_unlock(&info->trans_lock);
        return ret;
}

/*
 * wait for the current transaction commit to start and block subsequent
 * transaction joins
 */
static void wait_current_trans_commit_start(struct btrfs_root *root,
                                            struct btrfs_transaction *trans)
{
        wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
}

/*
 * wait for the current transaction to start and then become unblocked.
 * caller holds ref.
 */
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
                                         struct btrfs_transaction *trans)
{
        wait_event(root->fs_info->transaction_wait,
                   trans->commit_done || (trans->in_commit && !trans->blocked));
}

/*
 * commit transactions asynchronously. once btrfs_commit_transaction_async
 * returns, any subsequent transaction will not be allowed to join.
 */
struct btrfs_async_commit {
        struct btrfs_trans_handle *newtrans;
        struct btrfs_root *root;
        struct delayed_work work;
};

static void do_async_commit(struct work_struct *work)
{
        struct btrfs_async_commit *ac =
                container_of(work, struct btrfs_async_commit, work.work);

        btrfs_commit_transaction(ac->newtrans, ac->root);
        kfree(ac);
}

int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   int wait_for_unblock)
{
        struct btrfs_async_commit *ac;
        struct btrfs_transaction *cur_trans;

        ac = kmalloc(sizeof(*ac), GFP_NOFS);
        if (!ac)
                return -ENOMEM;

        INIT_DELAYED_WORK(&ac->work, do_async_commit);
        ac->root = root;
        ac->newtrans = btrfs_join_transaction(root);
        if (IS_ERR(ac->newtrans)) {
                int err = PTR_ERR(ac->newtrans);
                kfree(ac);
                return err;
        }

        /* take transaction reference */
        cur_trans = trans->transaction;
        atomic_inc(&cur_trans->use_count);

        btrfs_end_transaction(trans, root);
        schedule_delayed_work(&ac->work, 0);

        /* wait for transaction to start and unblock */
        if (wait_for_unblock)
                wait_current_trans_commit_start_and_unblock(root, cur_trans);
        else
                wait_current_trans_commit_start(root, cur_trans);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        put_transaction(cur_trans);
        return 0;
}
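
/*
 * Illustrative use (a sketch of the async snapshot path): the caller
 * wants the commit underway, but not necessarily finished, before
 * returning to user space:
 *
 *      ret = btrfs_commit_transaction_async(trans, root, 1);
 *
 * With wait_for_unblock set this returns once the commit has cleared
 * the blocked stage, i.e. new transactions may start again.  On error
 * (-ENOMEM or a failed join) the caller still owns trans and must end
 * or commit it itself.
 */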

static void cleanup_transaction(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, int err)
{
        struct btrfs_transaction *cur_trans = trans->transaction;

        WARN_ON(trans->use_count > 1);

        btrfs_abort_transaction(trans, root, err);

        spin_lock(&root->fs_info->trans_lock);
        list_del_init(&cur_trans->list);
        if (cur_trans == root->fs_info->running_transaction) {
                root->fs_info->running_transaction = NULL;
                root->fs_info->trans_no_join = 0;
        }
        spin_unlock(&root->fs_info->trans_lock);

        btrfs_cleanup_one_transaction(trans->transaction, root);

        /* one put for this handle's ref, one for the trans_list's ref */
        put_transaction(cur_trans);
        put_transaction(cur_trans);

        trace_btrfs_transaction_commit(root);

        btrfs_scrub_continue(root);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        kmem_cache_free(btrfs_trans_handle_cachep, trans);
}

/*
 * btrfs_transaction state sequence:
 *    in_commit = 0, blocked = 0  (initial)
 *    in_commit = 1, blocked = 1
 *    blocked = 0
 *    commit_done = 1
 */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
{
        unsigned long joined = 0;
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_transaction *prev_trans = NULL;
        DEFINE_WAIT(wait);
        int ret = -EIO;
        int should_grow = 0;
        unsigned long now = get_seconds();
        int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

        btrfs_run_ordered_operations(root, 0);

        if (cur_trans->aborted)
                goto cleanup_transaction;

        /* make a pass through all the delayed refs we have so far
         * any running procs may add more while we are here
         */
        ret = btrfs_run_delayed_refs(trans, root, 0);
        if (ret)
                goto cleanup_transaction;

        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;

        cur_trans = trans->transaction;

        /*
         * set the flushing flag so procs in this transaction have to
         * start sending their work down.
         */
        cur_trans->delayed_refs.flushing = 1;

        ret = btrfs_run_delayed_refs(trans, root, 0);
        if (ret)
                goto cleanup_transaction;

        spin_lock(&cur_trans->commit_lock);
        if (cur_trans->in_commit) {
                spin_unlock(&cur_trans->commit_lock);
                atomic_inc(&cur_trans->use_count);
                ret = btrfs_end_transaction(trans, root);

                wait_for_commit(root, cur_trans);

                put_transaction(cur_trans);

                return ret;
        }

        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
        spin_unlock(&cur_trans->commit_lock);
        wake_up(&root->fs_info->transaction_blocked_wait);

        spin_lock(&root->fs_info->trans_lock);
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
                        atomic_inc(&prev_trans->use_count);
                        spin_unlock(&root->fs_info->trans_lock);

                        wait_for_commit(root, prev_trans);

                        put_transaction(prev_trans);
                } else {
                        spin_unlock(&root->fs_info->trans_lock);
                }
        } else {
                spin_unlock(&root->fs_info->trans_lock);
        }

        if (!btrfs_test_opt(root, SSD) &&
            (now < cur_trans->start_time || now - cur_trans->start_time < 1))
                should_grow = 1;

        do {
                int snap_pending = 0;

                joined = cur_trans->num_joined;
                if (!list_empty(&trans->transaction->pending_snapshots))
                        snap_pending = 1;

                WARN_ON(cur_trans != trans->transaction);

                if (flush_on_commit || snap_pending) {
                        btrfs_start_delalloc_inodes(root, 1);
                        btrfs_wait_ordered_extents(root, 0, 1);
                }

                ret = btrfs_run_delayed_items(trans, root);
                if (ret)
                        goto cleanup_transaction;

                /*
                 * running the delayed items may have added new refs. account
                 * them now so that they hinder processing of more delayed refs
                 * as little as possible.
                 */
                btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);

                /*
                 * rename doesn't use btrfs_join_transaction, so once we
                 * set the transaction to blocked above, we aren't going
                 * to get any new ordered operations.  We can safely run
                 * it here and know for sure that nothing new will be
                 * added to the list
                 */
                btrfs_run_ordered_operations(root, 1);

                prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);

                if (atomic_read(&cur_trans->num_writers) > 1)
                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
                else if (should_grow)
                        schedule_timeout(1);

                finish_wait(&cur_trans->writer_wait, &wait);
        } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));

        /*
         * Ok now we need to make sure to block out any other joins while we
         * commit the transaction.  We could have started a join before setting
         * no_join so make sure to wait for num_writers to == 1 again.
         */
        spin_lock(&root->fs_info->trans_lock);
        root->fs_info->trans_no_join = 1;
        spin_unlock(&root->fs_info->trans_lock);
        wait_event(cur_trans->writer_wait,
                   atomic_read(&cur_trans->num_writers) == 1);

        /*
         * the reloc mutex makes sure that we stop
         * the balancing code from coming in and moving
         * extents around in the middle of the commit
         */
        mutex_lock(&root->fs_info->reloc_mutex);

        ret = btrfs_run_delayed_items(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }

        ret = create_pending_snapshots(trans, root->fs_info);
        if (ret) {
                mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret) {
                mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }

        /*
         * make sure none of the code above managed to slip in a
         * delayed item
         */
        btrfs_assert_delayed_root_empty(root);

        WARN_ON(cur_trans != trans->transaction);

        btrfs_scrub_pause(root);
        /* commit_fs_roots() and commit_cowonly_roots() are responsible
         * for getting the various roots consistent with each other.
         * Every pointer in the tree of tree roots has to point to the
         * most up to date root for every subvolume and other tree.  So,
         * we have to keep the tree logging code from jumping in and
         * changing any of the trees.
         *
         * At this point in the commit, there can't be any tree-log
         * writers, but a little lower down we drop the trans mutex
         * and let new people in.  By holding the tree_log_mutex
         * from now until after the super is written, we avoid races
         * with the tree-log code.
         */
        mutex_lock(&root->fs_info->tree_log_mutex);

        ret = commit_fs_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
                mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }

        /* commit_fs_roots gets rid of all the tree log roots, it is now
         * safe to free the root of tree log roots
         */
        btrfs_free_log_root_tree(trans, root->fs_info);

        ret = commit_cowonly_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
                mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }

        btrfs_prepare_extent_commit(trans, root);

        cur_trans = root->fs_info->running_transaction;

        btrfs_set_root_node(&root->fs_info->tree_root->root_item,
                            root->fs_info->tree_root->node);
        switch_commit_root(root->fs_info->tree_root);

        btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
                            root->fs_info->chunk_root->node);
        switch_commit_root(root->fs_info->chunk_root);

        assert_qgroups_uptodate(trans);
        update_super_roots(root);

        if (!root->fs_info->log_root_recovering) {
                btrfs_set_super_log_root(root->fs_info->super_copy, 0);
                btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
        }

        memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
               sizeof(*root->fs_info->super_copy));

        trans->transaction->blocked = 0;
        spin_lock(&root->fs_info->trans_lock);
        root->fs_info->running_transaction = NULL;
        root->fs_info->trans_no_join = 0;
        spin_unlock(&root->fs_info->trans_lock);
        mutex_unlock(&root->fs_info->reloc_mutex);

        wake_up(&root->fs_info->transaction_wait);

        ret = btrfs_write_and_wait_transaction(trans, root);
        if (ret) {
                btrfs_error(root->fs_info, ret,
                            "Error while writing out transaction.");
                mutex_unlock(&root->fs_info->tree_log_mutex);
                goto cleanup_transaction;
        }

        ret = write_ctree_super(trans, root, 0);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
                goto cleanup_transaction;
        }

        /*
         * the super is written, we can safely allow the tree-loggers
         * to go about their business
         */
        mutex_unlock(&root->fs_info->tree_log_mutex);

        btrfs_finish_extent_commit(trans, root);

        cur_trans->commit_done = 1;

        root->fs_info->last_trans_committed = cur_trans->transid;

        wake_up(&cur_trans->commit_wait);

        spin_lock(&root->fs_info->trans_lock);
        list_del_init(&cur_trans->list);
        spin_unlock(&root->fs_info->trans_lock);

        /* one put for this handle's ref, one for the trans_list's ref */
        put_transaction(cur_trans);
        put_transaction(cur_trans);

        sb_end_intwrite(root->fs_info->sb);

        trace_btrfs_transaction_commit(root);

        btrfs_scrub_continue(root);

        if (current->journal_info == trans)
                current->journal_info = NULL;

        kmem_cache_free(btrfs_trans_handle_cachep, trans);

        if (current != root->fs_info->transaction_kthread)
                btrfs_run_delayed_iputs(root);

        return ret;

cleanup_transaction:
        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;
        btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
//      WARN_ON(1);
        if (current->journal_info == trans)
                current->journal_info = NULL;
        cleanup_transaction(trans, root, ret);

        return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
        LIST_HEAD(list);
        struct btrfs_fs_info *fs_info = root->fs_info;

        spin_lock(&fs_info->trans_lock);
        list_splice_init(&fs_info->dead_roots, &list);
        spin_unlock(&fs_info->trans_lock);

        while (!list_empty(&list)) {
                int ret;

                root = list_entry(list.next, struct btrfs_root, root_list);
                list_del(&root->root_list);

                btrfs_kill_all_delayed_nodes(root);

                if (btrfs_header_backref_rev(root->node) <
                    BTRFS_MIXED_BACKREF_REV)
                        ret = btrfs_drop_snapshot(root, NULL, 0, 0);
                else
                        ret = btrfs_drop_snapshot(root, NULL, 1, 0);
                BUG_ON(ret < 0);
        }
        return 0;
}