linux/fs/btrfs/delayed-inode.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2011 Fujitsu.  All rights reserved.
   3 * Written by Miao Xie <miaox@cn.fujitsu.com>
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public
   7 * License v2 as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 * General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public
  15 * License along with this program; if not, write to the
  16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  17 * Boston, MA 021110-1307, USA.
  18 */
  19
  20#include <linux/slab.h>
  21#include "delayed-inode.h"
  22#include "disk-io.h"
  23#include "transaction.h"
  24
  25#define BTRFS_DELAYED_WRITEBACK         400
  26#define BTRFS_DELAYED_BACKGROUND        100
  27
  28static struct kmem_cache *delayed_node_cache;
  29
  30int __init btrfs_delayed_inode_init(void)
  31{
  32        delayed_node_cache = kmem_cache_create("delayed_node",
  33                                        sizeof(struct btrfs_delayed_node),
  34                                        0,
  35                                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
  36                                        NULL);
  37        if (!delayed_node_cache)
  38                return -ENOMEM;
  39        return 0;
  40}
  41
  42void btrfs_delayed_inode_exit(void)
  43{
  44        if (delayed_node_cache)
  45                kmem_cache_destroy(delayed_node_cache);
  46}
  47
  48static inline void btrfs_init_delayed_node(
  49                                struct btrfs_delayed_node *delayed_node,
  50                                struct btrfs_root *root, u64 inode_id)
  51{
  52        delayed_node->root = root;
  53        delayed_node->inode_id = inode_id;
  54        atomic_set(&delayed_node->refs, 0);
  55        delayed_node->count = 0;
  56        delayed_node->in_list = 0;
  57        delayed_node->inode_dirty = 0;
  58        delayed_node->ins_root = RB_ROOT;
  59        delayed_node->del_root = RB_ROOT;
  60        mutex_init(&delayed_node->mutex);
  61        delayed_node->index_cnt = 0;
  62        INIT_LIST_HEAD(&delayed_node->n_list);
  63        INIT_LIST_HEAD(&delayed_node->p_list);
  64        delayed_node->bytes_reserved = 0;
  65        memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
  66}
  67
  68static inline int btrfs_is_continuous_delayed_item(
  69                                        struct btrfs_delayed_item *item1,
  70                                        struct btrfs_delayed_item *item2)
  71{
  72        if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
  73            item1->key.objectid == item2->key.objectid &&
  74            item1->key.type == item2->key.type &&
  75            item1->key.offset + 1 == item2->key.offset)
  76                return 1;
  77        return 0;
  78}
  79
  80static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
  81                                                        struct btrfs_root *root)
  82{
  83        return root->fs_info->delayed_root;
  84}
  85
  86static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
  87{
  88        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
  89        struct btrfs_root *root = btrfs_inode->root;
  90        u64 ino = btrfs_ino(inode);
  91        struct btrfs_delayed_node *node;
  92
  93        node = ACCESS_ONCE(btrfs_inode->delayed_node);
  94        if (node) {
  95                atomic_inc(&node->refs);
  96                return node;
  97        }
  98
  99        spin_lock(&root->inode_lock);
 100        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 101        if (node) {
 102                if (btrfs_inode->delayed_node) {
 103                        atomic_inc(&node->refs);        /* can be accessed */
 104                        BUG_ON(btrfs_inode->delayed_node != node);
 105                        spin_unlock(&root->inode_lock);
 106                        return node;
 107                }
 108                btrfs_inode->delayed_node = node;
 109                atomic_inc(&node->refs);        /* can be accessed */
 110                atomic_inc(&node->refs);        /* cached in the inode */
 111                spin_unlock(&root->inode_lock);
 112                return node;
 113        }
 114        spin_unlock(&root->inode_lock);
 115
 116        return NULL;
 117}
 118
 119/* Will return either the node or PTR_ERR(-ENOMEM) */
 120static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
 121                                                        struct inode *inode)
 122{
 123        struct btrfs_delayed_node *node;
 124        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 125        struct btrfs_root *root = btrfs_inode->root;
 126        u64 ino = btrfs_ino(inode);
 127        int ret;
 128
 129again:
 130        node = btrfs_get_delayed_node(inode);
 131        if (node)
 132                return node;
 133
 134        node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
 135        if (!node)
 136                return ERR_PTR(-ENOMEM);
 137        btrfs_init_delayed_node(node, root, ino);
 138
 139        atomic_inc(&node->refs);        /* cached in the btrfs inode */
 140        atomic_inc(&node->refs);        /* can be accessed */
 141
 142        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
 143        if (ret) {
 144                kmem_cache_free(delayed_node_cache, node);
 145                return ERR_PTR(ret);
 146        }
 147
 148        spin_lock(&root->inode_lock);
 149        ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 150        if (ret == -EEXIST) {
 151                kmem_cache_free(delayed_node_cache, node);
 152                spin_unlock(&root->inode_lock);
 153                radix_tree_preload_end();
 154                goto again;
 155        }
 156        btrfs_inode->delayed_node = node;
 157        spin_unlock(&root->inode_lock);
 158        radix_tree_preload_end();
 159
 160        return node;
 161}
 162
 163/*
 164 * Call it when holding delayed_node->mutex
 165 *
 166 * If mod = 1, add this node into the prepared list.
 167 */
 168static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 169                                     struct btrfs_delayed_node *node,
 170                                     int mod)
 171{
 172        spin_lock(&root->lock);
 173        if (node->in_list) {
 174                if (!list_empty(&node->p_list))
 175                        list_move_tail(&node->p_list, &root->prepare_list);
 176                else if (mod)
 177                        list_add_tail(&node->p_list, &root->prepare_list);
 178        } else {
 179                list_add_tail(&node->n_list, &root->node_list);
 180                list_add_tail(&node->p_list, &root->prepare_list);
 181                atomic_inc(&node->refs);        /* inserted into list */
 182                root->nodes++;
 183                node->in_list = 1;
 184        }
 185        spin_unlock(&root->lock);
 186}
 187
 188/* Call it when holding delayed_node->mutex */
 189static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 190                                       struct btrfs_delayed_node *node)
 191{
 192        spin_lock(&root->lock);
 193        if (node->in_list) {
 194                root->nodes--;
 195                atomic_dec(&node->refs);        /* not in the list */
 196                list_del_init(&node->n_list);
 197                if (!list_empty(&node->p_list))
 198                        list_del_init(&node->p_list);
 199                node->in_list = 0;
 200        }
 201        spin_unlock(&root->lock);
 202}
 203
 204struct btrfs_delayed_node *btrfs_first_delayed_node(
 205                        struct btrfs_delayed_root *delayed_root)
 206{
 207        struct list_head *p;
 208        struct btrfs_delayed_node *node = NULL;
 209
 210        spin_lock(&delayed_root->lock);
 211        if (list_empty(&delayed_root->node_list))
 212                goto out;
 213
 214        p = delayed_root->node_list.next;
 215        node = list_entry(p, struct btrfs_delayed_node, n_list);
 216        atomic_inc(&node->refs);
 217out:
 218        spin_unlock(&delayed_root->lock);
 219
 220        return node;
 221}
 222
 223struct btrfs_delayed_node *btrfs_next_delayed_node(
 224                                                struct btrfs_delayed_node *node)
 225{
 226        struct btrfs_delayed_root *delayed_root;
 227        struct list_head *p;
 228        struct btrfs_delayed_node *next = NULL;
 229
 230        delayed_root = node->root->fs_info->delayed_root;
 231        spin_lock(&delayed_root->lock);
 232        if (!node->in_list) {   /* not in the list */
 233                if (list_empty(&delayed_root->node_list))
 234                        goto out;
 235                p = delayed_root->node_list.next;
 236        } else if (list_is_last(&node->n_list, &delayed_root->node_list))
 237                goto out;
 238        else
 239                p = node->n_list.next;
 240
 241        next = list_entry(p, struct btrfs_delayed_node, n_list);
 242        atomic_inc(&next->refs);
 243out:
 244        spin_unlock(&delayed_root->lock);
 245
 246        return next;
 247}
 248
 249static void __btrfs_release_delayed_node(
 250                                struct btrfs_delayed_node *delayed_node,
 251                                int mod)
 252{
 253        struct btrfs_delayed_root *delayed_root;
 254
 255        if (!delayed_node)
 256                return;
 257
 258        delayed_root = delayed_node->root->fs_info->delayed_root;
 259
 260        mutex_lock(&delayed_node->mutex);
 261        if (delayed_node->count)
 262                btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
 263        else
 264                btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 265        mutex_unlock(&delayed_node->mutex);
 266
 267        if (atomic_dec_and_test(&delayed_node->refs)) {
 268                struct btrfs_root *root = delayed_node->root;
 269                spin_lock(&root->inode_lock);
 270                if (atomic_read(&delayed_node->refs) == 0) {
 271                        radix_tree_delete(&root->delayed_nodes_tree,
 272                                          delayed_node->inode_id);
 273                        kmem_cache_free(delayed_node_cache, delayed_node);
 274                }
 275                spin_unlock(&root->inode_lock);
 276        }
 277}
 278
 279static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
 280{
 281        __btrfs_release_delayed_node(node, 0);
 282}
 283
 284struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 285                                        struct btrfs_delayed_root *delayed_root)
 286{
 287        struct list_head *p;
 288        struct btrfs_delayed_node *node = NULL;
 289
 290        spin_lock(&delayed_root->lock);
 291        if (list_empty(&delayed_root->prepare_list))
 292                goto out;
 293
 294        p = delayed_root->prepare_list.next;
 295        list_del_init(p);
 296        node = list_entry(p, struct btrfs_delayed_node, p_list);
 297        atomic_inc(&node->refs);
 298out:
 299        spin_unlock(&delayed_root->lock);
 300
 301        return node;
 302}
 303
 304static inline void btrfs_release_prepared_delayed_node(
 305                                        struct btrfs_delayed_node *node)
 306{
 307        __btrfs_release_delayed_node(node, 1);
 308}
 309
 310struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 311{
 312        struct btrfs_delayed_item *item;
 313        item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
 314        if (item) {
 315                item->data_len = data_len;
 316                item->ins_or_del = 0;
 317                item->bytes_reserved = 0;
 318                item->delayed_node = NULL;
 319                atomic_set(&item->refs, 1);
 320        }
 321        return item;
 322}
 323
 324/*
 325 * __btrfs_lookup_delayed_item - look up the delayed item by key
 326 * @delayed_node: pointer to the delayed node
 327 * @key:          the key to look up
 328 * @prev:         used to store the prev item if the right item isn't found
 329 * @next:         used to store the next item if the right item isn't found
 330 *
 331 * Note: if we don't find the right item, we will return the prev item and
 332 * the next item.
 333 */
 334static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
 335                                struct rb_root *root,
 336                                struct btrfs_key *key,
 337                                struct btrfs_delayed_item **prev,
 338                                struct btrfs_delayed_item **next)
 339{
 340        struct rb_node *node, *prev_node = NULL;
 341        struct btrfs_delayed_item *delayed_item = NULL;
 342        int ret = 0;
 343
 344        node = root->rb_node;
 345
 346        while (node) {
 347                delayed_item = rb_entry(node, struct btrfs_delayed_item,
 348                                        rb_node);
 349                prev_node = node;
 350                ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
 351                if (ret < 0)
 352                        node = node->rb_right;
 353                else if (ret > 0)
 354                        node = node->rb_left;
 355                else
 356                        return delayed_item;
 357        }
 358
 359        if (prev) {
 360                if (!prev_node)
 361                        *prev = NULL;
 362                else if (ret < 0)
 363                        *prev = delayed_item;
 364                else if ((node = rb_prev(prev_node)) != NULL) {
 365                        *prev = rb_entry(node, struct btrfs_delayed_item,
 366                                         rb_node);
 367                } else
 368                        *prev = NULL;
 369        }
 370
 371        if (next) {
 372                if (!prev_node)
 373                        *next = NULL;
 374                else if (ret > 0)
 375                        *next = delayed_item;
 376                else if ((node = rb_next(prev_node)) != NULL) {
 377                        *next = rb_entry(node, struct btrfs_delayed_item,
 378                                         rb_node);
 379                } else
 380                        *next = NULL;
 381        }
 382        return NULL;
 383}
 384
 385struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 386                                        struct btrfs_delayed_node *delayed_node,
 387                                        struct btrfs_key *key)
 388{
 389        struct btrfs_delayed_item *item;
 390
 391        item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 392                                           NULL, NULL);
 393        return item;
 394}
 395
 396struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
 397                                        struct btrfs_delayed_node *delayed_node,
 398                                        struct btrfs_key *key)
 399{
 400        struct btrfs_delayed_item *item;
 401
 402        item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
 403                                           NULL, NULL);
 404        return item;
 405}
 406
 407struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
 408                                        struct btrfs_delayed_node *delayed_node,
 409                                        struct btrfs_key *key)
 410{
 411        struct btrfs_delayed_item *item, *next;
 412
 413        item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 414                                           NULL, &next);
 415        if (!item)
 416                item = next;
 417
 418        return item;
 419}
 420
 421struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
 422                                        struct btrfs_delayed_node *delayed_node,
 423                                        struct btrfs_key *key)
 424{
 425        struct btrfs_delayed_item *item, *next;
 426
 427        item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
 428                                           NULL, &next);
 429        if (!item)
 430                item = next;
 431
 432        return item;
 433}
 434
 435static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
 436                                    struct btrfs_delayed_item *ins,
 437                                    int action)
 438{
 439        struct rb_node **p, *node;
 440        struct rb_node *parent_node = NULL;
 441        struct rb_root *root;
 442        struct btrfs_delayed_item *item;
 443        int cmp;
 444
 445        if (action == BTRFS_DELAYED_INSERTION_ITEM)
 446                root = &delayed_node->ins_root;
 447        else if (action == BTRFS_DELAYED_DELETION_ITEM)
 448                root = &delayed_node->del_root;
 449        else
 450                BUG();
 451        p = &root->rb_node;
 452        node = &ins->rb_node;
 453
 454        while (*p) {
 455                parent_node = *p;
 456                item = rb_entry(parent_node, struct btrfs_delayed_item,
 457                                 rb_node);
 458
 459                cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
 460                if (cmp < 0)
 461                        p = &(*p)->rb_right;
 462                else if (cmp > 0)
 463                        p = &(*p)->rb_left;
 464                else
 465                        return -EEXIST;
 466        }
 467
 468        rb_link_node(node, parent_node, p);
 469        rb_insert_color(node, root);
 470        ins->delayed_node = delayed_node;
 471        ins->ins_or_del = action;
 472
 473        if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
 474            action == BTRFS_DELAYED_INSERTION_ITEM &&
 475            ins->key.offset >= delayed_node->index_cnt)
 476                        delayed_node->index_cnt = ins->key.offset + 1;
 477
 478        delayed_node->count++;
 479        atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
 480        return 0;
 481}
 482
 483static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
 484                                              struct btrfs_delayed_item *item)
 485{
 486        return __btrfs_add_delayed_item(node, item,
 487                                        BTRFS_DELAYED_INSERTION_ITEM);
 488}
 489
 490static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
 491                                             struct btrfs_delayed_item *item)
 492{
 493        return __btrfs_add_delayed_item(node, item,
 494                                        BTRFS_DELAYED_DELETION_ITEM);
 495}
 496
 497static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 498{
 499        struct rb_root *root;
 500        struct btrfs_delayed_root *delayed_root;
 501
 502        delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
 503
 504        BUG_ON(!delayed_root);
 505        BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
 506               delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
 507
 508        if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
 509                root = &delayed_item->delayed_node->ins_root;
 510        else
 511                root = &delayed_item->delayed_node->del_root;
 512
 513        rb_erase(&delayed_item->rb_node, root);
 514        delayed_item->delayed_node->count--;
 515        if (atomic_dec_return(&delayed_root->items) <
 516            BTRFS_DELAYED_BACKGROUND &&
 517            waitqueue_active(&delayed_root->wait))
 518                wake_up(&delayed_root->wait);
 519}
 520
 521static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 522{
 523        if (item) {
 524                __btrfs_remove_delayed_item(item);
 525                if (atomic_dec_and_test(&item->refs))
 526                        kfree(item);
 527        }
 528}
 529
 530struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 531                                        struct btrfs_delayed_node *delayed_node)
 532{
 533        struct rb_node *p;
 534        struct btrfs_delayed_item *item = NULL;
 535
 536        p = rb_first(&delayed_node->ins_root);
 537        if (p)
 538                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 539
 540        return item;
 541}
 542
 543struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 544                                        struct btrfs_delayed_node *delayed_node)
 545{
 546        struct rb_node *p;
 547        struct btrfs_delayed_item *item = NULL;
 548
 549        p = rb_first(&delayed_node->del_root);
 550        if (p)
 551                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 552
 553        return item;
 554}
 555
 556struct btrfs_delayed_item *__btrfs_next_delayed_item(
 557                                                struct btrfs_delayed_item *item)
 558{
 559        struct rb_node *p;
 560        struct btrfs_delayed_item *next = NULL;
 561
 562        p = rb_next(&item->rb_node);
 563        if (p)
 564                next = rb_entry(p, struct btrfs_delayed_item, rb_node);
 565
 566        return next;
 567}
 568
 569static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
 570                                                   u64 root_id)
 571{
 572        struct btrfs_key root_key;
 573
 574        if (root->objectid == root_id)
 575                return root;
 576
 577        root_key.objectid = root_id;
 578        root_key.type = BTRFS_ROOT_ITEM_KEY;
 579        root_key.offset = (u64)-1;
 580        return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
 581}
 582
 583static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 584                                               struct btrfs_root *root,
 585                                               struct btrfs_delayed_item *item)
 586{
 587        struct btrfs_block_rsv *src_rsv;
 588        struct btrfs_block_rsv *dst_rsv;
 589        u64 num_bytes;
 590        int ret;
 591
 592        if (!trans->bytes_reserved)
 593                return 0;
 594
 595        src_rsv = trans->block_rsv;
 596        dst_rsv = &root->fs_info->delayed_block_rsv;
 597
 598        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 599        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 600        if (!ret) {
 601                trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 602                                              item->key.objectid,
 603                                              num_bytes, 1);
 604                item->bytes_reserved = num_bytes;
 605        }
 606
 607        return ret;
 608}
 609
 610static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 611                                                struct btrfs_delayed_item *item)
 612{
 613        struct btrfs_block_rsv *rsv;
 614
 615        if (!item->bytes_reserved)
 616                return;
 617
 618        rsv = &root->fs_info->delayed_block_rsv;
 619        trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 620                                      item->key.objectid, item->bytes_reserved,
 621                                      0);
 622        btrfs_block_rsv_release(root, rsv,
 623                                item->bytes_reserved);
 624}
 625
 626static int btrfs_delayed_inode_reserve_metadata(
 627                                        struct btrfs_trans_handle *trans,
 628                                        struct btrfs_root *root,
 629                                        struct inode *inode,
 630                                        struct btrfs_delayed_node *node)
 631{
 632        struct btrfs_block_rsv *src_rsv;
 633        struct btrfs_block_rsv *dst_rsv;
 634        u64 num_bytes;
 635        int ret;
 636        bool release = false;
 637
 638        src_rsv = trans->block_rsv;
 639        dst_rsv = &root->fs_info->delayed_block_rsv;
 640
 641        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 642
 643        /*
 644         * btrfs_dirty_inode will update the inode under btrfs_join_transaction
 645         * which doesn't reserve space for speed.  This is a problem since we
 646         * still need to reserve space for this update, so try to reserve the
 647         * space.
 648         *
 649         * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
 650         * we're accounted for.
 651         */
 652        if (!src_rsv || (!trans->bytes_reserved &&
 653            src_rsv != &root->fs_info->delalloc_block_rsv)) {
 654                ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
 655                /*
 656                 * Since we're under a transaction reserve_metadata_bytes could
 657                 * try to commit the transaction which will make it return
 658                 * EAGAIN to make us stop the transaction we have, so return
 659                 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
 660                 */
 661                if (ret == -EAGAIN)
 662                        ret = -ENOSPC;
 663                if (!ret) {
 664                        node->bytes_reserved = num_bytes;
 665                        trace_btrfs_space_reservation(root->fs_info,
 666                                                      "delayed_inode",
 667                                                      btrfs_ino(inode),
 668                                                      num_bytes, 1);
 669                }
 670                return ret;
 671        } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
 672                spin_lock(&BTRFS_I(inode)->lock);
 673                if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
 674                                       &BTRFS_I(inode)->runtime_flags)) {
 675                        spin_unlock(&BTRFS_I(inode)->lock);
 676                        release = true;
 677                        goto migrate;
 678                }
 679                spin_unlock(&BTRFS_I(inode)->lock);
 680
 681                /* Ok we didn't have space pre-reserved.  This shouldn't happen
 682                 * too often but it can happen if we do delalloc to an existing
 683                 * inode which gets dirtied because of the time update, and then
 684                 * isn't touched again until after the transaction commits and
 685                 * then we try to write out the data.  First try to be nice and
 686                 * reserve something strictly for us.  If not be a pain and try
 687                 * to steal from the delalloc block rsv.
 688                 */
 689                ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
 690                if (!ret)
 691                        goto out;
 692
 693                ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 694                if (!ret)
 695                        goto out;
 696
 697                /*
 698                 * Ok this is a problem, let's just steal from the global rsv
 699                 * since this really shouldn't happen that often.
 700                 */
 701                WARN_ON(1);
 702                ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
 703                                              dst_rsv, num_bytes);
 704                goto out;
 705        }
 706
 707migrate:
 708        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 709
 710out:
 711        /*
 712         * Migrate only takes a reservation, it doesn't touch the size of the
 713         * block_rsv.  This is to simplify people who don't normally have things
 714         * migrated from their block rsv.  If they go to release their
 715         * reservation, that will decrease the size as well, so if migrate
 716         * reduced size we'd end up with a negative size.  But for the
 717         * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
 718         * but we could in fact do this reserve/migrate dance several times
 719         * between the time we did the original reservation and we'd clean it
 720         * up.  So to take care of this, release the space for the meta
 721         * reservation here.  I think it may be time for a documentation page on
 722         * how block rsvs. work.
 723         */
 724        if (!ret) {
 725                trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 726                                              btrfs_ino(inode), num_bytes, 1);
 727                node->bytes_reserved = num_bytes;
 728        }
 729
 730        if (release) {
 731                trace_btrfs_space_reservation(root->fs_info, "delalloc",
 732                                              btrfs_ino(inode), num_bytes, 0);
 733                btrfs_block_rsv_release(root, src_rsv, num_bytes);
 734        }
 735
 736        return ret;
 737}
 738
 739static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
 740                                                struct btrfs_delayed_node *node)
 741{
 742        struct btrfs_block_rsv *rsv;
 743
 744        if (!node->bytes_reserved)
 745                return;
 746
 747        rsv = &root->fs_info->delayed_block_rsv;
 748        trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 749                                      node->inode_id, node->bytes_reserved, 0);
 750        btrfs_block_rsv_release(root, rsv,
 751                                node->bytes_reserved);
 752        node->bytes_reserved = 0;
 753}
 754
 755/*
 756 * This helper will insert some continuous items into the same leaf according
 757 * to the free space of the leaf.
 758 */
 759static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
 760                                struct btrfs_root *root,
 761                                struct btrfs_path *path,
 762                                struct btrfs_delayed_item *item)
 763{
 764        struct btrfs_delayed_item *curr, *next;
 765        int free_space;
 766        int total_data_size = 0, total_size = 0;
 767        struct extent_buffer *leaf;
 768        char *data_ptr;
 769        struct btrfs_key *keys;
 770        u32 *data_size;
 771        struct list_head head;
 772        int slot;
 773        int nitems;
 774        int i;
 775        int ret = 0;
 776
 777        BUG_ON(!path->nodes[0]);
 778
 779        leaf = path->nodes[0];
 780        free_space = btrfs_leaf_free_space(root, leaf);
 781        INIT_LIST_HEAD(&head);
 782
 783        next = item;
 784        nitems = 0;
 785
 786        /*
 787         * count the number of the continuous items that we can insert in batch
 788         */
 789        while (total_size + next->data_len + sizeof(struct btrfs_item) <=
 790               free_space) {
 791                total_data_size += next->data_len;
 792                total_size += next->data_len + sizeof(struct btrfs_item);
 793                list_add_tail(&next->tree_list, &head);
 794                nitems++;
 795
 796                curr = next;
 797                next = __btrfs_next_delayed_item(curr);
 798                if (!next)
 799                        break;
 800
 801                if (!btrfs_is_continuous_delayed_item(curr, next))
 802                        break;
 803        }
 804
 805        if (!nitems) {
 806                ret = 0;
 807                goto out;
 808        }
 809
 810        /*
 811         * we need allocate some memory space, but it might cause the task
 812         * to sleep, so we set all locked nodes in the path to blocking locks
 813         * first.
 814         */
 815        btrfs_set_path_blocking(path);
 816
 817        keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
 818        if (!keys) {
 819                ret = -ENOMEM;
 820                goto out;
 821        }
 822
 823        data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
 824        if (!data_size) {
 825                ret = -ENOMEM;
 826                goto error;
 827        }
 828
 829        /* get keys of all the delayed items */
 830        i = 0;
 831        list_for_each_entry(next, &head, tree_list) {
 832                keys[i] = next->key;
 833                data_size[i] = next->data_len;
 834                i++;
 835        }
 836
 837        /* reset all the locked nodes in the patch to spinning locks. */
 838        btrfs_clear_path_blocking(path, NULL, 0);
 839
 840        /* insert the keys of the items */
 841        setup_items_for_insert(trans, root, path, keys, data_size,
 842                               total_data_size, total_size, nitems);
 843
 844        /* insert the dir index items */
 845        slot = path->slots[0];
 846        list_for_each_entry_safe(curr, next, &head, tree_list) {
 847                data_ptr = btrfs_item_ptr(leaf, slot, char);
 848                write_extent_buffer(leaf, &curr->data,
 849                                    (unsigned long)data_ptr,
 850                                    curr->data_len);
 851                slot++;
 852
 853                btrfs_delayed_item_release_metadata(root, curr);
 854
 855                list_del(&curr->tree_list);
 856                btrfs_release_delayed_item(curr);
 857        }
 858
 859error:
 860        kfree(data_size);
 861        kfree(keys);
 862out:
 863        return ret;
 864}
 865
 866/*
 867 * This helper can just do simple insertion that needn't extend item for new
 868 * data, such as directory name index insertion, inode insertion.
 869 */
 870static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 871                                     struct btrfs_root *root,
 872                                     struct btrfs_path *path,
 873                                     struct btrfs_delayed_item *delayed_item)
 874{
 875        struct extent_buffer *leaf;
 876        struct btrfs_item *item;
 877        char *ptr;
 878        int ret;
 879
 880        ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
 881                                      delayed_item->data_len);
 882        if (ret < 0 && ret != -EEXIST)
 883                return ret;
 884
 885        leaf = path->nodes[0];
 886
 887        item = btrfs_item_nr(leaf, path->slots[0]);
 888        ptr = btrfs_item_ptr(leaf, path->slots[0], char);
 889
 890        write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
 891                            delayed_item->data_len);
 892        btrfs_mark_buffer_dirty(leaf);
 893
 894        btrfs_delayed_item_release_metadata(root, delayed_item);
 895        return 0;
 896}
 897
 898/*
 899 * we insert an item first, then if there are some continuous items, we try
 900 * to insert those items into the same leaf.
 901 */
 902static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
 903                                      struct btrfs_path *path,
 904                                      struct btrfs_root *root,
 905                                      struct btrfs_delayed_node *node)
 906{
 907        struct btrfs_delayed_item *curr, *prev;
 908        int ret = 0;
 909
 910do_again:
 911        mutex_lock(&node->mutex);
 912        curr = __btrfs_first_delayed_insertion_item(node);
 913        if (!curr)
 914                goto insert_end;
 915
 916        ret = btrfs_insert_delayed_item(trans, root, path, curr);
 917        if (ret < 0) {
 918                btrfs_release_path(path);
 919                goto insert_end;
 920        }
 921
 922        prev = curr;
 923        curr = __btrfs_next_delayed_item(prev);
 924        if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
 925                /* insert the continuous items into the same leaf */
 926                path->slots[0]++;
 927                btrfs_batch_insert_items(trans, root, path, curr);
 928        }
 929        btrfs_release_delayed_item(prev);
 930        btrfs_mark_buffer_dirty(path->nodes[0]);
 931
 932        btrfs_release_path(path);
 933        mutex_unlock(&node->mutex);
 934        goto do_again;
 935
 936insert_end:
 937        mutex_unlock(&node->mutex);
 938        return ret;
 939}
 940
 941static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 942                                    struct btrfs_root *root,
 943                                    struct btrfs_path *path,
 944                                    struct btrfs_delayed_item *item)
 945{
 946        struct btrfs_delayed_item *curr, *next;
 947        struct extent_buffer *leaf;
 948        struct btrfs_key key;
 949        struct list_head head;
 950        int nitems, i, last_item;
 951        int ret = 0;
 952
 953        BUG_ON(!path->nodes[0]);
 954
 955        leaf = path->nodes[0];
 956
 957        i = path->slots[0];
 958        last_item = btrfs_header_nritems(leaf) - 1;
 959        if (i > last_item)
 960                return -ENOENT; /* FIXME: Is errno suitable? */
 961
 962        next = item;
 963        INIT_LIST_HEAD(&head);
 964        btrfs_item_key_to_cpu(leaf, &key, i);
 965        nitems = 0;
 966        /*
 967         * count the number of the dir index items that we can delete in batch
 968         */
 969        while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
 970                list_add_tail(&next->tree_list, &head);
 971                nitems++;
 972
 973                curr = next;
 974                next = __btrfs_next_delayed_item(curr);
 975                if (!next)
 976                        break;
 977
 978                if (!btrfs_is_continuous_delayed_item(curr, next))
 979                        break;
 980
 981                i++;
 982                if (i > last_item)
 983                        break;
 984                btrfs_item_key_to_cpu(leaf, &key, i);
 985        }
 986
 987        if (!nitems)
 988                return 0;
 989
 990        ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
 991        if (ret)
 992                goto out;
 993
 994        list_for_each_entry_safe(curr, next, &head, tree_list) {
 995                btrfs_delayed_item_release_metadata(root, curr);
 996                list_del(&curr->tree_list);
 997                btrfs_release_delayed_item(curr);
 998        }
 999
1000out:
1001        return ret;
1002}
1003
1004static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
1005                                      struct btrfs_path *path,
1006                                      struct btrfs_root *root,
1007                                      struct btrfs_delayed_node *node)
1008{
1009        struct btrfs_delayed_item *curr, *prev;
1010        int ret = 0;
1011
1012do_again:
1013        mutex_lock(&node->mutex);
1014        curr = __btrfs_first_delayed_deletion_item(node);
1015        if (!curr)
1016                goto delete_fail;
1017
1018        ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
1019        if (ret < 0)
1020                goto delete_fail;
1021        else if (ret > 0) {
1022                /*
1023                 * can't find the item which the node points to, so this node
1024                 * is invalid, just drop it.
1025                 */
1026                prev = curr;
1027                curr = __btrfs_next_delayed_item(prev);
1028                btrfs_release_delayed_item(prev);
1029                ret = 0;
1030                btrfs_release_path(path);
1031                if (curr) {
1032                        mutex_unlock(&node->mutex);
1033                        goto do_again;
1034                } else
1035                        goto delete_fail;
1036        }
1037
1038        btrfs_batch_delete_items(trans, root, path, curr);
1039        btrfs_release_path(path);
1040        mutex_unlock(&node->mutex);
1041        goto do_again;
1042
1043delete_fail:
1044        btrfs_release_path(path);
1045        mutex_unlock(&node->mutex);
1046        return ret;
1047}
1048
1049static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1050{
1051        struct btrfs_delayed_root *delayed_root;
1052
1053        if (delayed_node && delayed_node->inode_dirty) {
1054                BUG_ON(!delayed_node->root);
1055                delayed_node->inode_dirty = 0;
1056                delayed_node->count--;
1057
1058                delayed_root = delayed_node->root->fs_info->delayed_root;
1059                if (atomic_dec_return(&delayed_root->items) <
1060                    BTRFS_DELAYED_BACKGROUND &&
1061                    waitqueue_active(&delayed_root->wait))
1062                        wake_up(&delayed_root->wait);
1063        }
1064}
1065
1066static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1067                                      struct btrfs_root *root,
1068                                      struct btrfs_path *path,
1069                                      struct btrfs_delayed_node *node)
1070{
1071        struct btrfs_key key;
1072        struct btrfs_inode_item *inode_item;
1073        struct extent_buffer *leaf;
1074        int ret;
1075
1076        mutex_lock(&node->mutex);
1077        if (!node->inode_dirty) {
1078                mutex_unlock(&node->mutex);
1079                return 0;
1080        }
1081
1082        key.objectid = node->inode_id;
1083        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1084        key.offset = 0;
1085        ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1086        if (ret > 0) {
1087                btrfs_release_path(path);
1088                mutex_unlock(&node->mutex);
1089                return -ENOENT;
1090        } else if (ret < 0) {
1091                mutex_unlock(&node->mutex);
1092                return ret;
1093        }
1094
1095        btrfs_unlock_up_safe(path, 1);
1096        leaf = path->nodes[0];
1097        inode_item = btrfs_item_ptr(leaf, path->slots[0],
1098                                    struct btrfs_inode_item);
1099        write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1100                            sizeof(struct btrfs_inode_item));
1101        btrfs_mark_buffer_dirty(leaf);
1102        btrfs_release_path(path);
1103
1104        btrfs_delayed_inode_release_metadata(root, node);
1105        btrfs_release_delayed_inode(node);
1106        mutex_unlock(&node->mutex);
1107
1108        return 0;
1109}
1110
1111/*
1112 * Called when committing the transaction.
1113 * Returns 0 on success.
1114 * Returns < 0 on error and returns with an aborted transaction with any
1115 * outstanding delayed items cleaned up.
1116 */
1117static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1118                                     struct btrfs_root *root, int nr)
1119{
1120        struct btrfs_root *curr_root = root;
1121        struct btrfs_delayed_root *delayed_root;
1122        struct btrfs_delayed_node *curr_node, *prev_node;
1123        struct btrfs_path *path;
1124        struct btrfs_block_rsv *block_rsv;
1125        int ret = 0;
1126        bool count = (nr > 0);
1127
1128        if (trans->aborted)
1129                return -EIO;
1130
1131        path = btrfs_alloc_path();
1132        if (!path)
1133                return -ENOMEM;
1134        path->leave_spinning = 1;
1135
1136        block_rsv = trans->block_rsv;
1137        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1138
1139        delayed_root = btrfs_get_delayed_root(root);
1140
1141        curr_node = btrfs_first_delayed_node(delayed_root);
1142        while (curr_node && (!count || (count && nr--))) {
1143                curr_root = curr_node->root;
1144                ret = btrfs_insert_delayed_items(trans, path, curr_root,
1145                                                 curr_node);
1146                if (!ret)
1147                        ret = btrfs_delete_delayed_items(trans, path,
1148                                                curr_root, curr_node);
1149                if (!ret)
1150                        ret = btrfs_update_delayed_inode(trans, curr_root,
1151                                                path, curr_node);
1152                if (ret) {
1153                        btrfs_release_delayed_node(curr_node);
1154                        curr_node = NULL;
1155                        btrfs_abort_transaction(trans, root, ret);
1156                        break;
1157                }
1158
1159                prev_node = curr_node;
1160                curr_node = btrfs_next_delayed_node(curr_node);
1161                btrfs_release_delayed_node(prev_node);
1162        }
1163
1164        if (curr_node)
1165                btrfs_release_delayed_node(curr_node);
1166        btrfs_free_path(path);
1167        trans->block_rsv = block_rsv;
1168
1169        return ret;
1170}
1171
1172int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1173                            struct btrfs_root *root)
1174{
1175        return __btrfs_run_delayed_items(trans, root, -1);
1176}
1177
1178int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1179                               struct btrfs_root *root, int nr)
1180{
1181        return __btrfs_run_delayed_items(trans, root, nr);
1182}
1183
1184static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1185                                              struct btrfs_delayed_node *node)
1186{
1187        struct btrfs_path *path;
1188        struct btrfs_block_rsv *block_rsv;
1189        int ret;
1190
1191        path = btrfs_alloc_path();
1192        if (!path)
1193                return -ENOMEM;
1194        path->leave_spinning = 1;
1195
1196        block_rsv = trans->block_rsv;
1197        trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
1198
1199        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1200        if (!ret)
1201                ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1202        if (!ret)
1203                ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1204        btrfs_free_path(path);
1205
1206        trans->block_rsv = block_rsv;
1207        return ret;
1208}
1209
1210int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1211                                     struct inode *inode)
1212{
1213        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1214        int ret;
1215
1216        if (!delayed_node)
1217                return 0;
1218
1219        mutex_lock(&delayed_node->mutex);
1220        if (!delayed_node->count) {
1221                mutex_unlock(&delayed_node->mutex);
1222                btrfs_release_delayed_node(delayed_node);
1223                return 0;
1224        }
1225        mutex_unlock(&delayed_node->mutex);
1226
1227        ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1228        btrfs_release_delayed_node(delayed_node);
1229        return ret;
1230}
1231
1232void btrfs_remove_delayed_node(struct inode *inode)
1233{
1234        struct btrfs_delayed_node *delayed_node;
1235
1236        delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1237        if (!delayed_node)
1238                return;
1239
1240        BTRFS_I(inode)->delayed_node = NULL;
1241        btrfs_release_delayed_node(delayed_node);
1242}
1243
1244struct btrfs_async_delayed_node {
1245        struct btrfs_root *root;
1246        struct btrfs_delayed_node *delayed_node;
1247        struct btrfs_work work;
1248};
1249
1250static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1251{
1252        struct btrfs_async_delayed_node *async_node;
1253        struct btrfs_trans_handle *trans;
1254        struct btrfs_path *path;
1255        struct btrfs_delayed_node *delayed_node = NULL;
1256        struct btrfs_root *root;
1257        struct btrfs_block_rsv *block_rsv;
1258        unsigned long nr = 0;
1259        int need_requeue = 0;
1260        int ret;
1261
1262        async_node = container_of(work, struct btrfs_async_delayed_node, work);
1263
1264        path = btrfs_alloc_path();
1265        if (!path)
1266                goto out;
1267        path->leave_spinning = 1;
1268
1269        delayed_node = async_node->delayed_node;
1270        root = delayed_node->root;
1271
1272        trans = btrfs_join_transaction(root);
1273        if (IS_ERR(trans))
1274                goto free_path;
1275
1276        block_rsv = trans->block_rsv;
1277        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1278
1279        ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1280        if (!ret)
1281                ret = btrfs_delete_delayed_items(trans, path, root,
1282                                                 delayed_node);
1283
1284        if (!ret)
1285                btrfs_update_delayed_inode(trans, root, path, delayed_node);
1286
1287        /*
1288         * Maybe new delayed items have been inserted, so we need requeue
1289         * the work. Besides that, we must dequeue the empty delayed nodes
1290         * to avoid the race between delayed items balance and the worker.
1291         * The race like this:
1292         *      Task1                           Worker thread
1293         *                                      count == 0, needn't requeue
1294         *                                        also needn't insert the
1295         *                                        delayed node into prepare
1296         *                                        list again.
1297         *      add lots of delayed items
1298         *      queue the delayed node
1299         *        already in the list,
1300         *        and not in the prepare
1301         *        list, it means the delayed
1302         *        node is being dealt with
1303         *        by the worker.
1304         *      do delayed items balance
1305         *        the delayed node is being
1306         *        dealt with by the worker
1307         *        now, just wait.
1308         *                                      the worker goto idle.
1309         * Task1 will sleep until the transaction is commited.
1310         */
1311        mutex_lock(&delayed_node->mutex);
1312        if (delayed_node->count)
1313                need_requeue = 1;
1314        else
1315                btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1316                                           delayed_node);
1317        mutex_unlock(&delayed_node->mutex);
1318
1319        nr = trans->blocks_used;
1320
1321        trans->block_rsv = block_rsv;
1322        btrfs_end_transaction_dmeta(trans, root);
1323        __btrfs_btree_balance_dirty(root, nr);
1324free_path:
1325        btrfs_free_path(path);
1326out:
1327        if (need_requeue)
1328                btrfs_requeue_work(&async_node->work);
1329        else {
1330                btrfs_release_prepared_delayed_node(delayed_node);
1331                kfree(async_node);
1332        }
1333}
1334
1335static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1336                                     struct btrfs_root *root, int all)
1337{
1338        struct btrfs_async_delayed_node *async_node;
1339        struct btrfs_delayed_node *curr;
1340        int count = 0;
1341
1342again:
1343        curr = btrfs_first_prepared_delayed_node(delayed_root);
1344        if (!curr)
1345                return 0;
1346
1347        async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1348        if (!async_node) {
1349                btrfs_release_prepared_delayed_node(curr);
1350                return -ENOMEM;
1351        }
1352
1353        async_node->root = root;
1354        async_node->delayed_node = curr;
1355
1356        async_node->work.func = btrfs_async_run_delayed_node_done;
1357        async_node->work.flags = 0;
1358
1359        btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1360        count++;
1361
1362        if (all || count < 4)
1363                goto again;
1364
1365        return 0;
1366}
1367
1368void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1369{
1370        struct btrfs_delayed_root *delayed_root;
1371        delayed_root = btrfs_get_delayed_root(root);
1372        WARN_ON(btrfs_first_delayed_node(delayed_root));
1373}
1374
1375void btrfs_balance_delayed_items(struct btrfs_root *root)
1376{
1377        struct btrfs_delayed_root *delayed_root;
1378
1379        delayed_root = btrfs_get_delayed_root(root);
1380
1381        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1382                return;
1383
1384        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1385                int ret;
1386                ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1387                if (ret)
1388                        return;
1389
1390                wait_event_interruptible_timeout(
1391                                delayed_root->wait,
1392                                (atomic_read(&delayed_root->items) <
1393                                 BTRFS_DELAYED_BACKGROUND),
1394                                HZ);
1395                return;
1396        }
1397
1398        btrfs_wq_run_delayed_node(delayed_root, root, 0);
1399}
1400
1401/* Will return 0 or -ENOMEM */
1402int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1403                                   struct btrfs_root *root, const char *name,
1404                                   int name_len, struct inode *dir,
1405                                   struct btrfs_disk_key *disk_key, u8 type,
1406                                   u64 index)
1407{
1408        struct btrfs_delayed_node *delayed_node;
1409        struct btrfs_delayed_item *delayed_item;
1410        struct btrfs_dir_item *dir_item;
1411        int ret;
1412
1413        delayed_node = btrfs_get_or_create_delayed_node(dir);
1414        if (IS_ERR(delayed_node))
1415                return PTR_ERR(delayed_node);
1416
1417        delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1418        if (!delayed_item) {
1419                ret = -ENOMEM;
1420                goto release_node;
1421        }
1422
1423        delayed_item->key.objectid = btrfs_ino(dir);
1424        btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1425        delayed_item->key.offset = index;
1426
1427        dir_item = (struct btrfs_dir_item *)delayed_item->data;
1428        dir_item->location = *disk_key;
1429        dir_item->transid = cpu_to_le64(trans->transid);
1430        dir_item->data_len = 0;
1431        dir_item->name_len = cpu_to_le16(name_len);
1432        dir_item->type = type;
1433        memcpy((char *)(dir_item + 1), name, name_len);
1434
1435        ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1436        /*
1437         * we have reserved enough space when we start a new transaction,
1438         * so reserving metadata failure is impossible
1439         */
1440        BUG_ON(ret);
1441
1442
1443        mutex_lock(&delayed_node->mutex);
1444        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1445        if (unlikely(ret)) {
1446                printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1447                                "the insertion tree of the delayed node"
1448                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1449                                name,
1450                                (unsigned long long)delayed_node->root->objectid,
1451                                (unsigned long long)delayed_node->inode_id,
1452                                ret);
1453                BUG();
1454        }
1455        mutex_unlock(&delayed_node->mutex);
1456
1457release_node:
1458        btrfs_release_delayed_node(delayed_node);
1459        return ret;
1460}
1461
1462static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1463                                               struct btrfs_delayed_node *node,
1464                                               struct btrfs_key *key)
1465{
1466        struct btrfs_delayed_item *item;
1467
1468        mutex_lock(&node->mutex);
1469        item = __btrfs_lookup_delayed_insertion_item(node, key);
1470        if (!item) {
1471                mutex_unlock(&node->mutex);
1472                return 1;
1473        }
1474
1475        btrfs_delayed_item_release_metadata(root, item);
1476        btrfs_release_delayed_item(item);
1477        mutex_unlock(&node->mutex);
1478        return 0;
1479}
1480
1481int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1482                                   struct btrfs_root *root, struct inode *dir,
1483                                   u64 index)
1484{
1485        struct btrfs_delayed_node *node;
1486        struct btrfs_delayed_item *item;
1487        struct btrfs_key item_key;
1488        int ret;
1489
1490        node = btrfs_get_or_create_delayed_node(dir);
1491        if (IS_ERR(node))
1492                return PTR_ERR(node);
1493
1494        item_key.objectid = btrfs_ino(dir);
1495        btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1496        item_key.offset = index;
1497
1498        ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1499        if (!ret)
1500                goto end;
1501
1502        item = btrfs_alloc_delayed_item(0);
1503        if (!item) {
1504                ret = -ENOMEM;
1505                goto end;
1506        }
1507
1508        item->key = item_key;
1509
1510        ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1511        /*
1512         * we have reserved enough space when we start a new transaction,
1513         * so reserving metadata failure is impossible.
1514         */
1515        BUG_ON(ret);
1516
1517        mutex_lock(&node->mutex);
1518        ret = __btrfs_add_delayed_deletion_item(node, item);
1519        if (unlikely(ret)) {
1520                printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1521                                "into the deletion tree of the delayed node"
1522                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1523                                (unsigned long long)index,
1524                                (unsigned long long)node->root->objectid,
1525                                (unsigned long long)node->inode_id,
1526                                ret);
1527                BUG();
1528        }
1529        mutex_unlock(&node->mutex);
1530end:
1531        btrfs_release_delayed_node(node);
1532        return ret;
1533}
1534
1535int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1536{
1537        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1538
1539        if (!delayed_node)
1540                return -ENOENT;
1541
1542        /*
1543         * Since we have held i_mutex of this directory, it is impossible that
1544         * a new directory index is added into the delayed node and index_cnt
1545         * is updated now. So we needn't lock the delayed node.
1546         */
1547        if (!delayed_node->index_cnt) {
1548                btrfs_release_delayed_node(delayed_node);
1549                return -EINVAL;
1550        }
1551
1552        BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1553        btrfs_release_delayed_node(delayed_node);
1554        return 0;
1555}
1556
1557void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1558                             struct list_head *del_list)
1559{
1560        struct btrfs_delayed_node *delayed_node;
1561        struct btrfs_delayed_item *item;
1562
1563        delayed_node = btrfs_get_delayed_node(inode);
1564        if (!delayed_node)
1565                return;
1566
1567        mutex_lock(&delayed_node->mutex);
1568        item = __btrfs_first_delayed_insertion_item(delayed_node);
1569        while (item) {
1570                atomic_inc(&item->refs);
1571                list_add_tail(&item->readdir_list, ins_list);
1572                item = __btrfs_next_delayed_item(item);
1573        }
1574
1575        item = __btrfs_first_delayed_deletion_item(delayed_node);
1576        while (item) {
1577                atomic_inc(&item->refs);
1578                list_add_tail(&item->readdir_list, del_list);
1579                item = __btrfs_next_delayed_item(item);
1580        }
1581        mutex_unlock(&delayed_node->mutex);
1582        /*
1583         * This delayed node is still cached in the btrfs inode, so refs
1584         * must be > 1 now, and we needn't check it is going to be freed
1585         * or not.
1586         *
1587         * Besides that, this function is used to read dir, we do not
1588         * insert/delete delayed items in this period. So we also needn't
1589         * requeue or dequeue this delayed node.
1590         */
1591        atomic_dec(&delayed_node->refs);
1592}
1593
1594void btrfs_put_delayed_items(struct list_head *ins_list,
1595                             struct list_head *del_list)
1596{
1597        struct btrfs_delayed_item *curr, *next;
1598
1599        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1600                list_del(&curr->readdir_list);
1601                if (atomic_dec_and_test(&curr->refs))
1602                        kfree(curr);
1603        }
1604
1605        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1606                list_del(&curr->readdir_list);
1607                if (atomic_dec_and_test(&curr->refs))
1608                        kfree(curr);
1609        }
1610}
1611
1612int btrfs_should_delete_dir_index(struct list_head *del_list,
1613                                  u64 index)
1614{
1615        struct btrfs_delayed_item *curr, *next;
1616        int ret;
1617
1618        if (list_empty(del_list))
1619                return 0;
1620
1621        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1622                if (curr->key.offset > index)
1623                        break;
1624
1625                list_del(&curr->readdir_list);
1626                ret = (curr->key.offset == index);
1627
1628                if (atomic_dec_and_test(&curr->refs))
1629                        kfree(curr);
1630
1631                if (ret)
1632                        return 1;
1633                else
1634                        continue;
1635        }
1636        return 0;
1637}
1638
1639/*
1640 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1641 *
1642 */
1643int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1644                                    filldir_t filldir,
1645                                    struct list_head *ins_list)
1646{
1647        struct btrfs_dir_item *di;
1648        struct btrfs_delayed_item *curr, *next;
1649        struct btrfs_key location;
1650        char *name;
1651        int name_len;
1652        int over = 0;
1653        unsigned char d_type;
1654
1655        if (list_empty(ins_list))
1656                return 0;
1657
1658        /*
1659         * Changing the data of the delayed item is impossible. So
1660         * we needn't lock them. And we have held i_mutex of the
1661         * directory, nobody can delete any directory indexes now.
1662         */
1663        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1664                list_del(&curr->readdir_list);
1665
1666                if (curr->key.offset < filp->f_pos) {
1667                        if (atomic_dec_and_test(&curr->refs))
1668                                kfree(curr);
1669                        continue;
1670                }
1671
1672                filp->f_pos = curr->key.offset;
1673
1674                di = (struct btrfs_dir_item *)curr->data;
1675                name = (char *)(di + 1);
1676                name_len = le16_to_cpu(di->name_len);
1677
1678                d_type = btrfs_filetype_table[di->type];
1679                btrfs_disk_key_to_cpu(&location, &di->location);
1680
1681                over = filldir(dirent, name, name_len, curr->key.offset,
1682                               location.objectid, d_type);
1683
1684                if (atomic_dec_and_test(&curr->refs))
1685                        kfree(curr);
1686
1687                if (over)
1688                        return 1;
1689        }
1690        return 0;
1691}
1692
1693BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1694                         generation, 64);
1695BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1696                         sequence, 64);
1697BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1698                         transid, 64);
1699BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1700BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1701                         nbytes, 64);
1702BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1703                         block_group, 64);
1704BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1705BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1706BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1707BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1708BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1709BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1710
1711BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1712BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1713
1714static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1715                                  struct btrfs_inode_item *inode_item,
1716                                  struct inode *inode)
1717{
1718        btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1719        btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1720        btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1721        btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1722        btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1723        btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1724        btrfs_set_stack_inode_generation(inode_item,
1725                                         BTRFS_I(inode)->generation);
1726        btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1727        btrfs_set_stack_inode_transid(inode_item, trans->transid);
1728        btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1729        btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1730        btrfs_set_stack_inode_block_group(inode_item, 0);
1731
1732        btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1733                                     inode->i_atime.tv_sec);
1734        btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1735                                      inode->i_atime.tv_nsec);
1736
1737        btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1738                                     inode->i_mtime.tv_sec);
1739        btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1740                                      inode->i_mtime.tv_nsec);
1741
1742        btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1743                                     inode->i_ctime.tv_sec);
1744        btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1745                                      inode->i_ctime.tv_nsec);
1746}
1747
1748int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1749{
1750        struct btrfs_delayed_node *delayed_node;
1751        struct btrfs_inode_item *inode_item;
1752        struct btrfs_timespec *tspec;
1753
1754        delayed_node = btrfs_get_delayed_node(inode);
1755        if (!delayed_node)
1756                return -ENOENT;
1757
1758        mutex_lock(&delayed_node->mutex);
1759        if (!delayed_node->inode_dirty) {
1760                mutex_unlock(&delayed_node->mutex);
1761                btrfs_release_delayed_node(delayed_node);
1762                return -ENOENT;
1763        }
1764
1765        inode_item = &delayed_node->inode_item;
1766
1767        inode->i_uid = btrfs_stack_inode_uid(inode_item);
1768        inode->i_gid = btrfs_stack_inode_gid(inode_item);
1769        btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1770        inode->i_mode = btrfs_stack_inode_mode(inode_item);
1771        set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1772        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1773        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1774        inode->i_version = btrfs_stack_inode_sequence(inode_item);
1775        inode->i_rdev = 0;
1776        *rdev = btrfs_stack_inode_rdev(inode_item);
1777        BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1778
1779        tspec = btrfs_inode_atime(inode_item);
1780        inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1781        inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1782
1783        tspec = btrfs_inode_mtime(inode_item);
1784        inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1785        inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1786
1787        tspec = btrfs_inode_ctime(inode_item);
1788        inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1789        inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1790
1791        inode->i_generation = BTRFS_I(inode)->generation;
1792        BTRFS_I(inode)->index_cnt = (u64)-1;
1793
1794        mutex_unlock(&delayed_node->mutex);
1795        btrfs_release_delayed_node(delayed_node);
1796        return 0;
1797}
1798
1799int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1800                               struct btrfs_root *root, struct inode *inode)
1801{
1802        struct btrfs_delayed_node *delayed_node;
1803        int ret = 0;
1804
1805        delayed_node = btrfs_get_or_create_delayed_node(inode);
1806        if (IS_ERR(delayed_node))
1807                return PTR_ERR(delayed_node);
1808
1809        mutex_lock(&delayed_node->mutex);
1810        if (delayed_node->inode_dirty) {
1811                fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1812                goto release_node;
1813        }
1814
1815        ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1816                                                   delayed_node);
1817        if (ret)
1818                goto release_node;
1819
1820        fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1821        delayed_node->inode_dirty = 1;
1822        delayed_node->count++;
1823        atomic_inc(&root->fs_info->delayed_root->items);
1824release_node:
1825        mutex_unlock(&delayed_node->mutex);
1826        btrfs_release_delayed_node(delayed_node);
1827        return ret;
1828}
1829
1830static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1831{
1832        struct btrfs_root *root = delayed_node->root;
1833        struct btrfs_delayed_item *curr_item, *prev_item;
1834
1835        mutex_lock(&delayed_node->mutex);
1836        curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1837        while (curr_item) {
1838                btrfs_delayed_item_release_metadata(root, curr_item);
1839                prev_item = curr_item;
1840                curr_item = __btrfs_next_delayed_item(prev_item);
1841                btrfs_release_delayed_item(prev_item);
1842        }
1843
1844        curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1845        while (curr_item) {
1846                btrfs_delayed_item_release_metadata(root, curr_item);
1847                prev_item = curr_item;
1848                curr_item = __btrfs_next_delayed_item(prev_item);
1849                btrfs_release_delayed_item(prev_item);
1850        }
1851
1852        if (delayed_node->inode_dirty) {
1853                btrfs_delayed_inode_release_metadata(root, delayed_node);
1854                btrfs_release_delayed_inode(delayed_node);
1855        }
1856        mutex_unlock(&delayed_node->mutex);
1857}
1858
1859void btrfs_kill_delayed_inode_items(struct inode *inode)
1860{
1861        struct btrfs_delayed_node *delayed_node;
1862
1863        delayed_node = btrfs_get_delayed_node(inode);
1864        if (!delayed_node)
1865                return;
1866
1867        __btrfs_kill_delayed_node(delayed_node);
1868        btrfs_release_delayed_node(delayed_node);
1869}
1870
1871void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1872{
1873        u64 inode_id = 0;
1874        struct btrfs_delayed_node *delayed_nodes[8];
1875        int i, n;
1876
1877        while (1) {
1878                spin_lock(&root->inode_lock);
1879                n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1880                                           (void **)delayed_nodes, inode_id,
1881                                           ARRAY_SIZE(delayed_nodes));
1882                if (!n) {
1883                        spin_unlock(&root->inode_lock);
1884                        break;
1885                }
1886
1887                inode_id = delayed_nodes[n - 1]->inode_id + 1;
1888
1889                for (i = 0; i < n; i++)
1890                        atomic_inc(&delayed_nodes[i]->refs);
1891                spin_unlock(&root->inode_lock);
1892
1893                for (i = 0; i < n; i++) {
1894                        __btrfs_kill_delayed_node(delayed_nodes[i]);
1895                        btrfs_release_delayed_node(delayed_nodes[i]);
1896                }
1897        }
1898}
1899
1900void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1901{
1902        struct btrfs_delayed_root *delayed_root;
1903        struct btrfs_delayed_node *curr_node, *prev_node;
1904
1905        delayed_root = btrfs_get_delayed_root(root);
1906
1907        curr_node = btrfs_first_delayed_node(delayed_root);
1908        while (curr_node) {
1909                __btrfs_kill_delayed_node(curr_node);
1910
1911                prev_node = curr_node;
1912                curr_node = btrfs_next_delayed_node(curr_node);
1913                btrfs_release_delayed_node(prev_node);
1914        }
1915}
1916
1917
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.