linux/fs/btrfs/ctree.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/sched.h>
  20#include "ctree.h"
  21#include "disk-io.h"
  22#include "transaction.h"
  23#include "print-tree.h"
  24#include "locking.h"
  25
  26static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
  27                      *root, struct btrfs_path *path, int level);
  28static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
  29                      *root, struct btrfs_key *ins_key,
  30                      struct btrfs_path *path, int data_size, int extend);
  31static int push_node_left(struct btrfs_trans_handle *trans,
  32                          struct btrfs_root *root, struct extent_buffer *dst,
  33                          struct extent_buffer *src, int empty);
  34static int balance_node_right(struct btrfs_trans_handle *trans,
  35                              struct btrfs_root *root,
  36                              struct extent_buffer *dst_buf,
  37                              struct extent_buffer *src_buf);
  38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  39                   struct btrfs_path *path, int level, int slot);
  40
  41struct btrfs_path *btrfs_alloc_path(void)
  42{
  43        struct btrfs_path *path;
  44        path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
  45        if (path)
  46                path->reada = 1;
  47        return path;
  48}
  49
  50/*
  51 * set all locked nodes in the path to blocking locks.  This should
  52 * be done before scheduling
  53 */
  54noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  55{
  56        int i;
  57        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
  58                if (p->nodes[i] && p->locks[i])
  59                        btrfs_set_lock_blocking(p->nodes[i]);
  60        }
  61}
  62
  63/*
  64 * reset all the locked nodes in the patch to spinning locks.
  65 *
  66 * held is used to keep lockdep happy, when lockdep is enabled
  67 * we set held to a blocking lock before we go around and
  68 * retake all the spinlocks in the path.  You can safely use NULL
  69 * for held
  70 */
  71noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
  72                                        struct extent_buffer *held)
  73{
  74        int i;
  75
  76#ifdef CONFIG_DEBUG_LOCK_ALLOC
  77        /* lockdep really cares that we take all of these spinlocks
  78         * in the right order.  If any of the locks in the path are not
  79         * currently blocking, it is going to complain.  So, make really
  80         * really sure by forcing the path to blocking before we clear
  81         * the path blocking.
  82         */
  83        if (held)
  84                btrfs_set_lock_blocking(held);
  85        btrfs_set_path_blocking(p);
  86#endif
  87
  88        for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
  89                if (p->nodes[i] && p->locks[i])
  90                        btrfs_clear_lock_blocking(p->nodes[i]);
  91        }
  92
  93#ifdef CONFIG_DEBUG_LOCK_ALLOC
  94        if (held)
  95                btrfs_clear_lock_blocking(held);
  96#endif
  97}
  98
  99/* this also releases the path */
 100void btrfs_free_path(struct btrfs_path *p)
 101{
 102        btrfs_release_path(NULL, p);
 103        kmem_cache_free(btrfs_path_cachep, p);
 104}
 105
 106/*
 107 * path release drops references on the extent buffers in the path
 108 * and it drops any locks held by this path
 109 *
 110 * It is safe to call this on paths that no locks or extent buffers held.
 111 */
 112noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 113{
 114        int i;
 115
 116        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
 117                p->slots[i] = 0;
 118                if (!p->nodes[i])
 119                        continue;
 120                if (p->locks[i]) {
 121                        btrfs_tree_unlock(p->nodes[i]);
 122                        p->locks[i] = 0;
 123                }
 124                free_extent_buffer(p->nodes[i]);
 125                p->nodes[i] = NULL;
 126        }
 127}
 128
 129/*
 130 * safely gets a reference on the root node of a tree.  A lock
 131 * is not taken, so a concurrent writer may put a different node
 132 * at the root of the tree.  See btrfs_lock_root_node for the
 133 * looping required.
 134 *
 135 * The extent buffer returned by this has a reference taken, so
 136 * it won't disappear.  It may stop being the root of the tree
 137 * at any time because there are no locks held.
 138 */
 139struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 140{
 141        struct extent_buffer *eb;
 142        spin_lock(&root->node_lock);
 143        eb = root->node;
 144        extent_buffer_get(eb);
 145        spin_unlock(&root->node_lock);
 146        return eb;
 147}
 148
 149/* loop around taking references on and locking the root node of the
 150 * tree until you end up with a lock on the root.  A locked buffer
 151 * is returned, with a reference held.
 152 */
 153struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 154{
 155        struct extent_buffer *eb;
 156
 157        while (1) {
 158                eb = btrfs_root_node(root);
 159                btrfs_tree_lock(eb);
 160
 161                spin_lock(&root->node_lock);
 162                if (eb == root->node) {
 163                        spin_unlock(&root->node_lock);
 164                        break;
 165                }
 166                spin_unlock(&root->node_lock);
 167
 168                btrfs_tree_unlock(eb);
 169                free_extent_buffer(eb);
 170        }
 171        return eb;
 172}
 173
 174/* cowonly root (everything not a reference counted cow subvolume), just get
 175 * put onto a simple dirty list.  transaction.c walks this to make sure they
 176 * get properly updated on disk.
 177 */
 178static void add_root_to_dirty_list(struct btrfs_root *root)
 179{
 180        if (root->track_dirty && list_empty(&root->dirty_list)) {
 181                list_add(&root->dirty_list,
 182                         &root->fs_info->dirty_cowonly_roots);
 183        }
 184}
 185
 186/*
 187 * used by snapshot creation to make a copy of a root for a tree with
 188 * a given objectid.  The buffer with the new root node is returned in
 189 * cow_ret, and this func returns zero on success or a negative error code.
 190 */
 191int btrfs_copy_root(struct btrfs_trans_handle *trans,
 192                      struct btrfs_root *root,
 193                      struct extent_buffer *buf,
 194                      struct extent_buffer **cow_ret, u64 new_root_objectid)
 195{
 196        struct extent_buffer *cow;
 197        u32 nritems;
 198        int ret = 0;
 199        int level;
 200        struct btrfs_root *new_root;
 201
 202        new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
 203        if (!new_root)
 204                return -ENOMEM;
 205
 206        memcpy(new_root, root, sizeof(*new_root));
 207        new_root->root_key.objectid = new_root_objectid;
 208
 209        WARN_ON(root->ref_cows && trans->transid !=
 210                root->fs_info->running_transaction->transid);
 211        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 212
 213        level = btrfs_header_level(buf);
 214        nritems = btrfs_header_nritems(buf);
 215
 216        cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
 217                                     new_root_objectid, trans->transid,
 218                                     level, buf->start, 0);
 219        if (IS_ERR(cow)) {
 220                kfree(new_root);
 221                return PTR_ERR(cow);
 222        }
 223
 224        copy_extent_buffer(cow, buf, 0, 0, cow->len);
 225        btrfs_set_header_bytenr(cow, cow->start);
 226        btrfs_set_header_generation(cow, trans->transid);
 227        btrfs_set_header_owner(cow, new_root_objectid);
 228        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
 229
 230        write_extent_buffer(cow, root->fs_info->fsid,
 231                            (unsigned long)btrfs_header_fsid(cow),
 232                            BTRFS_FSID_SIZE);
 233
 234        WARN_ON(btrfs_header_generation(buf) > trans->transid);
 235        ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
 236        kfree(new_root);
 237
 238        if (ret)
 239                return ret;
 240
 241        btrfs_mark_buffer_dirty(cow);
 242        *cow_ret = cow;
 243        return 0;
 244}
 245
 246/*
 247 * does the dirty work in cow of a single block.  The parent block (if
 248 * supplied) is updated to point to the new cow copy.  The new buffer is marked
 249 * dirty and returned locked.  If you modify the block it needs to be marked
 250 * dirty again.
 251 *
 252 * search_start -- an allocation hint for the new block
 253 *
 254 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 255 * bytes the allocator should try to find free next to the block it returns.
 256 * This is just a hint and may be ignored by the allocator.
 257 */
 258static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 259                             struct btrfs_root *root,
 260                             struct extent_buffer *buf,
 261                             struct extent_buffer *parent, int parent_slot,
 262                             struct extent_buffer **cow_ret,
 263                             u64 search_start, u64 empty_size)
 264{
 265        u64 parent_start;
 266        struct extent_buffer *cow;
 267        u32 nritems;
 268        int ret = 0;
 269        int level;
 270        int unlock_orig = 0;
 271
 272        if (*cow_ret == buf)
 273                unlock_orig = 1;
 274
 275        btrfs_assert_tree_locked(buf);
 276
 277        if (parent)
 278                parent_start = parent->start;
 279        else
 280                parent_start = 0;
 281
 282        WARN_ON(root->ref_cows && trans->transid !=
 283                root->fs_info->running_transaction->transid);
 284        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 285
 286        level = btrfs_header_level(buf);
 287        nritems = btrfs_header_nritems(buf);
 288
 289        cow = btrfs_alloc_free_block(trans, root, buf->len,
 290                                     parent_start, root->root_key.objectid,
 291                                     trans->transid, level,
 292                                     search_start, empty_size);
 293        if (IS_ERR(cow))
 294                return PTR_ERR(cow);
 295
 296        /* cow is set to blocking by btrfs_init_new_buffer */
 297
 298        copy_extent_buffer(cow, buf, 0, 0, cow->len);
 299        btrfs_set_header_bytenr(cow, cow->start);
 300        btrfs_set_header_generation(cow, trans->transid);
 301        btrfs_set_header_owner(cow, root->root_key.objectid);
 302        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
 303
 304        write_extent_buffer(cow, root->fs_info->fsid,
 305                            (unsigned long)btrfs_header_fsid(cow),
 306                            BTRFS_FSID_SIZE);
 307
 308        WARN_ON(btrfs_header_generation(buf) > trans->transid);
 309        if (btrfs_header_generation(buf) != trans->transid) {
 310                u32 nr_extents;
 311                ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
 312                if (ret)
 313                        return ret;
 314
 315                ret = btrfs_cache_ref(trans, root, buf, nr_extents);
 316                WARN_ON(ret);
 317        } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
 318                /*
 319                 * There are only two places that can drop reference to
 320                 * tree blocks owned by living reloc trees, one is here,
 321                 * the other place is btrfs_drop_subtree. In both places,
 322                 * we check reference count while tree block is locked.
 323                 * Furthermore, if reference count is one, it won't get
 324                 * increased by someone else.
 325                 */
 326                u32 refs;
 327                ret = btrfs_lookup_extent_ref(trans, root, buf->start,
 328                                              buf->len, &refs);
 329                BUG_ON(ret);
 330                if (refs == 1) {
 331                        ret = btrfs_update_ref(trans, root, buf, cow,
 332                                               0, nritems);
 333                        clean_tree_block(trans, root, buf);
 334                } else {
 335                        ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
 336                }
 337                BUG_ON(ret);
 338        } else {
 339                ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
 340                if (ret)
 341                        return ret;
 342                clean_tree_block(trans, root, buf);
 343        }
 344
 345        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
 346                ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
 347                WARN_ON(ret);
 348        }
 349
 350        if (buf == root->node) {
 351                WARN_ON(parent && parent != buf);
 352
 353                spin_lock(&root->node_lock);
 354                root->node = cow;
 355                extent_buffer_get(cow);
 356                spin_unlock(&root->node_lock);
 357
 358                if (buf != root->commit_root) {
 359                        btrfs_free_extent(trans, root, buf->start,
 360                                          buf->len, buf->start,
 361                                          root->root_key.objectid,
 362                                          btrfs_header_generation(buf),
 363                                          level, 1);
 364                }
 365                free_extent_buffer(buf);
 366                add_root_to_dirty_list(root);
 367        } else {
 368                btrfs_set_node_blockptr(parent, parent_slot,
 369                                        cow->start);
 370                WARN_ON(trans->transid == 0);
 371                btrfs_set_node_ptr_generation(parent, parent_slot,
 372                                              trans->transid);
 373                btrfs_mark_buffer_dirty(parent);
 374                WARN_ON(btrfs_header_generation(parent) != trans->transid);
 375                btrfs_free_extent(trans, root, buf->start, buf->len,
 376                                  parent_start, btrfs_header_owner(parent),
 377                                  btrfs_header_generation(parent), level, 1);
 378        }
 379        if (unlock_orig)
 380                btrfs_tree_unlock(buf);
 381        free_extent_buffer(buf);
 382        btrfs_mark_buffer_dirty(cow);
 383        *cow_ret = cow;
 384        return 0;
 385}
 386
 387/*
 388 * cows a single block, see __btrfs_cow_block for the real work.
 389 * This version of it has extra checks so that a block isn't cow'd more than
 390 * once per transaction, as long as it hasn't been written yet
 391 */
 392noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 393                    struct btrfs_root *root, struct extent_buffer *buf,
 394                    struct extent_buffer *parent, int parent_slot,
 395                    struct extent_buffer **cow_ret)
 396{
 397        u64 search_start;
 398        int ret;
 399
 400        if (trans->transaction != root->fs_info->running_transaction) {
 401                printk(KERN_CRIT "trans %llu running %llu\n",
 402                       (unsigned long long)trans->transid,
 403                       (unsigned long long)
 404                       root->fs_info->running_transaction->transid);
 405                WARN_ON(1);
 406        }
 407        if (trans->transid != root->fs_info->generation) {
 408                printk(KERN_CRIT "trans %llu running %llu\n",
 409                       (unsigned long long)trans->transid,
 410                       (unsigned long long)root->fs_info->generation);
 411                WARN_ON(1);
 412        }
 413
 414        if (btrfs_header_generation(buf) == trans->transid &&
 415            btrfs_header_owner(buf) == root->root_key.objectid &&
 416            !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 417                *cow_ret = buf;
 418                return 0;
 419        }
 420
 421        search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
 422
 423        if (parent)
 424                btrfs_set_lock_blocking(parent);
 425        btrfs_set_lock_blocking(buf);
 426
 427        ret = __btrfs_cow_block(trans, root, buf, parent,
 428                                 parent_slot, cow_ret, search_start, 0);
 429        return ret;
 430}
 431
 432/*
 433 * helper function for defrag to decide if two blocks pointed to by a
 434 * node are actually close by
 435 */
 436static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
 437{
 438        if (blocknr < other && other - (blocknr + blocksize) < 32768)
 439                return 1;
 440        if (blocknr > other && blocknr - (other + blocksize) < 32768)
 441                return 1;
 442        return 0;
 443}
 444
 445/*
 446 * compare two keys in a memcmp fashion
 447 */
 448static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 449{
 450        struct btrfs_key k1;
 451
 452        btrfs_disk_key_to_cpu(&k1, disk);
 453
 454        if (k1.objectid > k2->objectid)
 455                return 1;
 456        if (k1.objectid < k2->objectid)
 457                return -1;
 458        if (k1.type > k2->type)
 459                return 1;
 460        if (k1.type < k2->type)
 461                return -1;
 462        if (k1.offset > k2->offset)
 463                return 1;
 464        if (k1.offset < k2->offset)
 465                return -1;
 466        return 0;
 467}
 468
 469/*
 470 * same as comp_keys only with two btrfs_key's
 471 */
 472static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 473{
 474        if (k1->objectid > k2->objectid)
 475                return 1;
 476        if (k1->objectid < k2->objectid)
 477                return -1;
 478        if (k1->type > k2->type)
 479                return 1;
 480        if (k1->type < k2->type)
 481                return -1;
 482        if (k1->offset > k2->offset)
 483                return 1;
 484        if (k1->offset < k2->offset)
 485                return -1;
 486        return 0;
 487}
 488
 489/*
 490 * this is used by the defrag code to go through all the
 491 * leaves pointed to by a node and reallocate them so that
 492 * disk order is close to key order
 493 */
 494int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 495                       struct btrfs_root *root, struct extent_buffer *parent,
 496                       int start_slot, int cache_only, u64 *last_ret,
 497                       struct btrfs_key *progress)
 498{
 499        struct extent_buffer *cur;
 500        u64 blocknr;
 501        u64 gen;
 502        u64 search_start = *last_ret;
 503        u64 last_block = 0;
 504        u64 other;
 505        u32 parent_nritems;
 506        int end_slot;
 507        int i;
 508        int err = 0;
 509        int parent_level;
 510        int uptodate;
 511        u32 blocksize;
 512        int progress_passed = 0;
 513        struct btrfs_disk_key disk_key;
 514
 515        parent_level = btrfs_header_level(parent);
 516        if (cache_only && parent_level != 1)
 517                return 0;
 518
 519        if (trans->transaction != root->fs_info->running_transaction)
 520                WARN_ON(1);
 521        if (trans->transid != root->fs_info->generation)
 522                WARN_ON(1);
 523
 524        parent_nritems = btrfs_header_nritems(parent);
 525        blocksize = btrfs_level_size(root, parent_level - 1);
 526        end_slot = parent_nritems;
 527
 528        if (parent_nritems == 1)
 529                return 0;
 530
 531        btrfs_set_lock_blocking(parent);
 532
 533        for (i = start_slot; i < end_slot; i++) {
 534                int close = 1;
 535
 536                if (!parent->map_token) {
 537                        map_extent_buffer(parent,
 538                                        btrfs_node_key_ptr_offset(i),
 539                                        sizeof(struct btrfs_key_ptr),
 540                                        &parent->map_token, &parent->kaddr,
 541                                        &parent->map_start, &parent->map_len,
 542                                        KM_USER1);
 543                }
 544                btrfs_node_key(parent, &disk_key, i);
 545                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 546                        continue;
 547
 548                progress_passed = 1;
 549                blocknr = btrfs_node_blockptr(parent, i);
 550                gen = btrfs_node_ptr_generation(parent, i);
 551                if (last_block == 0)
 552                        last_block = blocknr;
 553
 554                if (i > 0) {
 555                        other = btrfs_node_blockptr(parent, i - 1);
 556                        close = close_blocks(blocknr, other, blocksize);
 557                }
 558                if (!close && i < end_slot - 2) {
 559                        other = btrfs_node_blockptr(parent, i + 1);
 560                        close = close_blocks(blocknr, other, blocksize);
 561                }
 562                if (close) {
 563                        last_block = blocknr;
 564                        continue;
 565                }
 566                if (parent->map_token) {
 567                        unmap_extent_buffer(parent, parent->map_token,
 568                                            KM_USER1);
 569                        parent->map_token = NULL;
 570                }
 571
 572                cur = btrfs_find_tree_block(root, blocknr, blocksize);
 573                if (cur)
 574                        uptodate = btrfs_buffer_uptodate(cur, gen);
 575                else
 576                        uptodate = 0;
 577                if (!cur || !uptodate) {
 578                        if (cache_only) {
 579                                free_extent_buffer(cur);
 580                                continue;
 581                        }
 582                        if (!cur) {
 583                                cur = read_tree_block(root, blocknr,
 584                                                         blocksize, gen);
 585                        } else if (!uptodate) {
 586                                btrfs_read_buffer(cur, gen);
 587                        }
 588                }
 589                if (search_start == 0)
 590                        search_start = last_block;
 591
 592                btrfs_tree_lock(cur);
 593                btrfs_set_lock_blocking(cur);
 594                err = __btrfs_cow_block(trans, root, cur, parent, i,
 595                                        &cur, search_start,
 596                                        min(16 * blocksize,
 597                                            (end_slot - i) * blocksize));
 598                if (err) {
 599                        btrfs_tree_unlock(cur);
 600                        free_extent_buffer(cur);
 601                        break;
 602                }
 603                search_start = cur->start;
 604                last_block = cur->start;
 605                *last_ret = search_start;
 606                btrfs_tree_unlock(cur);
 607                free_extent_buffer(cur);
 608        }
 609        if (parent->map_token) {
 610                unmap_extent_buffer(parent, parent->map_token,
 611                                    KM_USER1);
 612                parent->map_token = NULL;
 613        }
 614        return err;
 615}
 616
 617/*
 618 * The leaf data grows from end-to-front in the node.
 619 * this returns the address of the start of the last item,
 620 * which is the stop of the leaf data stack
 621 */
 622static inline unsigned int leaf_data_end(struct btrfs_root *root,
 623                                         struct extent_buffer *leaf)
 624{
 625        u32 nr = btrfs_header_nritems(leaf);
 626        if (nr == 0)
 627                return BTRFS_LEAF_DATA_SIZE(root);
 628        return btrfs_item_offset_nr(leaf, nr - 1);
 629}
 630
 631/*
 632 * extra debugging checks to make sure all the items in a key are
 633 * well formed and in the proper order
 634 */
 635static int check_node(struct btrfs_root *root, struct btrfs_path *path,
 636                      int level)
 637{
 638        struct extent_buffer *parent = NULL;
 639        struct extent_buffer *node = path->nodes[level];
 640        struct btrfs_disk_key parent_key;
 641        struct btrfs_disk_key node_key;
 642        int parent_slot;
 643        int slot;
 644        struct btrfs_key cpukey;
 645        u32 nritems = btrfs_header_nritems(node);
 646
 647        if (path->nodes[level + 1])
 648                parent = path->nodes[level + 1];
 649
 650        slot = path->slots[level];
 651        BUG_ON(nritems == 0);
 652        if (parent) {
 653                parent_slot = path->slots[level + 1];
 654                btrfs_node_key(parent, &parent_key, parent_slot);
 655                btrfs_node_key(node, &node_key, 0);
 656                BUG_ON(memcmp(&parent_key, &node_key,
 657                              sizeof(struct btrfs_disk_key)));
 658                BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
 659                       btrfs_header_bytenr(node));
 660        }
 661        BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
 662        if (slot != 0) {
 663                btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
 664                btrfs_node_key(node, &node_key, slot);
 665                BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
 666        }
 667        if (slot < nritems - 1) {
 668                btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
 669                btrfs_node_key(node, &node_key, slot);
 670                BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
 671        }
 672        return 0;
 673}
 674
 675/*
 676 * extra checking to make sure all the items in a leaf are
 677 * well formed and in the proper order
 678 */
 679static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
 680                      int level)
 681{
 682        struct extent_buffer *leaf = path->nodes[level];
 683        struct extent_buffer *parent = NULL;
 684        int parent_slot;
 685        struct btrfs_key cpukey;
 686        struct btrfs_disk_key parent_key;
 687        struct btrfs_disk_key leaf_key;
 688        int slot = path->slots[0];
 689
 690        u32 nritems = btrfs_header_nritems(leaf);
 691
 692        if (path->nodes[level + 1])
 693                parent = path->nodes[level + 1];
 694
 695        if (nritems == 0)
 696                return 0;
 697
 698        if (parent) {
 699                parent_slot = path->slots[level + 1];
 700                btrfs_node_key(parent, &parent_key, parent_slot);
 701                btrfs_item_key(leaf, &leaf_key, 0);
 702
 703                BUG_ON(memcmp(&parent_key, &leaf_key,
 704                       sizeof(struct btrfs_disk_key)));
 705                BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
 706                       btrfs_header_bytenr(leaf));
 707        }
 708        if (slot != 0 && slot < nritems - 1) {
 709                btrfs_item_key(leaf, &leaf_key, slot);
 710                btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
 711                if (comp_keys(&leaf_key, &cpukey) <= 0) {
 712                        btrfs_print_leaf(root, leaf);
 713                        printk(KERN_CRIT "slot %d offset bad key\n", slot);
 714                        BUG_ON(1);
 715                }
 716                if (btrfs_item_offset_nr(leaf, slot - 1) !=
 717                       btrfs_item_end_nr(leaf, slot)) {
 718                        btrfs_print_leaf(root, leaf);
 719                        printk(KERN_CRIT "slot %d offset bad\n", slot);
 720                        BUG_ON(1);
 721                }
 722        }
 723        if (slot < nritems - 1) {
 724                btrfs_item_key(leaf, &leaf_key, slot);
 725                btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
 726                BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
 727                if (btrfs_item_offset_nr(leaf, slot) !=
 728                        btrfs_item_end_nr(leaf, slot + 1)) {
 729                        btrfs_print_leaf(root, leaf);
 730                        printk(KERN_CRIT "slot %d offset bad\n", slot);
 731                        BUG_ON(1);
 732                }
 733        }
 734        BUG_ON(btrfs_item_offset_nr(leaf, 0) +
 735               btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
 736        return 0;
 737}
 738
 739static noinline int check_block(struct btrfs_root *root,
 740                                struct btrfs_path *path, int level)
 741{
 742        return 0;
 743        if (level == 0)
 744                return check_leaf(root, path, level);
 745        return check_node(root, path, level);
 746}
 747
 748/*
 749 * search for key in the extent_buffer.  The items start at offset p,
 750 * and they are item_size apart.  There are 'max' items in p.
 751 *
 752 * the slot in the array is returned via slot, and it points to
 753 * the place where you would insert key if it is not found in
 754 * the array.
 755 *
 756 * slot may point to max if the key is bigger than all of the keys
 757 */
 758static noinline int generic_bin_search(struct extent_buffer *eb,
 759                                       unsigned long p,
 760                                       int item_size, struct btrfs_key *key,
 761                                       int max, int *slot)
 762{
 763        int low = 0;
 764        int high = max;
 765        int mid;
 766        int ret;
 767        struct btrfs_disk_key *tmp = NULL;
 768        struct btrfs_disk_key unaligned;
 769        unsigned long offset;
 770        char *map_token = NULL;
 771        char *kaddr = NULL;
 772        unsigned long map_start = 0;
 773        unsigned long map_len = 0;
 774        int err;
 775
 776        while (low < high) {
 777                mid = (low + high) / 2;
 778                offset = p + mid * item_size;
 779
 780                if (!map_token || offset < map_start ||
 781                    (offset + sizeof(struct btrfs_disk_key)) >
 782                    map_start + map_len) {
 783                        if (map_token) {
 784                                unmap_extent_buffer(eb, map_token, KM_USER0);
 785                                map_token = NULL;
 786                        }
 787
 788                        err = map_private_extent_buffer(eb, offset,
 789                                                sizeof(struct btrfs_disk_key),
 790                                                &map_token, &kaddr,
 791                                                &map_start, &map_len, KM_USER0);
 792
 793                        if (!err) {
 794                                tmp = (struct btrfs_disk_key *)(kaddr + offset -
 795                                                        map_start);
 796                        } else {
 797                                read_extent_buffer(eb, &unaligned,
 798                                                   offset, sizeof(unaligned));
 799                                tmp = &unaligned;
 800                        }
 801
 802                } else {
 803                        tmp = (struct btrfs_disk_key *)(kaddr + offset -
 804                                                        map_start);
 805                }
 806                ret = comp_keys(tmp, key);
 807
 808                if (ret < 0)
 809                        low = mid + 1;
 810                else if (ret > 0)
 811                        high = mid;
 812                else {
 813                        *slot = mid;
 814                        if (map_token)
 815                                unmap_extent_buffer(eb, map_token, KM_USER0);
 816                        return 0;
 817                }
 818        }
 819        *slot = low;
 820        if (map_token)
 821                unmap_extent_buffer(eb, map_token, KM_USER0);
 822        return 1;
 823}
 824
 825/*
 826 * simple bin_search frontend that does the right thing for
 827 * leaves vs nodes
 828 */
 829static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 830                      int level, int *slot)
 831{
 832        if (level == 0) {
 833                return generic_bin_search(eb,
 834                                          offsetof(struct btrfs_leaf, items),
 835                                          sizeof(struct btrfs_item),
 836                                          key, btrfs_header_nritems(eb),
 837                                          slot);
 838        } else {
 839                return generic_bin_search(eb,
 840                                          offsetof(struct btrfs_node, ptrs),
 841                                          sizeof(struct btrfs_key_ptr),
 842                                          key, btrfs_header_nritems(eb),
 843                                          slot);
 844        }
 845        return -1;
 846}
 847
 848/* given a node and slot number, this reads the blocks it points to.  The
 849 * extent buffer is returned with a reference taken (but unlocked).
 850 * NULL is returned on error.
 851 */
 852static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
 853                                   struct extent_buffer *parent, int slot)
 854{
 855        int level = btrfs_header_level(parent);
 856        if (slot < 0)
 857                return NULL;
 858        if (slot >= btrfs_header_nritems(parent))
 859                return NULL;
 860
 861        BUG_ON(level == 0);
 862
 863        return read_tree_block(root, btrfs_node_blockptr(parent, slot),
 864                       btrfs_level_size(root, level - 1),
 865                       btrfs_node_ptr_generation(parent, slot));
 866}
 867
 868/*
 869 * node level balancing, used to make sure nodes are in proper order for
 870 * item deletion.  We balance from the top down, so we have to make sure
 871 * that a deletion won't leave an node completely empty later on.
 872 */
 873static noinline int balance_level(struct btrfs_trans_handle *trans,
 874                         struct btrfs_root *root,
 875                         struct btrfs_path *path, int level)
 876{
 877        struct extent_buffer *right = NULL;
 878        struct extent_buffer *mid;
 879        struct extent_buffer *left = NULL;
 880        struct extent_buffer *parent = NULL;
 881        int ret = 0;
 882        int wret;
 883        int pslot;
 884        int orig_slot = path->slots[level];
 885        int err_on_enospc = 0;
 886        u64 orig_ptr;
 887
 888        if (level == 0)
 889                return 0;
 890
 891        mid = path->nodes[level];
 892
 893        WARN_ON(!path->locks[level]);
 894        WARN_ON(btrfs_header_generation(mid) != trans->transid);
 895
 896        orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 897
 898        if (level < BTRFS_MAX_LEVEL - 1)
 899                parent = path->nodes[level + 1];
 900        pslot = path->slots[level + 1];
 901
 902        /*
 903         * deal with the case where there is only one pointer in the root
 904         * by promoting the node below to a root
 905         */
 906        if (!parent) {
 907                struct extent_buffer *child;
 908
 909                if (btrfs_header_nritems(mid) != 1)
 910                        return 0;
 911
 912                /* promote the child to a root */
 913                child = read_node_slot(root, mid, 0);
 914                BUG_ON(!child);
 915                btrfs_tree_lock(child);
 916                btrfs_set_lock_blocking(child);
 917                ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
 918                BUG_ON(ret);
 919
 920                spin_lock(&root->node_lock);
 921                root->node = child;
 922                spin_unlock(&root->node_lock);
 923
 924                ret = btrfs_update_extent_ref(trans, root, child->start,
 925                                              child->len,
 926                                              mid->start, child->start,
 927                                              root->root_key.objectid,
 928                                              trans->transid, level - 1);
 929                BUG_ON(ret);
 930
 931                add_root_to_dirty_list(root);
 932                btrfs_tree_unlock(child);
 933
 934                path->locks[level] = 0;
 935                path->nodes[level] = NULL;
 936                clean_tree_block(trans, root, mid);
 937                btrfs_tree_unlock(mid);
 938                /* once for the path */
 939                free_extent_buffer(mid);
 940                ret = btrfs_free_extent(trans, root, mid->start, mid->len,
 941                                        mid->start, root->root_key.objectid,
 942                                        btrfs_header_generation(mid),
 943                                        level, 1);
 944                /* once for the root ptr */
 945                free_extent_buffer(mid);
 946                return ret;
 947        }
 948        if (btrfs_header_nritems(mid) >
 949            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 950                return 0;
 951
 952        if (trans->transaction->delayed_refs.flushing &&
 953            btrfs_header_nritems(mid) > 2)
 954                return 0;
 955
 956        if (btrfs_header_nritems(mid) < 2)
 957                err_on_enospc = 1;
 958
 959        left = read_node_slot(root, parent, pslot - 1);
 960        if (left) {
 961                btrfs_tree_lock(left);
 962                btrfs_set_lock_blocking(left);
 963                wret = btrfs_cow_block(trans, root, left,
 964                                       parent, pslot - 1, &left);
 965                if (wret) {
 966                        ret = wret;
 967                        goto enospc;
 968                }
 969        }
 970        right = read_node_slot(root, parent, pslot + 1);
 971        if (right) {
 972                btrfs_tree_lock(right);
 973                btrfs_set_lock_blocking(right);
 974                wret = btrfs_cow_block(trans, root, right,
 975                                       parent, pslot + 1, &right);
 976                if (wret) {
 977                        ret = wret;
 978                        goto enospc;
 979                }
 980        }
 981
 982        /* first, try to make some room in the middle buffer */
 983        if (left) {
 984                orig_slot += btrfs_header_nritems(left);
 985                wret = push_node_left(trans, root, left, mid, 1);
 986                if (wret < 0)
 987                        ret = wret;
 988                if (btrfs_header_nritems(mid) < 2)
 989                        err_on_enospc = 1;
 990        }
 991
 992        /*
 993         * then try to empty the right most buffer into the middle
 994         */
 995        if (right) {
 996                wret = push_node_left(trans, root, mid, right, 1);
 997                if (wret < 0 && wret != -ENOSPC)
 998                        ret = wret;
 999                if (btrfs_header_nritems(right) == 0) {
1000                        u64 bytenr = right->start;
1001                        u64 generation = btrfs_header_generation(parent);
1002                        u32 blocksize = right->len;
1003
1004                        clean_tree_block(trans, root, right);
1005                        btrfs_tree_unlock(right);
1006                        free_extent_buffer(right);
1007                        right = NULL;
1008                        wret = del_ptr(trans, root, path, level + 1, pslot +
1009                                       1);
1010                        if (wret)
1011                                ret = wret;
1012                        wret = btrfs_free_extent(trans, root, bytenr,
1013                                                 blocksize, parent->start,
1014                                                 btrfs_header_owner(parent),
1015                                                 generation, level, 1);
1016                        if (wret)
1017                                ret = wret;
1018                } else {
1019                        struct btrfs_disk_key right_key;
1020                        btrfs_node_key(right, &right_key, 0);
1021                        btrfs_set_node_key(parent, &right_key, pslot + 1);
1022                        btrfs_mark_buffer_dirty(parent);
1023                }
1024        }
1025        if (btrfs_header_nritems(mid) == 1) {
1026                /*
1027                 * we're not allowed to leave a node with one item in the
1028                 * tree during a delete.  A deletion from lower in the tree
1029                 * could try to delete the only pointer in this node.
1030                 * So, pull some keys from the left.
1031                 * There has to be a left pointer at this point because
1032                 * otherwise we would have pulled some pointers from the
1033                 * right
1034                 */
1035                BUG_ON(!left);
1036                wret = balance_node_right(trans, root, mid, left);
1037                if (wret < 0) {
1038                        ret = wret;
1039                        goto enospc;
1040                }
1041                if (wret == 1) {
1042                        wret = push_node_left(trans, root, left, mid, 1);
1043                        if (wret < 0)
1044                                ret = wret;
1045                }
1046                BUG_ON(wret == 1);
1047        }
1048        if (btrfs_header_nritems(mid) == 0) {
1049                /* we've managed to empty the middle node, drop it */
1050                u64 root_gen = btrfs_header_generation(parent);
1051                u64 bytenr = mid->start;
1052                u32 blocksize = mid->len;
1053
1054                clean_tree_block(trans, root, mid);
1055                btrfs_tree_unlock(mid);
1056                free_extent_buffer(mid);
1057                mid = NULL;
1058                wret = del_ptr(trans, root, path, level + 1, pslot);
1059                if (wret)
1060                        ret = wret;
1061                wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1062                                         parent->start,
1063                                         btrfs_header_owner(parent),
1064                                         root_gen, level, 1);
1065                if (wret)
1066                        ret = wret;
1067        } else {
1068                /* update the parent key to reflect our changes */
1069                struct btrfs_disk_key mid_key;
1070                btrfs_node_key(mid, &mid_key, 0);
1071                btrfs_set_node_key(parent, &mid_key, pslot);
1072                btrfs_mark_buffer_dirty(parent);
1073        }
1074
1075        /* update the path */
1076        if (left) {
1077                if (btrfs_header_nritems(left) > orig_slot) {
1078                        extent_buffer_get(left);
1079                        /* left was locked after cow */
1080                        path->nodes[level] = left;
1081                        path->slots[level + 1] -= 1;
1082                        path->slots[level] = orig_slot;
1083                        if (mid) {
1084                                btrfs_tree_unlock(mid);
1085                                free_extent_buffer(mid);
1086                        }
1087                } else {
1088                        orig_slot -= btrfs_header_nritems(left);
1089                        path->slots[level] = orig_slot;
1090                }
1091        }
1092        /* double check we haven't messed things up */
1093        check_block(root, path, level);
1094        if (orig_ptr !=
1095            btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1096                BUG();
1097enospc:
1098        if (right) {
1099                btrfs_tree_unlock(right);
1100                free_extent_buffer(right);
1101        }
1102        if (left) {
1103                if (path->nodes[level] != left)
1104                        btrfs_tree_unlock(left);
1105                free_extent_buffer(left);
1106        }
1107        return ret;
1108}
1109
1110/* Node balancing for insertion.  Here we only split or push nodes around
1111 * when they are completely full.  This is also done top down, so we
1112 * have to be pessimistic.
1113 */
1114static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1115                                          struct btrfs_root *root,
1116                                          struct btrfs_path *path, int level)
1117{
1118        struct extent_buffer *right = NULL;
1119        struct extent_buffer *mid;
1120        struct extent_buffer *left = NULL;
1121        struct extent_buffer *parent = NULL;
1122        int ret = 0;
1123        int wret;
1124        int pslot;
1125        int orig_slot = path->slots[level];
1126        u64 orig_ptr;
1127
1128        if (level == 0)
1129                return 1;
1130
1131        mid = path->nodes[level];
1132        WARN_ON(btrfs_header_generation(mid) != trans->transid);
1133        orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1134
1135        if (level < BTRFS_MAX_LEVEL - 1)
1136                parent = path->nodes[level + 1];
1137        pslot = path->slots[level + 1];
1138
1139        if (!parent)
1140                return 1;
1141
1142        left = read_node_slot(root, parent, pslot - 1);
1143
1144        /* first, try to make some room in the middle buffer */
1145        if (left) {
1146                u32 left_nr;
1147
1148                btrfs_tree_lock(left);
1149                btrfs_set_lock_blocking(left);
1150
1151                left_nr = btrfs_header_nritems(left);
1152                if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1153                        wret = 1;
1154                } else {
1155                        ret = btrfs_cow_block(trans, root, left, parent,
1156                                              pslot - 1, &left);
1157                        if (ret)
1158                                wret = 1;
1159                        else {
1160                                wret = push_node_left(trans, root,
1161                                                      left, mid, 0);
1162                        }
1163                }
1164                if (wret < 0)
1165                        ret = wret;
1166                if (wret == 0) {
1167                        struct btrfs_disk_key disk_key;
1168                        orig_slot += left_nr;
1169                        btrfs_node_key(mid, &disk_key, 0);
1170                        btrfs_set_node_key(parent, &disk_key, pslot);
1171                        btrfs_mark_buffer_dirty(parent);
1172                        if (btrfs_header_nritems(left) > orig_slot) {
1173                                path->nodes[level] = left;
1174                                path->slots[level + 1] -= 1;
1175                                path->slots[level] = orig_slot;
1176                                btrfs_tree_unlock(mid);
1177                                free_extent_buffer(mid);
1178                        } else {
1179                                orig_slot -=
1180                                        btrfs_header_nritems(left);
1181                                path->slots[level] = orig_slot;
1182                                btrfs_tree_unlock(left);
1183                                free_extent_buffer(left);
1184                        }
1185                        return 0;
1186                }
1187                btrfs_tree_unlock(left);
1188                free_extent_buffer(left);
1189        }
1190        right = read_node_slot(root, parent, pslot + 1);
1191
1192        /*
1193         * then try to empty the right most buffer into the middle
1194         */
1195        if (right) {
1196                u32 right_nr;
1197
1198                btrfs_tree_lock(right);
1199                btrfs_set_lock_blocking(right);
1200
1201                right_nr = btrfs_header_nritems(right);
1202                if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1203                        wret = 1;
1204                } else {
1205                        ret = btrfs_cow_block(trans, root, right,
1206                                              parent, pslot + 1,
1207                                              &right);
1208                        if (ret)
1209                                wret = 1;
1210                        else {
1211                                wret = balance_node_right(trans, root,
1212                                                          right, mid);
1213                        }
1214                }
1215                if (wret < 0)
1216                        ret = wret;
1217                if (wret == 0) {
1218                        struct btrfs_disk_key disk_key;
1219
1220                        btrfs_node_key(right, &disk_key, 0);
1221                        btrfs_set_node_key(parent, &disk_key, pslot + 1);
1222                        btrfs_mark_buffer_dirty(parent);
1223
1224                        if (btrfs_header_nritems(mid) <= orig_slot) {
1225                                path->nodes[level] = right;
1226                                path->slots[level + 1] += 1;
1227                                path->slots[level] = orig_slot -
1228                                        btrfs_header_nritems(mid);
1229                                btrfs_tree_unlock(mid);
1230                                free_extent_buffer(mid);
1231                        } else {
1232                                btrfs_tree_unlock(right);
1233                                free_extent_buffer(right);
1234                        }
1235                        return 0;
1236                }
1237                btrfs_tree_unlock(right);
1238                free_extent_buffer(right);
1239        }
1240        return 1;
1241}
1242
1243/*
1244 * readahead one full node of leaves, finding things that are close
1245 * to the block in 'slot', and triggering ra on them.
1246 */
1247static void reada_for_search(struct btrfs_root *root,
1248                             struct btrfs_path *path,
1249                             int level, int slot, u64 objectid)
1250{
1251        struct extent_buffer *node;
1252        struct btrfs_disk_key disk_key;
1253        u32 nritems;
1254        u64 search;
1255        u64 target;
1256        u64 nread = 0;
1257        int direction = path->reada;
1258        struct extent_buffer *eb;
1259        u32 nr;
1260        u32 blocksize;
1261        u32 nscan = 0;
1262
1263        if (level != 1)
1264                return;
1265
1266        if (!path->nodes[level])
1267                return;
1268
1269        node = path->nodes[level];
1270
1271        search = btrfs_node_blockptr(node, slot);
1272        blocksize = btrfs_level_size(root, level - 1);
1273        eb = btrfs_find_tree_block(root, search, blocksize);
1274        if (eb) {
1275                free_extent_buffer(eb);
1276                return;
1277        }
1278
1279        target = search;
1280
1281        nritems = btrfs_header_nritems(node);
1282        nr = slot;
1283        while (1) {
1284                if (direction < 0) {
1285                        if (nr == 0)
1286                                break;
1287                        nr--;
1288                } else if (direction > 0) {
1289                        nr++;
1290                        if (nr >= nritems)
1291                                break;
1292                }
1293                if (path->reada < 0 && objectid) {
1294                        btrfs_node_key(node, &disk_key, nr);
1295                        if (btrfs_disk_key_objectid(&disk_key) != objectid)
1296                                break;
1297                }
1298                search = btrfs_node_blockptr(node, nr);
1299                if ((search <= target && target - search <= 65536) ||
1300                    (search > target && search - target <= 65536)) {
1301                        readahead_tree_block(root, search, blocksize,
1302                                     btrfs_node_ptr_generation(node, nr));
1303                        nread += blocksize;
1304                }
1305                nscan++;
1306                if ((nread > 65536 || nscan > 32))
1307                        break;
1308        }
1309}
1310
1311/*
1312 * returns -EAGAIN if it had to drop the path, or zero if everything was in
1313 * cache
1314 */
1315static noinline int reada_for_balance(struct btrfs_root *root,
1316                                      struct btrfs_path *path, int level)
1317{
1318        int slot;
1319        int nritems;
1320        struct extent_buffer *parent;
1321        struct extent_buffer *eb;
1322        u64 gen;
1323        u64 block1 = 0;
1324        u64 block2 = 0;
1325        int ret = 0;
1326        int blocksize;
1327
1328        parent = path->nodes[level + 1];
1329        if (!parent)
1330                return 0;
1331
1332        nritems = btrfs_header_nritems(parent);
1333        slot = path->slots[level + 1];
1334        blocksize = btrfs_level_size(root, level);
1335
1336        if (slot > 0) {
1337                block1 = btrfs_node_blockptr(parent, slot - 1);
1338                gen = btrfs_node_ptr_generation(parent, slot - 1);
1339                eb = btrfs_find_tree_block(root, block1, blocksize);
1340                if (eb && btrfs_buffer_uptodate(eb, gen))
1341                        block1 = 0;
1342                free_extent_buffer(eb);
1343        }
1344        if (slot + 1 < nritems) {
1345                block2 = btrfs_node_blockptr(parent, slot + 1);
1346                gen = btrfs_node_ptr_generation(parent, slot + 1);
1347                eb = btrfs_find_tree_block(root, block2, blocksize);
1348                if (eb && btrfs_buffer_uptodate(eb, gen))
1349                        block2 = 0;
1350                free_extent_buffer(eb);
1351        }
1352        if (block1 || block2) {
1353                ret = -EAGAIN;
1354
1355                /* release the whole path */
1356                btrfs_release_path(root, path);
1357
1358                /* read the blocks */
1359                if (block1)
1360                        readahead_tree_block(root, block1, blocksize, 0);
1361                if (block2)
1362                        readahead_tree_block(root, block2, blocksize, 0);
1363
1364                if (block1) {
1365                        eb = read_tree_block(root, block1, blocksize, 0);
1366                        free_extent_buffer(eb);
1367                }
1368                if (block2) {
1369                        eb = read_tree_block(root, block2, blocksize, 0);
1370                        free_extent_buffer(eb);
1371                }
1372        }
1373        return ret;
1374}
1375
1376
1377/*
1378 * when we walk down the tree, it is usually safe to unlock the higher layers
1379 * in the tree.  The exceptions are when our path goes through slot 0, because
1380 * operations on the tree might require changing key pointers higher up in the
1381 * tree.
1382 *
1383 * callers might also have set path->keep_locks, which tells this code to keep
1384 * the lock if the path points to the last slot in the block.  This is part of
1385 * walking through the tree, and selecting the next slot in the higher block.
1386 *
1387 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
1388 * if lowest_unlock is 1, level 0 won't be unlocked
1389 */
1390static noinline void unlock_up(struct btrfs_path *path, int level,
1391                               int lowest_unlock)
1392{
1393        int i;
1394        int skip_level = level;
1395        int no_skips = 0;
1396        struct extent_buffer *t;
1397
1398        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1399                if (!path->nodes[i])
1400                        break;
1401                if (!path->locks[i])
1402                        break;
1403                if (!no_skips && path->slots[i] == 0) {
1404                        skip_level = i + 1;
1405                        continue;
1406                }
1407                if (!no_skips && path->keep_locks) {
1408                        u32 nritems;
1409                        t = path->nodes[i];
1410                        nritems = btrfs_header_nritems(t);
1411                        if (nritems < 1 || path->slots[i] >= nritems - 1) {
1412                                skip_level = i + 1;
1413                                continue;
1414                        }
1415                }
1416                if (skip_level < i && i >= lowest_unlock)
1417                        no_skips = 1;
1418
1419                t = path->nodes[i];
1420                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1421                        btrfs_tree_unlock(t);
1422                        path->locks[i] = 0;
1423                }
1424        }
1425}
1426
1427/*
1428 * This releases any locks held in the path starting at level and
1429 * going all the way up to the root.
1430 *
1431 * btrfs_search_slot will keep the lock held on higher nodes in a few
1432 * corner cases, such as COW of the block at slot zero in the node.  This
1433 * ignores those rules, and it should only be called when there are no
1434 * more updates to be done higher up in the tree.
1435 */
1436noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1437{
1438        int i;
1439
1440        if (path->keep_locks || path->lowest_level)
1441                return;
1442
1443        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1444                if (!path->nodes[i])
1445                        continue;
1446                if (!path->locks[i])
1447                        continue;
1448                btrfs_tree_unlock(path->nodes[i]);
1449                path->locks[i] = 0;
1450        }
1451}
1452
1453/*
1454 * helper function for btrfs_search_slot.  The goal is to find a block
1455 * in cache without setting the path to blocking.  If we find the block
1456 * we return zero and the path is unchanged.
1457 *
1458 * If we can't find the block, we set the path blocking and do some
1459 * reada.  -EAGAIN is returned and the search must be repeated.
1460 */
1461static int
1462read_block_for_search(struct btrfs_trans_handle *trans,
1463                       struct btrfs_root *root, struct btrfs_path *p,
1464                       struct extent_buffer **eb_ret, int level, int slot,
1465                       struct btrfs_key *key)
1466{
1467        u64 blocknr;
1468        u64 gen;
1469        u32 blocksize;
1470        struct extent_buffer *b = *eb_ret;
1471        struct extent_buffer *tmp;
1472        int ret;
1473
1474        blocknr = btrfs_node_blockptr(b, slot);
1475        gen = btrfs_node_ptr_generation(b, slot);
1476        blocksize = btrfs_level_size(root, level - 1);
1477
1478        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1479        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1480                /*
1481                 * we found an up to date block without sleeping, return
1482                 * right away
1483                 */
1484                *eb_ret = tmp;
1485                return 0;
1486        }
1487
1488        /*
1489         * reduce lock contention at high levels
1490         * of the btree by dropping locks before
1491         * we read.  Don't release the lock on the current
1492         * level because we need to walk this node to figure
1493         * out which blocks to read.
1494         */
1495        btrfs_unlock_up_safe(p, level + 1);
1496        btrfs_set_path_blocking(p);
1497
1498        if (tmp)
1499                free_extent_buffer(tmp);
1500        if (p->reada)
1501                reada_for_search(root, p, level, slot, key->objectid);
1502
1503        btrfs_release_path(NULL, p);
1504
1505        ret = -EAGAIN;
1506        tmp = read_tree_block(root, blocknr, blocksize, gen);
1507        if (tmp) {
1508                /*
1509                 * If the read above didn't mark this buffer up to date,
1510                 * it will never end up being up to date.  Set ret to EIO now
1511                 * and give up so that our caller doesn't loop forever
1512                 * on our EAGAINs.
1513                 */
1514                if (!btrfs_buffer_uptodate(tmp, 0))
1515                        ret = -EIO;
1516                free_extent_buffer(tmp);
1517        }
1518        return ret;
1519}
1520
1521/*
1522 * helper function for btrfs_search_slot.  This does all of the checks
1523 * for node-level blocks and does any balancing required based on
1524 * the ins_len.
1525 *
1526 * If no extra work was required, zero is returned.  If we had to
1527 * drop the path, -EAGAIN is returned and btrfs_search_slot must
1528 * start over
1529 */
1530static int
1531setup_nodes_for_search(struct btrfs_trans_handle *trans,
1532                       struct btrfs_root *root, struct btrfs_path *p,
1533                       struct extent_buffer *b, int level, int ins_len)
1534{
1535        int ret;
1536        if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1537            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1538                int sret;
1539
1540                sret = reada_for_balance(root, p, level);
1541                if (sret)
1542                        goto again;
1543
1544                btrfs_set_path_blocking(p);
1545                sret = split_node(trans, root, p, level);
1546                btrfs_clear_path_blocking(p, NULL);
1547
1548                BUG_ON(sret > 0);
1549                if (sret) {
1550                        ret = sret;
1551                        goto done;
1552                }
1553                b = p->nodes[level];
1554        } else if (ins_len < 0 && btrfs_header_nritems(b) <
1555                   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
1556                int sret;
1557
1558                sret = reada_for_balance(root, p, level);
1559                if (sret)
1560                        goto again;
1561
1562                btrfs_set_path_blocking(p);
1563                sret = balance_level(trans, root, p, level);
1564                btrfs_clear_path_blocking(p, NULL);
1565
1566                if (sret) {
1567                        ret = sret;
1568                        goto done;
1569                }
1570                b = p->nodes[level];
1571                if (!b) {
1572                        btrfs_release_path(NULL, p);
1573                        goto again;
1574                }
1575                BUG_ON(btrfs_header_nritems(b) == 1);
1576        }
1577        return 0;
1578
1579again:
1580        ret = -EAGAIN;
1581done:
1582        return ret;
1583}
1584
1585/*
1586 * look for key in the tree.  path is filled in with nodes along the way
1587 * if key is found, we return zero and you can find the item in the leaf
1588 * level of the path (level 0)
1589 *
1590 * If the key isn't found, the path points to the slot where it should
1591 * be inserted, and 1 is returned.  If there are other errors during the
1592 * search a negative error number is returned.
1593 *
1594 * if ins_len > 0, nodes and leaves will be split as we walk down the
1595 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1596 * possible)
1597 */
1598int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1599                      *root, struct btrfs_key *key, struct btrfs_path *p, int
1600                      ins_len, int cow)
1601{
1602        struct extent_buffer *b;
1603        int slot;
1604        int ret;
1605        int level;
1606        int lowest_unlock = 1;
1607        u8 lowest_level = 0;
1608
1609        lowest_level = p->lowest_level;
1610        WARN_ON(lowest_level && ins_len > 0);
1611        WARN_ON(p->nodes[0] != NULL);
1612
1613        if (ins_len < 0)
1614                lowest_unlock = 2;
1615
1616again:
1617        if (p->skip_locking)
1618                b = btrfs_root_node(root);
1619        else
1620                b = btrfs_lock_root_node(root);
1621
1622        while (b) {
1623                level = btrfs_header_level(b);
1624
1625                /*
1626                 * setup the path here so we can release it under lock
1627                 * contention with the cow code
1628                 */
1629                p->nodes[level] = b;
1630                if (!p->skip_locking)
1631                        p->locks[level] = 1;
1632
1633                if (cow) {
1634                        int wret;
1635
1636                        /*
1637                         * if we don't really need to cow this block
1638                         * then we don't want to set the path blocking,
1639                         * so we test it here
1640                         */
1641                        if (btrfs_header_generation(b) == trans->transid &&
1642                            btrfs_header_owner(b) == root->root_key.objectid &&
1643                            !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1644                                goto cow_done;
1645                        }
1646                        btrfs_set_path_blocking(p);
1647
1648                        wret = btrfs_cow_block(trans, root, b,
1649                                               p->nodes[level + 1],
1650                                               p->slots[level + 1], &b);
1651                        if (wret) {
1652                                free_extent_buffer(b);
1653                                ret = wret;
1654                                goto done;
1655                        }
1656                }
1657cow_done:
1658                BUG_ON(!cow && ins_len);
1659                if (level != btrfs_header_level(b))
1660                        WARN_ON(1);
1661                level = btrfs_header_level(b);
1662
1663                p->nodes[level] = b;
1664                if (!p->skip_locking)
1665                        p->locks[level] = 1;
1666
1667                btrfs_clear_path_blocking(p, NULL);
1668
1669                /*
1670                 * we have a lock on b and as long as we aren't changing
1671                 * the tree, there is no way to for the items in b to change.
1672                 * It is safe to drop the lock on our parent before we
1673                 * go through the expensive btree search on b.
1674                 *
1675                 * If cow is true, then we might be changing slot zero,
1676                 * which may require changing the parent.  So, we can't
1677                 * drop the lock until after we know which slot we're
1678                 * operating on.
1679                 */
1680                if (!cow)
1681                        btrfs_unlock_up_safe(p, level + 1);
1682
1683                ret = check_block(root, p, level);
1684                if (ret) {
1685                        ret = -1;
1686                        goto done;
1687                }
1688
1689                ret = bin_search(b, key, level, &slot);
1690
1691                if (level != 0) {
1692                        if (ret && slot > 0)
1693                                slot -= 1;
1694                        p->slots[level] = slot;
1695                        ret = setup_nodes_for_search(trans, root, p, b, level,
1696                                                     ins_len);
1697                        if (ret == -EAGAIN)
1698                                goto again;
1699                        else if (ret)
1700                                goto done;
1701                        b = p->nodes[level];
1702                        slot = p->slots[level];
1703
1704                        unlock_up(p, level, lowest_unlock);
1705
1706                        /* this is only true while dropping a snapshot */
1707                        if (level == lowest_level) {
1708                                ret = 0;
1709                                goto done;
1710                        }
1711
1712                        ret = read_block_for_search(trans, root, p,
1713                                                    &b, level, slot, key);
1714                        if (ret == -EAGAIN)
1715                                goto again;
1716
1717                        if (ret == -EIO)
1718                                goto done;
1719
1720                        if (!p->skip_locking) {
1721                                int lret;
1722
1723                                btrfs_clear_path_blocking(p, NULL);
1724                                lret = btrfs_try_spin_lock(b);
1725
1726                                if (!lret) {
1727                                        btrfs_set_path_blocking(p);
1728                                        btrfs_tree_lock(b);
1729                                        btrfs_clear_path_blocking(p, b);
1730                                }
1731                        }
1732                } else {
1733                        p->slots[level] = slot;
1734                        if (ins_len > 0 &&
1735                            btrfs_leaf_free_space(root, b) < ins_len) {
1736                                int sret;
1737
1738                                btrfs_set_path_blocking(p);
1739                                sret = split_leaf(trans, root, key,
1740                                                      p, ins_len, ret == 0);
1741                                btrfs_clear_path_blocking(p, NULL);
1742
1743                                BUG_ON(sret > 0);
1744                                if (sret) {
1745                                        ret = sret;
1746                                        goto done;
1747                                }
1748                        }
1749                        if (!p->search_for_split)
1750                                unlock_up(p, level, lowest_unlock);
1751                        goto done;
1752                }
1753        }
1754        ret = 1;
1755done:
1756        /*
1757         * we don't really know what they plan on doing with the path
1758         * from here on, so for now just mark it as blocking
1759         */
1760        if (!p->leave_spinning)
1761                btrfs_set_path_blocking(p);
1762        if (ret < 0)
1763                btrfs_release_path(root, p);
1764        return ret;
1765}
1766
1767int btrfs_merge_path(struct btrfs_trans_handle *trans,
1768                     struct btrfs_root *root,
1769                     struct btrfs_key *node_keys,
1770                     u64 *nodes, int lowest_level)
1771{
1772        struct extent_buffer *eb;
1773        struct extent_buffer *parent;
1774        struct btrfs_key key;
1775        u64 bytenr;
1776        u64 generation;
1777        u32 blocksize;
1778        int level;
1779        int slot;
1780        int key_match;
1781        int ret;
1782
1783        eb = btrfs_lock_root_node(root);
1784        ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
1785        BUG_ON(ret);
1786
1787        btrfs_set_lock_blocking(eb);
1788
1789        parent = eb;
1790        while (1) {
1791                level = btrfs_header_level(parent);
1792                if (level == 0 || level <= lowest_level)
1793                        break;
1794
1795                ret = bin_search(parent, &node_keys[lowest_level], level,
1796                                 &slot);
1797                if (ret && slot > 0)
1798                        slot--;
1799
1800                bytenr = btrfs_node_blockptr(parent, slot);
1801                if (nodes[level - 1] == bytenr)
1802                        break;
1803
1804                blocksize = btrfs_level_size(root, level - 1);
1805                generation = btrfs_node_ptr_generation(parent, slot);
1806                btrfs_node_key_to_cpu(eb, &key, slot);
1807                key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1808
1809                if (generation == trans->transid) {
1810                        eb = read_tree_block(root, bytenr, blocksize,
1811                                             generation);
1812                        btrfs_tree_lock(eb);
1813                        btrfs_set_lock_blocking(eb);
1814                }
1815
1816                /*
1817                 * if node keys match and node pointer hasn't been modified
1818                 * in the running transaction, we can merge the path. for
1819                 * blocks owened by reloc trees, the node pointer check is
1820                 * skipped, this is because these blocks are fully controlled
1821                 * by the space balance code, no one else can modify them.
1822                 */
1823                if (!nodes[level - 1] || !key_match ||
1824                    (generation == trans->transid &&
1825                     btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1826                        if (level == 1 || level == lowest_level + 1) {
1827                                if (generation == trans->transid) {
1828                                        btrfs_tree_unlock(eb);
1829                                        free_extent_buffer(eb);
1830                                }
1831                                break;
1832                        }
1833
1834                        if (generation != trans->transid) {
1835                                eb = read_tree_block(root, bytenr, blocksize,
1836                                                generation);
1837                                btrfs_tree_lock(eb);
1838                                btrfs_set_lock_blocking(eb);
1839                        }
1840
1841                        ret = btrfs_cow_block(trans, root, eb, parent, slot,
1842                                              &eb);
1843                        BUG_ON(ret);
1844
1845                        if (root->root_key.objectid ==
1846                            BTRFS_TREE_RELOC_OBJECTID) {
1847                                if (!nodes[level - 1]) {
1848                                        nodes[level - 1] = eb->start;
1849                                        memcpy(&node_keys[level - 1], &key,
1850                                               sizeof(node_keys[0]));
1851                                } else {
1852                                        WARN_ON(1);
1853                                }
1854                        }
1855
1856                        btrfs_tree_unlock(parent);
1857                        free_extent_buffer(parent);
1858                        parent = eb;
1859                        continue;
1860                }
1861
1862                btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1863                btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1864                btrfs_mark_buffer_dirty(parent);
1865
1866                ret = btrfs_inc_extent_ref(trans, root,
1867                                        nodes[level - 1],
1868                                        blocksize, parent->start,
1869                                        btrfs_header_owner(parent),
1870                                        btrfs_header_generation(parent),
1871                                        level - 1);
1872                BUG_ON(ret);
1873
1874                /*
1875                 * If the block was created in the running transaction,
1876                 * it's possible this is the last reference to it, so we
1877                 * should drop the subtree.
1878                 */
1879                if (generation == trans->transid) {
1880                        ret = btrfs_drop_subtree(trans, root, eb, parent);
1881                        BUG_ON(ret);
1882                        btrfs_tree_unlock(eb);
1883                        free_extent_buffer(eb);
1884                } else {
1885                        ret = btrfs_free_extent(trans, root, bytenr,
1886                                        blocksize, parent->start,
1887                                        btrfs_header_owner(parent),
1888                                        btrfs_header_generation(parent),
1889                                        level - 1, 1);
1890                        BUG_ON(ret);
1891                }
1892                break;
1893        }
1894        btrfs_tree_unlock(parent);
1895        free_extent_buffer(parent);
1896        return 0;
1897}
1898
1899/*
1900 * adjust the pointers going up the tree, starting at level
1901 * making sure the right key of each node is points to 'key'.
1902 * This is used after shifting pointers to the left, so it stops
1903 * fixing up pointers when a given leaf/node is not in slot 0 of the
1904 * higher levels
1905 *
1906 * If this fails to write a tree block, it returns -1, but continues
1907 * fixing up the blocks in ram so the tree is consistent.
1908 */
1909static int fixup_low_keys(struct btrfs_trans_handle *trans,
1910                          struct btrfs_root *root, struct btrfs_path *path,
1911                          struct btrfs_disk_key *key, int level)
1912{
1913        int i;
1914        int ret = 0;
1915        struct extent_buffer *t;
1916
1917        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1918                int tslot = path->slots[i];
1919                if (!path->nodes[i])
1920                        break;
1921                t = path->nodes[i];
1922                btrfs_set_node_key(t, key, tslot);
1923                btrfs_mark_buffer_dirty(path->nodes[i]);
1924                if (tslot != 0)
1925                        break;
1926        }
1927        return ret;
1928}
1929
1930/*
1931 * update item key.
1932 *
1933 * This function isn't completely safe. It's the caller's responsibility
1934 * that the new key won't break the order
1935 */
1936int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1937                            struct btrfs_root *root, struct btrfs_path *path,
1938                            struct btrfs_key *new_key)
1939{
1940        struct btrfs_disk_key disk_key;
1941        struct extent_buffer *eb;
1942        int slot;
1943
1944        eb = path->nodes[0];
1945        slot = path->slots[0];
1946        if (slot > 0) {
1947                btrfs_item_key(eb, &disk_key, slot - 1);
1948                if (comp_keys(&disk_key, new_key) >= 0)
1949                        return -1;
1950        }
1951        if (slot < btrfs_header_nritems(eb) - 1) {
1952                btrfs_item_key(eb, &disk_key, slot + 1);
1953                if (comp_keys(&disk_key, new_key) <= 0)
1954                        return -1;
1955        }
1956
1957        btrfs_cpu_key_to_disk(&disk_key, new_key);
1958        btrfs_set_item_key(eb, &disk_key, slot);
1959        btrfs_mark_buffer_dirty(eb);
1960        if (slot == 0)
1961                fixup_low_keys(trans, root, path, &disk_key, 1);
1962        return 0;
1963}
1964
1965/*
1966 * try to push data from one node into the next node left in the
1967 * tree.
1968 *
1969 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1970 * error, and > 0 if there was no room in the left hand block.
1971 */
1972static int push_node_left(struct btrfs_trans_handle *trans,
1973                          struct btrfs_root *root, struct extent_buffer *dst,
1974                          struct extent_buffer *src, int empty)
1975{
1976        int push_items = 0;
1977        int src_nritems;
1978        int dst_nritems;
1979        int ret = 0;
1980
1981        src_nritems = btrfs_header_nritems(src);
1982        dst_nritems = btrfs_header_nritems(dst);
1983        push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1984        WARN_ON(btrfs_header_generation(src) != trans->transid);
1985        WARN_ON(btrfs_header_generation(dst) != trans->transid);
1986
1987        if (!empty && src_nritems <= 8)
1988                return 1;
1989
1990        if (push_items <= 0)
1991                return 1;
1992
1993        if (empty) {
1994                push_items = min(src_nritems, push_items);
1995                if (push_items < src_nritems) {
1996                        /* leave at least 8 pointers in the node if
1997                         * we aren't going to empty it
1998                         */
1999                        if (src_nritems - push_items < 8) {
2000                                if (push_items <= 8)
2001                                        return 1;
2002                                push_items -= 8;
2003                        }
2004                }
2005        } else
2006                push_items = min(src_nritems - 8, push_items);
2007
2008        copy_extent_buffer(dst, src,
2009                           btrfs_node_key_ptr_offset(dst_nritems),
2010                           btrfs_node_key_ptr_offset(0),
2011                           push_items * sizeof(struct btrfs_key_ptr));
2012
2013        if (push_items < src_nritems) {
2014                memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2015                                      btrfs_node_key_ptr_offset(push_items),
2016                                      (src_nritems - push_items) *
2017                                      sizeof(struct btrfs_key_ptr));
2018        }
2019        btrfs_set_header_nritems(src, src_nritems - push_items);
2020        btrfs_set_header_nritems(dst, dst_nritems + push_items);
2021        btrfs_mark_buffer_dirty(src);
2022        btrfs_mark_buffer_dirty(dst);
2023
2024        ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2025        BUG_ON(ret);
2026
2027        return ret;
2028}
2029
2030/*
2031 * try to push data from one node into the next node right in the
2032 * tree.
2033 *
2034 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2035 * error, and > 0 if there was no room in the right hand block.
2036 *
2037 * this will  only push up to 1/2 the contents of the left node over
2038 */
2039static int balance_node_right(struct btrfs_trans_handle *trans,
2040                              struct btrfs_root *root,
2041                              struct extent_buffer *dst,
2042                              struct extent_buffer *src)
2043{
2044        int push_items = 0;
2045        int max_push;
2046        int src_nritems;
2047        int dst_nritems;
2048        int ret = 0;
2049
2050        WARN_ON(btrfs_header_generation(src) != trans->transid);
2051        WARN_ON(btrfs_header_generation(dst) != trans->transid);
2052
2053        src_nritems = btrfs_header_nritems(src);
2054        dst_nritems = btrfs_header_nritems(dst);
2055        push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
2056        if (push_items <= 0)
2057                return 1;
2058
2059        if (src_nritems < 4)
2060                return 1;
2061
2062        max_push = src_nritems / 2 + 1;
2063        /* don't try to empty the node */
2064        if (max_push >= src_nritems)
2065                return 1;
2066
2067        if (max_push < push_items)
2068                push_items = max_push;
2069
2070        memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2071                                      btrfs_node_key_ptr_offset(0),
2072                                      (dst_nritems) *
2073                                      sizeof(struct btrfs_key_ptr));
2074
2075        copy_extent_buffer(dst, src,
2076                           btrfs_node_key_ptr_offset(0),
2077                           btrfs_node_key_ptr_offset(src_nritems - push_items),
2078                           push_items * sizeof(struct btrfs_key_ptr));
2079
2080        btrfs_set_header_nritems(src, src_nritems - push_items);
2081        btrfs_set_header_nritems(dst, dst_nritems + push_items);
2082
2083        btrfs_mark_buffer_dirty(src);
2084        btrfs_mark_buffer_dirty(dst);
2085
2086        ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2087        BUG_ON(ret);
2088
2089        return ret;
2090}
2091
2092/*
2093 * helper function to insert a new root level in the tree.
2094 * A new node is allocated, and a single item is inserted to
2095 * point to the existing root
2096 *
2097 * returns zero on success or < 0 on failure.
2098 */
2099static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2100                           struct btrfs_root *root,
2101                           struct btrfs_path *path, int level)
2102{
2103        u64 lower_gen;
2104        struct extent_buffer *lower;
2105        struct extent_buffer *c;
2106        struct extent_buffer *old;
2107        struct btrfs_disk_key lower_key;
2108        int ret;
2109
2110        BUG_ON(path->nodes[level]);
2111        BUG_ON(path->nodes[level-1] != root->node);
2112
2113        lower = path->nodes[level-1];
2114        if (level == 1)
2115                btrfs_item_key(lower, &lower_key, 0);
2116        else
2117                btrfs_node_key(lower, &lower_key, 0);
2118
2119        c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2120                                   root->root_key.objectid, trans->transid,
2121                                   level, root->node->start, 0);
2122        if (IS_ERR(c))
2123                return PTR_ERR(c);
2124
2125        memset_extent_buffer(c, 0, 0, root->nodesize);
2126        btrfs_set_header_nritems(c, 1);
2127        btrfs_set_header_level(c, level);
2128        btrfs_set_header_bytenr(c, c->start);
2129        btrfs_set_header_generation(c, trans->transid);
2130        btrfs_set_header_owner(c, root->root_key.objectid);
2131
2132        write_extent_buffer(c, root->fs_info->fsid,
2133                            (unsigned long)btrfs_header_fsid(c),
2134                            BTRFS_FSID_SIZE);
2135
2136        write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2137                            (unsigned long)btrfs_header_chunk_tree_uuid(c),
2138                            BTRFS_UUID_SIZE);
2139
2140        btrfs_set_node_key(c, &lower_key, 0);
2141        btrfs_set_node_blockptr(c, 0, lower->start);
2142        lower_gen = btrfs_header_generation(lower);
2143        WARN_ON(lower_gen != trans->transid);
2144
2145        btrfs_set_node_ptr_generation(c, 0, lower_gen);
2146
2147        btrfs_mark_buffer_dirty(c);
2148
2149        spin_lock(&root->node_lock);
2150        old = root->node;
2151        root->node = c;
2152        spin_unlock(&root->node_lock);
2153
2154        ret = btrfs_update_extent_ref(trans, root, lower->start,
2155                                      lower->len, lower->start, c->start,
2156                                      root->root_key.objectid,
2157                                      trans->transid, level - 1);
2158        BUG_ON(ret);
2159
2160        /* the super has an extra ref to root->node */
2161        free_extent_buffer(old);
2162
2163        add_root_to_dirty_list(root);
2164        extent_buffer_get(c);
2165        path->nodes[level] = c;
2166        path->locks[level] = 1;
2167        path->slots[level] = 0;
2168        return 0;
2169}
2170
2171/*
2172 * worker function to insert a single pointer in a node.
2173 * the node should have enough room for the pointer already
2174 *
2175 * slot and level indicate where you want the key to go, and
2176 * blocknr is the block the key points to.
2177 *
2178 * returns zero on success and < 0 on any error
2179 */
2180static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2181                      *root, struct btrfs_path *path, struct btrfs_disk_key
2182                      *key, u64 bytenr, int slot, int level)
2183{
2184        struct extent_buffer *lower;
2185        int nritems;
2186
2187        BUG_ON(!path->nodes[level]);
2188        lower = path->nodes[level];
2189        nritems = btrfs_header_nritems(lower);
2190        BUG_ON(slot > nritems);
2191        if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2192                BUG();
2193        if (slot != nritems) {
2194                memmove_extent_buffer(lower,
2195                              btrfs_node_key_ptr_offset(slot + 1),
2196                              btrfs_node_key_ptr_offset(slot),
2197                              (nritems - slot) * sizeof(struct btrfs_key_ptr));
2198        }
2199        btrfs_set_node_key(lower, key, slot);
2200        btrfs_set_node_blockptr(lower, slot, bytenr);
2201        WARN_ON(trans->transid == 0);
2202        btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2203        btrfs_set_header_nritems(lower, nritems + 1);
2204        btrfs_mark_buffer_dirty(lower);
2205        return 0;
2206}
2207
2208/*
2209 * split the node at the specified level in path in two.
2210 * The path is corrected to point to the appropriate node after the split
2211 *
2212 * Before splitting this tries to make some room in the node by pushing
2213 * left and right, if either one works, it returns right away.
2214 *
2215 * returns 0 on success and < 0 on failure
2216 */
2217static noinline int split_node(struct btrfs_trans_handle *trans,
2218                               struct btrfs_root *root,
2219                               struct btrfs_path *path, int level)
2220{
2221        struct extent_buffer *c;
2222        struct extent_buffer *split;
2223        struct btrfs_disk_key disk_key;
2224        int mid;
2225        int ret;
2226        int wret;
2227        u32 c_nritems;
2228
2229        c = path->nodes[level];
2230        WARN_ON(btrfs_header_generation(c) != trans->transid);
2231        if (c == root->node) {
2232                /* trying to split the root, lets make a new one */
2233                ret = insert_new_root(trans, root, path, level + 1);
2234                if (ret)
2235                        return ret;
2236        } else if (!trans->transaction->delayed_refs.flushing) {
2237                ret = push_nodes_for_insert(trans, root, path, level);
2238                c = path->nodes[level];
2239                if (!ret && btrfs_header_nritems(c) <
2240                    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2241                        return 0;
2242                if (ret < 0)
2243                        return ret;
2244        }
2245
2246        c_nritems = btrfs_header_nritems(c);
2247
2248        split = btrfs_alloc_free_block(trans, root, root->nodesize,
2249                                        path->nodes[level + 1]->start,
2250                                        root->root_key.objectid,
2251                                        trans->transid, level, c->start, 0);
2252        if (IS_ERR(split))
2253                return PTR_ERR(split);
2254
2255        btrfs_set_header_flags(split, btrfs_header_flags(c));
2256        btrfs_set_header_level(split, btrfs_header_level(c));
2257        btrfs_set_header_bytenr(split, split->start);
2258        btrfs_set_header_generation(split, trans->transid);
2259        btrfs_set_header_owner(split, root->root_key.objectid);
2260        btrfs_set_header_flags(split, 0);
2261        write_extent_buffer(split, root->fs_info->fsid,
2262                            (unsigned long)btrfs_header_fsid(split),
2263                            BTRFS_FSID_SIZE);
2264        write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2265                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
2266                            BTRFS_UUID_SIZE);
2267
2268        mid = (c_nritems + 1) / 2;
2269
2270        copy_extent_buffer(split, c,
2271                           btrfs_node_key_ptr_offset(0),
2272                           btrfs_node_key_ptr_offset(mid),
2273                           (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2274        btrfs_set_header_nritems(split, c_nritems - mid);
2275        btrfs_set_header_nritems(c, mid);
2276        ret = 0;
2277
2278        btrfs_mark_buffer_dirty(c);
2279        btrfs_mark_buffer_dirty(split);
2280
2281        btrfs_node_key(split, &disk_key, 0);
2282        wret = insert_ptr(trans, root, path, &disk_key, split->start,
2283                          path->slots[level + 1] + 1,
2284                          level + 1);
2285        if (wret)
2286                ret = wret;
2287
2288        ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2289        BUG_ON(ret);
2290
2291        if (path->slots[level] >= mid) {
2292                path->slots[level] -= mid;
2293                btrfs_tree_unlock(c);
2294                free_extent_buffer(c);
2295                path->nodes[level] = split;
2296                path->slots[level + 1] += 1;
2297        } else {
2298                btrfs_tree_unlock(split);
2299                free_extent_buffer(split);
2300        }
2301        return ret;
2302}
2303
2304/*
2305 * how many bytes are required to store the items in a leaf.  start
2306 * and nr indicate which items in the leaf to check.  This totals up the
2307 * space used both by the item structs and the item data
2308 */
2309static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2310{
2311        int data_len;
2312        int nritems = btrfs_header_nritems(l);
2313        int end = min(nritems, start + nr) - 1;
2314
2315        if (!nr)
2316                return 0;
2317        data_len = btrfs_item_end_nr(l, start);
2318        data_len = data_len - btrfs_item_offset_nr(l, end);
2319        data_len += sizeof(struct btrfs_item) * nr;
2320        WARN_ON(data_len < 0);
2321        return data_len;
2322}
2323
2324/*
2325 * The space between the end of the leaf items and
2326 * the start of the leaf data.  IOW, how much room
2327 * the leaf has left for both items and data
2328 */
2329noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2330                                   struct extent_buffer *leaf)
2331{
2332        int nritems = btrfs_header_nritems(leaf);
2333        int ret;
2334        ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2335        if (ret < 0) {
2336                printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2337                       "used %d nritems %d\n",
2338                       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2339                       leaf_space_used(leaf, 0, nritems), nritems);
2340        }
2341        return ret;
2342}
2343
2344static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2345                                      struct btrfs_root *root,
2346                                      struct btrfs_path *path,
2347                                      int data_size, int empty,
2348                                      struct extent_buffer *right,
2349                                      int free_space, u32 left_nritems)
2350{
2351        struct extent_buffer *left = path->nodes[0];
2352        struct extent_buffer *upper = path->nodes[1];
2353        struct btrfs_disk_key disk_key;
2354        int slot;
2355        u32 i;
2356        int push_space = 0;
2357        int push_items = 0;
2358        struct btrfs_item *item;
2359        u32 nr;
2360        u32 right_nritems;
2361        u32 data_end;
2362        u32 this_item_size;
2363        int ret;
2364
2365        if (empty)
2366                nr = 0;
2367        else
2368                nr = 1;
2369
2370        if (path->slots[0] >= left_nritems)
2371                push_space += data_size;
2372
2373        slot = path->slots[1];
2374        i = left_nritems - 1;
2375        while (i >= nr) {
2376                item = btrfs_item_nr(left, i);
2377
2378                if (!empty && push_items > 0) {
2379                        if (path->slots[0] > i)
2380                                break;
2381                        if (path->slots[0] == i) {
2382                                int space = btrfs_leaf_free_space(root, left);
2383                                if (space + push_space * 2 > free_space)
2384                                        break;
2385                        }
2386                }
2387
2388                if (path->slots[0] == i)
2389                        push_space += data_size;
2390
2391                if (!left->map_token) {
2392                        map_extent_buffer(left, (unsigned long)item,
2393                                        sizeof(struct btrfs_item),
2394                                        &left->map_token, &left->kaddr,
2395                                        &left->map_start, &left->map_len,
2396                                        KM_USER1);
2397                }
2398
2399                this_item_size = btrfs_item_size(left, item);
2400                if (this_item_size + sizeof(*item) + push_space > free_space)
2401                        break;
2402
2403                push_items++;
2404                push_space += this_item_size + sizeof(*item);
2405                if (i == 0)
2406                        break;
2407                i--;
2408        }
2409        if (left->map_token) {
2410                unmap_extent_buffer(left, left->map_token, KM_USER1);
2411                left->map_token = NULL;
2412        }
2413
2414        if (push_items == 0)
2415                goto out_unlock;
2416
2417        if (!empty && push_items == left_nritems)
2418                WARN_ON(1);
2419
2420        /* push left to right */
2421        right_nritems = btrfs_header_nritems(right);
2422
2423        push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2424        push_space -= leaf_data_end(root, left);
2425
2426        /* make room in the right data area */
2427        data_end = leaf_data_end(root, right);
2428        memmove_extent_buffer(right,
2429                              btrfs_leaf_data(right) + data_end - push_space,
2430                              btrfs_leaf_data(right) + data_end,
2431                              BTRFS_LEAF_DATA_SIZE(root) - data_end);
2432
2433        /* copy from the left data area */
2434        copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2435                     BTRFS_LEAF_DATA_SIZE(root) - push_space,
2436                     btrfs_leaf_data(left) + leaf_data_end(root, left),
2437                     push_space);
2438
2439        memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2440                              btrfs_item_nr_offset(0),
2441                              right_nritems * sizeof(struct btrfs_item));
2442
2443        /* copy the items from left to right */
2444        copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2445                   btrfs_item_nr_offset(left_nritems - push_items),
2446                   push_items * sizeof(struct btrfs_item));
2447
2448        /* update the item pointers */
2449        right_nritems += push_items;
2450        btrfs_set_header_nritems(right, right_nritems);
2451        push_space = BTRFS_LEAF_DATA_SIZE(root);
2452        for (i = 0; i < right_nritems; i++) {
2453                item = btrfs_item_nr(right, i);
2454                if (!right->map_token) {
2455                        map_extent_buffer(right, (unsigned long)item,
2456                                        sizeof(struct btrfs_item),
2457                                        &right->map_token, &right->kaddr,
2458                                        &right->map_start, &right->map_len,
2459                                        KM_USER1);
2460                }
2461                push_space -= btrfs_item_size(right, item);
2462                btrfs_set_item_offset(right, item, push_space);
2463        }
2464
2465        if (right->map_token) {
2466                unmap_extent_buffer(right, right->map_token, KM_USER1);
2467                right->map_token = NULL;
2468        }
2469        left_nritems -= push_items;
2470        btrfs_set_header_nritems(left, left_nritems);
2471
2472        if (left_nritems)
2473                btrfs_mark_buffer_dirty(left);
2474        btrfs_mark_buffer_dirty(right);
2475
2476        ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2477        BUG_ON(ret);
2478
2479        btrfs_item_key(right, &disk_key, 0);
2480        btrfs_set_node_key(upper, &disk_key, slot + 1);
2481        btrfs_mark_buffer_dirty(upper);
2482
2483        /* then fixup the leaf pointer in the path */
2484        if (path->slots[0] >= left_nritems) {
2485                path->slots[0] -= left_nritems;
2486                if (btrfs_header_nritems(path->nodes[0]) == 0)
2487                        clean_tree_block(trans, root, path->nodes[0]);
2488                btrfs_tree_unlock(path->nodes[0]);
2489                free_extent_buffer(path->nodes[0]);
2490                path->nodes[0] = right;
2491                path->slots[1] += 1;
2492        } else {
2493                btrfs_tree_unlock(right);
2494                free_extent_buffer(right);
2495        }
2496        return 0;
2497
2498out_unlock:
2499        btrfs_tree_unlock(right);
2500        free_extent_buffer(right);
2501        return 1;
2502}
2503
2504/*
2505 * push some data in the path leaf to the right, trying to free up at
2506 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2507 *
2508 * returns 1 if the push failed because the other node didn't have enough
2509 * room, 0 if everything worked out and < 0 if there were major errors.
2510 */
2511static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2512                           *root, struct btrfs_path *path, int data_size,
2513                           int empty)
2514{
2515        struct extent_buffer *left = path->nodes[0];
2516        struct extent_buffer *right;
2517        struct extent_buffer *upper;
2518        int slot;
2519        int free_space;
2520        u32 left_nritems;
2521        int ret;
2522
2523        if (!path->nodes[1])
2524                return 1;
2525
2526        slot = path->slots[1];
2527        upper = path->nodes[1];
2528        if (slot >= btrfs_header_nritems(upper) - 1)
2529                return 1;
2530
2531        btrfs_assert_tree_locked(path->nodes[1]);
2532
2533        right = read_node_slot(root, upper, slot + 1);
2534        btrfs_tree_lock(right);
2535        btrfs_set_lock_blocking(right);
2536
2537        free_space = btrfs_leaf_free_space(root, right);
2538        if (free_space < data_size)
2539                goto out_unlock;
2540
2541        /* cow and double check */
2542        ret = btrfs_cow_block(trans, root, right, upper,
2543                              slot + 1, &right);
2544        if (ret)
2545                goto out_unlock;
2546
2547        free_space = btrfs_leaf_free_space(root, right);
2548        if (free_space < data_size)
2549                goto out_unlock;
2550
2551        left_nritems = btrfs_header_nritems(left);
2552        if (left_nritems == 0)
2553                goto out_unlock;
2554
2555        return __push_leaf_right(trans, root, path, data_size, empty,
2556                                right, free_space, left_nritems);
2557out_unlock:
2558        btrfs_tree_unlock(right);
2559        free_extent_buffer(right);
2560        return 1;
2561}
2562
2563/*
2564 * push some data in the path leaf to the left, trying to free up at
2565 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2566 */
2567static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2568                                     struct btrfs_root *root,
2569                                     struct btrfs_path *path, int data_size,
2570                                     int empty, struct extent_buffer *left,
2571                                     int free_space, int right_nritems)
2572{
2573        struct btrfs_disk_key disk_key;
2574        struct extent_buffer *right = path->nodes[0];
2575        int slot;
2576        int i;
2577        int push_space = 0;
2578        int push_items = 0;
2579        struct btrfs_item *item;
2580        u32 old_left_nritems;
2581        u32 nr;
2582        int ret = 0;
2583        int wret;
2584        u32 this_item_size;
2585        u32 old_left_item_size;
2586
2587        slot = path->slots[1];
2588
2589        if (empty)
2590                nr = right_nritems;
2591        else
2592                nr = right_nritems - 1;
2593
2594        for (i = 0; i < nr; i++) {
2595                item = btrfs_item_nr(right, i);
2596                if (!right->map_token) {
2597                        map_extent_buffer(right, (unsigned long)item,
2598                                        sizeof(struct btrfs_item),
2599                                        &right->map_token, &right->kaddr,
2600                                        &right->map_start, &right->map_len,
2601                                        KM_USER1);
2602                }
2603
2604                if (!empty && push_items > 0) {
2605                        if (path->slots[0] < i)
2606                                break;
2607                        if (path->slots[0] == i) {
2608                                int space = btrfs_leaf_free_space(root, right);
2609                                if (space + push_space * 2 > free_space)
2610                                        break;
2611                        }
2612                }
2613
2614                if (path->slots[0] == i)
2615                        push_space += data_size;
2616
2617                this_item_size = btrfs_item_size(right, item);
2618                if (this_item_size + sizeof(*item) + push_space > free_space)
2619                        break;
2620
2621                push_items++;
2622                push_space += this_item_size + sizeof(*item);
2623        }
2624
2625        if (right->map_token) {
2626                unmap_extent_buffer(right, right->map_token, KM_USER1);
2627                right->map_token = NULL;
2628        }
2629
2630        if (push_items == 0) {
2631                ret = 1;
2632                goto out;
2633        }
2634        if (!empty && push_items == btrfs_header_nritems(right))
2635                WARN_ON(1);
2636
2637        /* push data from right to left */
2638        copy_extent_buffer(left, right,
2639                           btrfs_item_nr_offset(btrfs_header_nritems(left)),
2640                           btrfs_item_nr_offset(0),
2641                           push_items * sizeof(struct btrfs_item));
2642
2643        push_space = BTRFS_LEAF_DATA_SIZE(root) -
2644                     btrfs_item_offset_nr(right, push_items - 1);
2645
2646        copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2647                     leaf_data_end(root, left) - push_space,
2648                     btrfs_leaf_data(right) +
2649                     btrfs_item_offset_nr(right, push_items - 1),
2650                     push_space);
2651        old_left_nritems = btrfs_header_nritems(left);
2652        BUG_ON(old_left_nritems <= 0);
2653
2654        old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2655        for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2656                u32 ioff;
2657
2658                item = btrfs_item_nr(left, i);
2659                if (!left->map_token) {
2660                        map_extent_buffer(left, (unsigned long)item,
2661                                        sizeof(struct btrfs_item),
2662                                        &left->map_token, &left->kaddr,
2663                                        &left->map_start, &left->map_len,
2664                                        KM_USER1);
2665                }
2666
2667                ioff = btrfs_item_offset(left, item);
2668                btrfs_set_item_offset(left, item,
2669                      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2670        }
2671        btrfs_set_header_nritems(left, old_left_nritems + push_items);
2672        if (left->map_token) {
2673                unmap_extent_buffer(left, left->map_token, KM_USER1);
2674                left->map_token = NULL;
2675        }
2676
2677        /* fixup right node */
2678        if (push_items > right_nritems) {
2679                printk(KERN_CRIT "push items %d nr %u\n", push_items,
2680                       right_nritems);
2681                WARN_ON(1);
2682        }
2683
2684        if (push_items < right_nritems) {
2685                push_space = btrfs_item_offset_nr(right, push_items - 1) -
2686                                                  leaf_data_end(root, right);
2687                memmove_extent_buffer(right, btrfs_leaf_data(right) +
2688                                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
2689                                      btrfs_leaf_data(right) +
2690                                      leaf_data_end(root, right), push_space);
2691
2692                memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2693                              btrfs_item_nr_offset(push_items),
2694                             (btrfs_header_nritems(right) - push_items) *
2695                             sizeof(struct btrfs_item));
2696        }
2697        right_nritems -= push_items;
2698        btrfs_set_header_nritems(right, right_nritems);
2699        push_space = BTRFS_LEAF_DATA_SIZE(root);
2700        for (i = 0; i < right_nritems; i++) {
2701                item = btrfs_item_nr(right, i);
2702
2703                if (!right->map_token) {
2704                        map_extent_buffer(right, (unsigned long)item,
2705                                        sizeof(struct btrfs_item),
2706                                        &right->map_token, &right->kaddr,
2707                                        &right->map_start, &right->map_len,
2708                                        KM_USER1);
2709                }
2710
2711                push_space = push_space - btrfs_item_size(right, item);
2712                btrfs_set_item_offset(right, item, push_space);
2713        }
2714        if (right->map_token) {
2715                unmap_extent_buffer(right, right->map_token, KM_USER1);
2716                right->map_token = NULL;
2717        }
2718
2719        btrfs_mark_buffer_dirty(left);
2720        if (right_nritems)
2721                btrfs_mark_buffer_dirty(right);
2722
2723        ret = btrfs_update_ref(trans, root, right, left,
2724                               old_left_nritems, push_items);
2725        BUG_ON(ret);
2726
2727        btrfs_item_key(right, &disk_key, 0);
2728        wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2729        if (wret)
2730                ret = wret;
2731
2732        /* then fixup the leaf pointer in the path */
2733        if (path->slots[0] < push_items) {
2734                path->slots[0] += old_left_nritems;
2735                if (btrfs_header_nritems(path->nodes[0]) == 0)
2736                        clean_tree_block(trans, root, path->nodes[0]);
2737                btrfs_tree_unlock(path->nodes[0]);
2738                free_extent_buffer(path->nodes[0]);
2739                path->nodes[0] = left;
2740                path->slots[1] -= 1;
2741        } else {
2742                btrfs_tree_unlock(left);
2743                free_extent_buffer(left);
2744                path->slots[0] -= push_items;
2745        }
2746        BUG_ON(path->slots[0] < 0);
2747        return ret;
2748out:
2749        btrfs_tree_unlock(left);
2750        free_extent_buffer(left);
2751        return ret;
2752}
2753
2754/*
2755 * push some data in the path leaf to the left, trying to free up at
2756 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2757 */
2758static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2759                          *root, struct btrfs_path *path, int data_size,
2760                          int empty)
2761{
2762        struct extent_buffer *right = path->nodes[0];
2763        struct extent_buffer *left;
2764        int slot;
2765        int free_space;
2766        u32 right_nritems;
2767        int ret = 0;
2768
2769        slot = path->slots[1];
2770        if (slot == 0)
2771                return 1;
2772        if (!path->nodes[1])
2773                return 1;
2774
2775        right_nritems = btrfs_header_nritems(right);
2776        if (right_nritems == 0)
2777                return 1;
2778
2779        btrfs_assert_tree_locked(path->nodes[1]);
2780
2781        left = read_node_slot(root, path->nodes[1], slot - 1);
2782        btrfs_tree_lock(left);
2783        btrfs_set_lock_blocking(left);
2784
2785        free_space = btrfs_leaf_free_space(root, left);
2786        if (free_space < data_size) {
2787                ret = 1;
2788                goto out;
2789        }
2790
2791        /* cow and double check */
2792        ret = btrfs_cow_block(trans, root, left,
2793                              path->nodes[1], slot - 1, &left);
2794        if (ret) {
2795                /* we hit -ENOSPC, but it isn't fatal here */
2796                ret = 1;
2797                goto out;
2798        }
2799
2800        free_space = btrfs_leaf_free_space(root, left);
2801        if (free_space < data_size) {
2802                ret = 1;
2803                goto out;
2804        }
2805
2806        return __push_leaf_left(trans, root, path, data_size,
2807                               empty, left, free_space, right_nritems);
2808out:
2809        btrfs_tree_unlock(left);
2810        free_extent_buffer(left);
2811        return ret;
2812}
2813
2814/*
2815 * split the path's leaf in two, making sure there is at least data_size
2816 * available for the resulting leaf level of the path.
2817 *
2818 * returns 0 if all went well and < 0 on failure.
2819 */
2820static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2821                               struct btrfs_root *root,
2822                               struct btrfs_path *path,
2823                               struct extent_buffer *l,
2824                               struct extent_buffer *right,
2825                               int slot, int mid, int nritems)
2826{
2827        int data_copy_size;
2828        int rt_data_off;
2829        int i;
2830        int ret = 0;
2831        int wret;
2832        struct btrfs_disk_key disk_key;
2833
2834        nritems = nritems - mid;
2835        btrfs_set_header_nritems(right, nritems);
2836        data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2837
2838        copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2839                           btrfs_item_nr_offset(mid),
2840                           nritems * sizeof(struct btrfs_item));
2841
2842        copy_extent_buffer(right, l,
2843                     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2844                     data_copy_size, btrfs_leaf_data(l) +
2845                     leaf_data_end(root, l), data_copy_size);
2846
2847        rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2848                      btrfs_item_end_nr(l, mid);
2849
2850        for (i = 0; i < nritems; i++) {
2851                struct btrfs_item *item = btrfs_item_nr(right, i);
2852                u32 ioff;
2853
2854                if (!right->map_token) {
2855                        map_extent_buffer(right, (unsigned long)item,
2856                                        sizeof(struct btrfs_item),
2857                                        &right->map_token, &right->kaddr,
2858                                        &right->map_start, &right->map_len,
2859                                        KM_USER1);
2860                }
2861
2862                ioff = btrfs_item_offset(right, item);
2863                btrfs_set_item_offset(right, item, ioff + rt_data_off);
2864        }
2865
2866        if (right->map_token) {
2867                unmap_extent_buffer(right, right->map_token, KM_USER1);
2868                right->map_token = NULL;
2869        }
2870
2871        btrfs_set_header_nritems(l, mid);
2872        ret = 0;
2873        btrfs_item_key(right, &disk_key, 0);
2874        wret = insert_ptr(trans, root, path, &disk_key, right->start,
2875                          path->slots[1] + 1, 1);
2876        if (wret)
2877                ret = wret;
2878
2879        btrfs_mark_buffer_dirty(right);
2880        btrfs_mark_buffer_dirty(l);
2881        BUG_ON(path->slots[0] != slot);
2882
2883        ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2884        BUG_ON(ret);
2885
2886        if (mid <= slot) {
2887                btrfs_tree_unlock(path->nodes[0]);
2888                free_extent_buffer(path->nodes[0]);
2889                path->nodes[0] = right;
2890                path->slots[0] -= mid;
2891                path->slots[1] += 1;
2892        } else {
2893                btrfs_tree_unlock(right);
2894                free_extent_buffer(right);
2895        }
2896
2897        BUG_ON(path->slots[0] < 0);
2898
2899        return ret;
2900}
2901
2902/*
2903 * split the path's leaf in two, making sure there is at least data_size
2904 * available for the resulting leaf level of the path.
2905 *
2906 * returns 0 if all went well and < 0 on failure.
2907 */
2908static noinline int split_leaf(struct btrfs_trans_handle *trans,
2909                               struct btrfs_root *root,
2910                               struct btrfs_key *ins_key,
2911                               struct btrfs_path *path, int data_size,
2912                               int extend)
2913{
2914        struct extent_buffer *l;
2915        u32 nritems;
2916        int mid;
2917        int slot;
2918        struct extent_buffer *right;
2919        int ret = 0;
2920        int wret;
2921        int double_split;
2922        int num_doubles = 0;
2923
2924        /* first try to make some room by pushing left and right */
2925        if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
2926            !trans->transaction->delayed_refs.flushing) {
2927                wret = push_leaf_right(trans, root, path, data_size, 0);
2928                if (wret < 0)
2929                        return wret;
2930                if (wret) {
2931                        wret = push_leaf_left(trans, root, path, data_size, 0);
2932                        if (wret < 0)
2933                                return wret;
2934                }
2935                l = path->nodes[0];
2936
2937                /* did the pushes work? */
2938                if (btrfs_leaf_free_space(root, l) >= data_size)
2939                        return 0;
2940        }
2941
2942        if (!path->nodes[1]) {
2943                ret = insert_new_root(trans, root, path, 1);
2944                if (ret)
2945                        return ret;
2946        }
2947again:
2948        double_split = 0;
2949        l = path->nodes[0];
2950        slot = path->slots[0];
2951        nritems = btrfs_header_nritems(l);
2952        mid = (nritems + 1) / 2;
2953
2954        right = btrfs_alloc_free_block(trans, root, root->leafsize,
2955                                        path->nodes[1]->start,
2956                                        root->root_key.objectid,
2957                                        trans->transid, 0, l->start, 0);
2958        if (IS_ERR(right)) {
2959                BUG_ON(1);
2960                return PTR_ERR(right);
2961        }
2962
2963        memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2964        btrfs_set_header_bytenr(right, right->start);
2965        btrfs_set_header_generation(right, trans->transid);
2966        btrfs_set_header_owner(right, root->root_key.objectid);
2967        btrfs_set_header_level(right, 0);
2968        write_extent_buffer(right, root->fs_info->fsid,
2969                            (unsigned long)btrfs_header_fsid(right),
2970                            BTRFS_FSID_SIZE);
2971
2972        write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2973                            (unsigned long)btrfs_header_chunk_tree_uuid(right),
2974                            BTRFS_UUID_SIZE);
2975
2976        if (mid <= slot) {
2977                if (nritems == 1 ||
2978                    leaf_space_used(l, mid, nritems - mid) + data_size >
2979                        BTRFS_LEAF_DATA_SIZE(root)) {
2980                        if (slot >= nritems) {
2981                                struct btrfs_disk_key disk_key;
2982
2983                                btrfs_cpu_key_to_disk(&disk_key, ins_key);
2984                                btrfs_set_header_nritems(right, 0);
2985                                wret = insert_ptr(trans, root, path,
2986                                                  &disk_key, right->start,
2987                                                  path->slots[1] + 1, 1);
2988                                if (wret)
2989                                        ret = wret;
2990
2991                                btrfs_tree_unlock(path->nodes[0]);
2992                                free_extent_buffer(path->nodes[0]);
2993                                path->nodes[0] = right;
2994                                path->slots[0] = 0;
2995                                path->slots[1] += 1;
2996                                btrfs_mark_buffer_dirty(right);
2997                                return ret;
2998                        }
2999                        mid = slot;
3000                        if (mid != nritems &&
3001                            leaf_space_used(l, mid, nritems - mid) +
3002                            data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3003                                double_split = 1;
3004                        }
3005                }
3006        } else {
3007                if (leaf_space_used(l, 0, mid) + data_size >
3008                        BTRFS_LEAF_DATA_SIZE(root)) {
3009                        if (!extend && data_size && slot == 0) {
3010                                struct btrfs_disk_key disk_key;
3011
3012                                btrfs_cpu_key_to_disk(&disk_key, ins_key);
3013                                btrfs_set_header_nritems(right, 0);
3014                                wret = insert_ptr(trans, root, path,
3015                                                  &disk_key,
3016                                                  right->start,
3017                                                  path->slots[1], 1);
3018                                if (wret)
3019                                        ret = wret;
3020                                btrfs_tree_unlock(path->nodes[0]);
3021                                free_extent_buffer(path->nodes[0]);
3022                                path->nodes[0] = right;
3023                                path->slots[0] = 0;
3024                                if (path->slots[1] == 0) {
3025                                        wret = fixup_low_keys(trans, root,
3026                                                      path, &disk_key, 1);
3027                                        if (wret)
3028                                                ret = wret;
3029                                }
3030                                btrfs_mark_buffer_dirty(right);
3031                                return ret;
3032                        } else if ((extend || !data_size) && slot == 0) {
3033                                mid = 1;
3034                        } else {
3035                                mid = slot;
3036                                if (mid != nritems &&
3037                                    leaf_space_used(l, mid, nritems - mid) +
3038                                    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3039                                        double_split = 1;
3040                                }
3041                        }
3042                }
3043        }
3044
3045        ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
3046        BUG_ON(ret);
3047
3048        if (double_split) {
3049                BUG_ON(num_doubles != 0);
3050                num_doubles++;
3051                goto again;
3052        }
3053
3054        return ret;
3055}
3056
3057/*
3058 * This function splits a single item into two items,
3059 * giving 'new_key' to the new item and splitting the
3060 * old one at split_offset (from the start of the item).
3061 *
3062 * The path may be released by this operation.  After
3063 * the split, the path is pointing to the old item.  The
3064 * new item is going to be in the same node as the old one.
3065 *
3066 * Note, the item being split must be smaller enough to live alone on
3067 * a tree block with room for one extra struct btrfs_item
3068 *
3069 * This allows us to split the item in place, keeping a lock on the
3070 * leaf the entire time.
3071 */
3072int btrfs_split_item(struct btrfs_trans_handle *trans,
3073                     struct btrfs_root *root,
3074                     struct btrfs_path *path,
3075                     struct btrfs_key *new_key,
3076                     unsigned long split_offset)
3077{
3078        u32 item_size;
3079        struct extent_buffer *leaf;
3080        struct btrfs_key orig_key;
3081        struct btrfs_item *item;
3082        struct btrfs_item *new_item;
3083        int ret = 0;
3084        int slot;
3085        u32 nritems;
3086        u32 orig_offset;
3087        struct btrfs_disk_key disk_key;
3088        char *buf;
3089
3090        leaf = path->nodes[0];
3091        btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
3092        if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
3093                goto split;
3094
3095        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3096        btrfs_release_path(root, path);
3097
3098        path->search_for_split = 1;
3099        path->keep_locks = 1;
3100
3101        ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
3102        path->search_for_split = 0;
3103
3104        /* if our item isn't there or got smaller, return now */
3105        if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
3106                                                        path->slots[0])) {
3107                path->keep_locks = 0;
3108                return -EAGAIN;
3109        }
3110
3111        btrfs_set_path_blocking(path);
3112        ret = split_leaf(trans, root, &orig_key, path,
3113                         sizeof(struct btrfs_item), 1);
3114        path->keep_locks = 0;
3115        BUG_ON(ret);
3116
3117        btrfs_unlock_up_safe(path, 1);
3118        leaf = path->nodes[0];
3119        BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3120
3121split:
3122        /*
3123         * make sure any changes to the path from split_leaf leave it
3124         * in a blocking state
3125         */
3126        btrfs_set_path_blocking(path);
3127
3128        item = btrfs_item_nr(leaf, path->slots[0]);
3129        orig_offset = btrfs_item_offset(leaf, item);
3130        item_size = btrfs_item_size(leaf, item);
3131
3132        buf = kmalloc(item_size, GFP_NOFS);
3133        read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3134                            path->slots[0]), item_size);
3135        slot = path->slots[0] + 1;
3136        leaf = path->nodes[0];
3137
3138        nritems = btrfs_header_nritems(leaf);
3139
3140        if (slot != nritems) {
3141                /* shift the items */
3142                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3143                              btrfs_item_nr_offset(slot),
3144                              (nritems - slot) * sizeof(struct btrfs_item));
3145
3146        }
3147
3148        btrfs_cpu_key_to_disk(&disk_key, new_key);
3149        btrfs_set_item_key(leaf, &disk_key, slot);
3150
3151        new_item = btrfs_item_nr(leaf, slot);
3152
3153        btrfs_set_item_offset(leaf, new_item, orig_offset);
3154        btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3155
3156        btrfs_set_item_offset(leaf, item,
3157                              orig_offset + item_size - split_offset);
3158        btrfs_set_item_size(leaf, item, split_offset);
3159
3160        btrfs_set_header_nritems(leaf, nritems + 1);
3161
3162        /* write the data for the start of the original item */
3163        write_extent_buffer(leaf, buf,
3164                            btrfs_item_ptr_offset(leaf, path->slots[0]),
3165                            split_offset);
3166
3167        /* write the data for the new item */
3168        write_extent_buffer(leaf, buf + split_offset,
3169                            btrfs_item_ptr_offset(leaf, slot),
3170                            item_size - split_offset);
3171        btrfs_mark_buffer_dirty(leaf);
3172
3173        ret = 0;
3174        if (btrfs_leaf_free_space(root, leaf) < 0) {
3175                btrfs_print_leaf(root, leaf);
3176                BUG();
3177        }
3178        kfree(buf);
3179        return ret;
3180}
3181
3182/*
3183 * make the item pointed to by the path smaller.  new_size indicates
3184 * how small to make it, and from_end tells us if we just chop bytes
3185 * off the end of the item or if we shift the item to chop bytes off
3186 * the front.
3187 */
3188int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3189                        struct btrfs_root *root,
3190                        struct btrfs_path *path,
3191                        u32 new_size, int from_end)
3192{
3193        int ret = 0;
3194        int slot;
3195        int slot_orig;
3196        struct extent_buffer *leaf;
3197        struct btrfs_item *item;
3198        u32 nritems;
3199        unsigned int data_end;
3200        unsigned int old_data_start;
3201        unsigned int old_size;
3202        unsigned int size_diff;
3203        int i;
3204
3205        slot_orig = path->slots[0];
3206        leaf = path->nodes[0];
3207        slot = path->slots[0];
3208
3209        old_size = btrfs_item_size_nr(leaf, slot);
3210        if (old_size == new_size)
3211                return 0;
3212
3213        nritems = btrfs_header_nritems(leaf);
3214        data_end = leaf_data_end(root, leaf);
3215
3216        old_data_start = btrfs_item_offset_nr(leaf, slot);
3217
3218        size_diff = old_size - new_size;
3219
3220        BUG_ON(slot < 0);
3221        BUG_ON(slot >= nritems);
3222
3223        /*
3224         * item0..itemN ... dataN.offset..dataN.size .. data0.size
3225         */
3226        /* first correct the data pointers */
3227        for (i = slot; i < nritems; i++) {
3228                u32 ioff;
3229                item = btrfs_item_nr(leaf, i);
3230
3231                if (!leaf->map_token) {
3232                        map_extent_buffer(leaf, (unsigned long)item,
3233                                        sizeof(struct btrfs_item),
3234                                        &leaf->map_token, &leaf->kaddr,
3235                                        &leaf->map_start, &leaf->map_len,
3236                                        KM_USER1);
3237                }
3238
3239                ioff = btrfs_item_offset(leaf, item);
3240                btrfs_set_item_offset(leaf, item, ioff + size_diff);
3241        }
3242
3243        if (leaf->map_token) {
3244                unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3245                leaf->map_token = NULL;
3246        }
3247
3248        /* shift the data */
3249        if (from_end) {
3250                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3251                              data_end + size_diff, btrfs_leaf_data(leaf) +
3252                              data_end, old_data_start + new_size - data_end);
3253        } else {
3254                struct btrfs_disk_key disk_key;
3255                u64 offset;
3256
3257                btrfs_item_key(leaf, &disk_key, slot);
3258
3259                if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3260                        unsigned long ptr;
3261                        struct btrfs_file_extent_item *fi;
3262
3263                        fi = btrfs_item_ptr(leaf, slot,
3264                                            struct btrfs_file_extent_item);
3265                        fi = (struct btrfs_file_extent_item *)(
3266                             (unsigned long)fi - size_diff);
3267
3268                        if (btrfs_file_extent_type(leaf, fi) ==
3269                            BTRFS_FILE_EXTENT_INLINE) {
3270                                ptr = btrfs_item_ptr_offset(leaf, slot);
3271                                memmove_extent_buffer(leaf, ptr,
3272                                      (unsigned long)fi,
3273                                      offsetof(struct btrfs_file_extent_item,
3274                                                 disk_bytenr));
3275                        }
3276                }
3277
3278                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3279                              data_end + size_diff, btrfs_leaf_data(leaf) +
3280                              data_end, old_data_start - data_end);
3281
3282                offset = btrfs_disk_key_offset(&disk_key);
3283                btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3284                btrfs_set_item_key(leaf, &disk_key, slot);
3285                if (slot == 0)
3286                        fixup_low_keys(trans, root, path, &disk_key, 1);
3287        }
3288
3289        item = btrfs_item_nr(leaf, slot);
3290        btrfs_set_item_size(leaf, item, new_size);
3291        btrfs_mark_buffer_dirty(leaf);
3292
3293        ret = 0;
3294        if (btrfs_leaf_free_space(root, leaf) < 0) {
3295                btrfs_print_leaf(root, leaf);
3296                BUG();
3297        }
3298        return ret;
3299}
3300
3301/*
3302 * make the item pointed to by the path bigger, data_size is the new size.
3303 */
3304int btrfs_extend_item(struct btrfs_trans_handle *trans,
3305                      struct btrfs_root *root, struct btrfs_path *path,
3306                      u32 data_size)
3307{
3308        int ret = 0;
3309        int slot;
3310        int slot_orig;
3311        struct extent_buffer *leaf;
3312        struct btrfs_item *item;
3313        u32 nritems;
3314        unsigned int data_end;
3315        unsigned int old_data;
3316        unsigned int old_size;
3317        int i;
3318
3319        slot_orig = path->slots[0];
3320        leaf = path->nodes[0];
3321
3322        nritems = btrfs_header_nritems(leaf);
3323        data_end = leaf_data_end(root, leaf);
3324
3325        if (btrfs_leaf_free_space(root, leaf) < data_size) {
3326                btrfs_print_leaf(root, leaf);
3327                BUG();
3328        }
3329        slot = path->slots[0];
3330        old_data = btrfs_item_end_nr(leaf, slot);
3331
3332        BUG_ON(slot < 0);
3333        if (slot >= nritems) {
3334                btrfs_print_leaf(root, leaf);
3335                printk(KERN_CRIT "slot %d too large, nritems %d\n",
3336                       slot, nritems);
3337                BUG_ON(1);
3338        }
3339
3340        /*
3341         * item0..itemN ... dataN.offset..dataN.size .. data0.size
3342         */
3343        /* first correct the data pointers */
3344        for (i = slot; i < nritems; i++) {
3345                u32 ioff;
3346                item = btrfs_item_nr(leaf, i);
3347
3348                if (!leaf->map_token) {
3349                        map_extent_buffer(leaf, (unsigned long)item,
3350                                        sizeof(struct btrfs_item),
3351                                        &leaf->map_token, &leaf->kaddr,
3352                                        &leaf->map_start, &leaf->map_len,
3353                                        KM_USER1);
3354                }
3355                ioff = btrfs_item_offset(leaf, item);
3356                btrfs_set_item_offset(leaf, item, ioff - data_size);
3357        }
3358
3359        if (leaf->map_token) {
3360                unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3361                leaf->map_token = NULL;
3362        }
3363
3364        /* shift the data */
3365        memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3366                      data_end - data_size, btrfs_leaf_data(leaf) +
3367                      data_end, old_data - data_end);
3368
3369        data_end = old_data;
3370        old_size = btrfs_item_size_nr(leaf, slot);
3371        item = btrfs_item_nr(leaf, slot);
3372        btrfs_set_item_size(leaf, item, old_size + data_size);
3373        btrfs_mark_buffer_dirty(leaf);
3374
3375        ret = 0;
3376        if (btrfs_leaf_free_space(root, leaf) < 0) {
3377                btrfs_print_leaf(root, leaf);
3378                BUG();
3379        }
3380        return ret;
3381}
3382
3383/*
3384 * Given a key and some data, insert items into the tree.
3385 * This does all the path init required, making room in the tree if needed.
3386 * Returns the number of keys that were inserted.
3387 */
3388int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3389                            struct btrfs_root *root,
3390                            struct btrfs_path *path,
3391                            struct btrfs_key *cpu_key, u32 *data_size,
3392                            int nr)
3393{
3394        struct extent_buffer *leaf;
3395        struct btrfs_item *item;
3396        int ret = 0;
3397        int slot;
3398        int i;
3399        u32 nritems;
3400        u32 total_data = 0;
3401        u32 total_size = 0;
3402        unsigned int data_end;
3403        struct btrfs_disk_key disk_key;
3404        struct btrfs_key found_key;
3405
3406        for (i = 0; i < nr; i++) {
3407                if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3408                    BTRFS_LEAF_DATA_SIZE(root)) {
3409                        break;
3410                        nr = i;
3411                }
3412                total_data += data_size[i];
3413                total_size += data_size[i] + sizeof(struct btrfs_item);
3414        }
3415        BUG_ON(nr == 0);
3416
3417        ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3418        if (ret == 0)
3419                return -EEXIST;
3420        if (ret < 0)
3421                goto out;
3422
3423        leaf = path->nodes[0];
3424
3425        nritems = btrfs_header_nritems(leaf);
3426        data_end = leaf_data_end(root, leaf);
3427
3428        if (btrfs_leaf_free_space(root, leaf) < total_size) {
3429                for (i = nr; i >= 0; i--) {
3430                        total_data -= data_size[i];
3431                        total_size -= data_size[i] + sizeof(struct btrfs_item);
3432                        if (total_size < btrfs_leaf_free_space(root, leaf))
3433                                break;
3434                }
3435                nr = i;
3436        }
3437
3438        slot = path->slots[0];
3439        BUG_ON(slot < 0);
3440
3441        if (slot != nritems) {
3442                unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3443
3444                item = btrfs_item_nr(leaf, slot);
3445                btrfs_item_key_to_cpu(leaf, &found_key, slot);
3446
3447                /* figure out how many keys we can insert in here */
3448                total_data = data_size[0];
3449                for (i = 1; i < nr; i++) {
3450                        if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3451                                break;
3452                        total_data += data_size[i];
3453                }
3454                nr = i;
3455
3456                if (old_data < data_end) {
3457                        btrfs_print_leaf(root, leaf);
3458                        printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3459                               slot, old_data, data_end);
3460                        BUG_ON(1);
3461                }
3462                /*
3463                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3464                 */
3465                /* first correct the data pointers */
3466                WARN_ON(leaf->map_token);
3467                for (i = slot; i < nritems; i++) {
3468                        u32 ioff;
3469
3470                        item = btrfs_item_nr(leaf, i);
3471                        if (!leaf->map_token) {
3472                                map_extent_buffer(leaf, (unsigned long)item,
3473                                        sizeof(struct btrfs_item),
3474                                        &leaf->map_token, &leaf->kaddr,
3475                                        &leaf->map_start, &leaf->map_len,
3476                                        KM_USER1);
3477                        }
3478
3479                        ioff = btrfs_item_offset(leaf, item);
3480                        btrfs_set_item_offset(leaf, item, ioff - total_data);
3481                }
3482                if (leaf->map_token) {
3483                        unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3484                        leaf->map_token = NULL;
3485                }
3486
3487                /* shift the items */
3488                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3489                              btrfs_item_nr_offset(slot),
3490                              (nritems - slot) * sizeof(struct btrfs_item));
3491
3492                /* shift the data */
3493                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3494                              data_end - total_data, btrfs_leaf_data(leaf) +
3495                              data_end, old_data - data_end);
3496                data_end = old_data;
3497        } else {
3498                /*
3499                 * this sucks but it has to be done, if we are inserting at
3500                 * the end of the leaf only insert 1 of the items, since we
3501                 * have no way of knowing whats on the next leaf and we'd have
3502                 * to drop our current locks to figure it out
3503                 */
3504                nr = 1;
3505        }
3506
3507        /* setup the item for the new data */
3508        for (i = 0; i < nr; i++) {
3509                btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3510                btrfs_set_item_key(leaf, &disk_key, slot + i);
3511                item = btrfs_item_nr(leaf, slot + i);
3512                btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3513                data_end -= data_size[i];
3514                btrfs_set_item_size(leaf, item, data_size[i]);
3515        }
3516        btrfs_set_header_nritems(leaf, nritems + nr);
3517        btrfs_mark_buffer_dirty(leaf);
3518
3519        ret = 0;
3520        if (slot == 0) {
3521                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3522                ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3523        }
3524
3525        if (btrfs_leaf_free_space(root, leaf) < 0) {
3526                btrfs_print_leaf(root, leaf);
3527                BUG();
3528        }
3529out:
3530        if (!ret)
3531                ret = nr;
3532        return ret;
3533}
3534
3535/*
3536 * this is a helper for btrfs_insert_empty_items, the main goal here is
3537 * to save stack depth by doing the bulk of the work in a function
3538 * that doesn't call btrfs_search_slot
3539 */
3540static noinline_for_stack int
3541setup_items_for_insert(struct btrfs_trans_handle *trans,
3542                      struct btrfs_root *root, struct btrfs_path *path,
3543                      struct btrfs_key *cpu_key, u32 *data_size,
3544                      u32 total_data, u32 total_size, int nr)
3545{
3546        struct btrfs_item *item;
3547        int i;
3548        u32 nritems;
3549        unsigned int data_end;
3550        struct btrfs_disk_key disk_key;
3551        int ret;
3552        struct extent_buffer *leaf;
3553        int slot;
3554
3555        leaf = path->nodes[0];
3556        slot = path->slots[0];
3557
3558        nritems = btrfs_header_nritems(leaf);
3559        data_end = leaf_data_end(root, leaf);
3560
3561        if (btrfs_leaf_free_space(root, leaf) < total_size) {
3562                btrfs_print_leaf(root, leaf);
3563                printk(KERN_CRIT "not enough freespace need %u have %d\n",
3564                       total_size, btrfs_leaf_free_space(root, leaf));
3565                BUG();
3566        }
3567
3568        if (slot != nritems) {
3569                unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3570
3571                if (old_data < data_end) {
3572                        btrfs_print_leaf(root, leaf);
3573                        printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3574                               slot, old_data, data_end);
3575                        BUG_ON(1);
3576                }
3577                /*
3578                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3579                 */
3580                /* first correct the data pointers */
3581                WARN_ON(leaf->map_token);
3582                for (i = slot; i < nritems; i++) {
3583                        u32 ioff;
3584
3585                        item = btrfs_item_nr(leaf, i);
3586                        if (!leaf->map_token) {
3587                                map_extent_buffer(leaf, (unsigned long)item,
3588                                        sizeof(struct btrfs_item),
3589                                        &leaf->map_token, &leaf->kaddr,
3590                                        &leaf->map_start, &leaf->map_len,
3591                                        KM_USER1);
3592                        }
3593
3594                        ioff = btrfs_item_offset(leaf, item);
3595                        btrfs_set_item_offset(leaf, item, ioff - total_data);
3596                }
3597                if (leaf->map_token) {
3598                        unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3599                        leaf->map_token = NULL;
3600                }
3601
3602                /* shift the items */
3603                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3604                              btrfs_item_nr_offset(slot),
3605                              (nritems - slot) * sizeof(struct btrfs_item));
3606
3607                /* shift the data */
3608                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3609                              data_end - total_data, btrfs_leaf_data(leaf) +
3610                              data_end, old_data - data_end);
3611                data_end = old_data;
3612        }
3613
3614        /* setup the item for the new data */
3615        for (i = 0; i < nr; i++) {
3616                btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3617                btrfs_set_item_key(leaf, &disk_key, slot + i);
3618                item = btrfs_item_nr(leaf, slot + i);
3619                btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3620                data_end -= data_size[i];
3621                btrfs_set_item_size(leaf, item, data_size[i]);
3622        }
3623
3624        btrfs_set_header_nritems(leaf, nritems + nr);
3625
3626        ret = 0;
3627        if (slot == 0) {
3628                struct btrfs_disk_key disk_key;
3629                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3630                ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3631        }
3632        btrfs_unlock_up_safe(path, 1);
3633        btrfs_mark_buffer_dirty(leaf);
3634
3635        if (btrfs_leaf_free_space(root, leaf) < 0) {
3636                btrfs_print_leaf(root, leaf);
3637                BUG();
3638        }
3639        return ret;
3640}
3641
3642/*
3643 * Given a key and some data, insert items into the tree.
3644 * This does all the path init required, making room in the tree if needed.
3645 */
3646int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3647                            struct btrfs_root *root,
3648                            struct btrfs_path *path,
3649                            struct btrfs_key *cpu_key, u32 *data_size,
3650                            int nr)
3651{
3652        struct extent_buffer *leaf;
3653        int ret = 0;
3654        int slot;
3655        int i;
3656        u32 total_size = 0;
3657        u32 total_data = 0;
3658
3659        for (i = 0; i < nr; i++)
3660                total_data += data_size[i];
3661
3662        total_size = total_data + (nr * sizeof(struct btrfs_item));
3663        ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3664        if (ret == 0)
3665                return -EEXIST;
3666        if (ret < 0)
3667                goto out;
3668
3669        leaf = path->nodes[0];
3670        slot = path->slots[0];
3671        BUG_ON(slot < 0);
3672
3673        ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
3674                               total_data, total_size, nr);
3675
3676out:
3677        return ret;
3678}
3679
3680/*
3681 * Given a key and some data, insert an item into the tree.
3682 * This does all the path init required, making room in the tree if needed.
3683 */
3684int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3685                      *root, struct btrfs_key *cpu_key, void *data, u32
3686                      data_size)
3687{
3688        int ret = 0;
3689        struct btrfs_path *path;
3690        struct extent_buffer *leaf;
3691        unsigned long ptr;
3692
3693        path = btrfs_alloc_path();
3694        BUG_ON(!path);
3695        ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3696        if (!ret) {
3697                leaf = path->nodes[0];
3698                ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3699                write_extent_buffer(leaf, data, ptr, data_size);
3700                btrfs_mark_buffer_dirty(leaf);
3701        }
3702        btrfs_free_path(path);
3703        return ret;
3704}
3705
3706/*
3707 * delete the pointer from a given node.
3708 *
3709 * the tree should have been previously balanced so the deletion does not
3710 * empty a node.
3711 */
3712static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3713                   struct btrfs_path *path, int level, int slot)
3714{
3715        struct extent_buffer *parent = path->nodes[level];
3716        u32 nritems;
3717        int ret = 0;
3718        int wret;
3719
3720        nritems = btrfs_header_nritems(parent);
3721        if (slot != nritems - 1) {
3722                memmove_extent_buffer(parent,
3723                              btrfs_node_key_ptr_offset(slot),
3724                              btrfs_node_key_ptr_offset(slot + 1),
3725                              sizeof(struct btrfs_key_ptr) *
3726                              (nritems - slot - 1));
3727        }
3728        nritems--;
3729        btrfs_set_header_nritems(parent, nritems);
3730        if (nritems == 0 && parent == root->node) {
3731                BUG_ON(btrfs_header_level(root->node) != 1);
3732                /* just turn the root into a leaf and break */
3733                btrfs_set_header_level(root->node, 0);
3734        } else if (slot == 0) {
3735                struct btrfs_disk_key disk_key;
3736
3737                btrfs_node_key(parent, &disk_key, 0);
3738                wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3739                if (wret)
3740                        ret = wret;
3741        }
3742        btrfs_mark_buffer_dirty(parent);
3743        return ret;
3744}
3745
3746/*
3747 * a helper function to delete the leaf pointed to by path->slots[1] and
3748 * path->nodes[1].  bytenr is the node block pointer, but since the callers
3749 * already know it, it is faster to have them pass it down than to
3750 * read it out of the node again.
3751 *
3752 * This deletes the pointer in path->nodes[1] and frees the leaf
3753 * block extent.  zero is returned if it all worked out, < 0 otherwise.
3754 *
3755 * The path must have already been setup for deleting the leaf, including
3756 * all the proper balancing.  path->nodes[1] must be locked.
3757 */
3758noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3759                            struct btrfs_root *root,
3760                            struct btrfs_path *path, u64 bytenr)
3761{
3762        int ret;
3763        u64 root_gen = btrfs_header_generation(path->nodes[1]);
3764        u64 parent_start = path->nodes[1]->start;
3765        u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3766
3767        ret = del_ptr(trans, root, path, 1, path->slots[1]);
3768        if (ret)
3769                return ret;
3770
3771        /*
3772         * btrfs_free_extent is expensive, we want to make sure we
3773         * aren't holding any locks when we call it
3774         */
3775        btrfs_unlock_up_safe(path, 0);
3776
3777        ret = btrfs_free_extent(trans, root, bytenr,
3778                                btrfs_level_size(root, 0),
3779                                parent_start, parent_owner,
3780                                root_gen, 0, 1);
3781        return ret;
3782}
3783/*
3784 * delete the item at the leaf level in path.  If that empties
3785 * the leaf, remove it from the tree
3786 */
3787int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3788                    struct btrfs_path *path, int slot, int nr)
3789{
3790        struct extent_buffer *leaf;
3791        struct btrfs_item *item;
3792        int last_off;
3793        int dsize = 0;
3794        int ret = 0;
3795        int wret;
3796        int i;
3797        u32 nritems;
3798
3799        leaf = path->nodes[0];
3800        last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3801
3802        for (i = 0; i < nr; i++)
3803                dsize += btrfs_item_size_nr(leaf, slot + i);
3804
3805        nritems = btrfs_header_nritems(leaf);
3806
3807        if (slot + nr != nritems) {
3808                int data_end = leaf_data_end(root, leaf);
3809
3810                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3811                              data_end + dsize,
3812                              btrfs_leaf_data(leaf) + data_end,
3813                              last_off - data_end);
3814
3815                for (i = slot + nr; i < nritems; i++) {
3816                        u32 ioff;
3817
3818                        item = btrfs_item_nr(leaf, i);
3819                        if (!leaf->map_token) {
3820                                map_extent_buffer(leaf, (unsigned long)item,
3821                                        sizeof(struct btrfs_item),
3822                                        &leaf->map_token, &leaf->kaddr,
3823                                        &leaf->map_start, &leaf->map_len,
3824                                        KM_USER1);
3825                        }
3826                        ioff = btrfs_item_offset(leaf, item);
3827                        btrfs_set_item_offset(leaf, item, ioff + dsize);
3828                }
3829
3830                if (leaf->map_token) {
3831                        unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3832                        leaf->map_token = NULL;
3833                }
3834
3835                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3836                              btrfs_item_nr_offset(slot + nr),
3837                              sizeof(struct btrfs_item) *
3838                              (nritems - slot - nr));
3839        }
3840        btrfs_set_header_nritems(leaf, nritems - nr);
3841        nritems -= nr;
3842
3843        /* delete the leaf if we've emptied it */
3844        if (nritems == 0) {
3845                if (leaf == root->node) {
3846                        btrfs_set_header_level(leaf, 0);
3847                } else {
3848                        ret = btrfs_del_leaf(trans, root, path, leaf->start);
3849                        BUG_ON(ret);
3850                }
3851        } else {
3852                int used = leaf_space_used(leaf, 0, nritems);
3853                if (slot == 0) {
3854                        struct btrfs_disk_key disk_key;
3855
3856                        btrfs_item_key(leaf, &disk_key, 0);
3857                        wret = fixup_low_keys(trans, root, path,
3858                                              &disk_key, 1);
3859                        if (wret)
3860                                ret = wret;
3861                }
3862
3863                /* delete the leaf if it is mostly empty */
3864                if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
3865                    !trans->transaction->delayed_refs.flushing) {
3866                        /* push_leaf_left fixes the path.
3867                         * make sure the path still points to our leaf
3868                         * for possible call to del_ptr below
3869                         */
3870                        slot = path->slots[1];
3871                        extent_buffer_get(leaf);
3872
3873                        btrfs_set_path_blocking(path);
3874                        wret = push_leaf_left(trans, root, path, 1, 1);
3875                        if (wret < 0 && wret != -ENOSPC)
3876                                ret = wret;
3877
3878                        if (path->nodes[0] == leaf &&
3879                            btrfs_header_nritems(leaf)) {
3880                                wret = push_leaf_right(trans, root, path, 1, 1);
3881                                if (wret < 0 && wret != -ENOSPC)
3882                                        ret = wret;
3883                        }
3884
3885                        if (btrfs_header_nritems(leaf) == 0) {
3886                                path->slots[1] = slot;
3887                                ret = btrfs_del_leaf(trans, root, path,
3888                                                     leaf->start);
3889                                BUG_ON(ret);
3890                                free_extent_buffer(leaf);
3891                        } else {
3892                                /* if we're still in the path, make sure
3893                                 * we're dirty.  Otherwise, one of the
3894                                 * push_leaf functions must have already
3895                                 * dirtied this buffer
3896                                 */
3897                                if (path->nodes[0] == leaf)
3898                                        btrfs_mark_buffer_dirty(leaf);
3899                                free_extent_buffer(leaf);
3900                        }
3901                } else {
3902                        btrfs_mark_buffer_dirty(leaf);
3903                }
3904        }
3905        return ret;
3906}
3907
3908/*
3909 * search the tree again to find a leaf with lesser keys
3910 * returns 0 if it found something or 1 if there are no lesser leaves.
3911 * returns < 0 on io errors.
3912 *
3913 * This may release the path, and so you may lose any locks held at the
3914 * time you call it.
3915 */
3916int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3917{
3918        struct btrfs_key key;
3919        struct btrfs_disk_key found_key;
3920        int ret;
3921
3922        btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3923
3924        if (key.offset > 0)
3925                key.offset--;
3926        else if (key.type > 0)
3927                key.type--;
3928        else if (key.objectid > 0)
3929                key.objectid--;
3930        else
3931                return 1;
3932
3933        btrfs_release_path(root, path);
3934        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3935        if (ret < 0)
3936                return ret;
3937        btrfs_item_key(path->nodes[0], &found_key, 0);
3938        ret = comp_keys(&found_key, &key);
3939        if (ret < 0)
3940                return 0;
3941        return 1;
3942}
3943
3944/*
3945 * A helper function to walk down the tree starting at min_key, and looking
3946 * for nodes or leaves that are either in cache or have a minimum
3947 * transaction id.  This is used by the btree defrag code, and tree logging
3948 *
3949 * This does not cow, but it does stuff the starting key it finds back
3950 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3951 * key and get a writable path.
3952 *
3953 * This does lock as it descends, and path->keep_locks should be set
3954 * to 1 by the caller.
3955 *
3956 * This honors path->lowest_level to prevent descent past a given level
3957 * of the tree.
3958 *
3959 * min_trans indicates the oldest transaction that you are interested
3960 * in walking through.  Any nodes or leaves older than min_trans are
3961 * skipped over (without reading them).
3962 *
3963 * returns zero if something useful was found, < 0 on error and 1 if there
3964 * was nothing in the tree that matched the search criteria.
3965 */
3966int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3967                         struct btrfs_key *max_key,
3968                         struct btrfs_path *path, int cache_only,
3969                         u64 min_trans)
3970{
3971        struct extent_buffer *cur;
3972        struct btrfs_key found_key;
3973        int slot;
3974        int sret;
3975        u32 nritems;
3976        int level;
3977        int ret = 1;
3978
3979        WARN_ON(!path->keep_locks);
3980again:
3981        cur = btrfs_lock_root_node(root);
3982        level = btrfs_header_level(cur);
3983        WARN_ON(path->nodes[level]);
3984        path->nodes[level] = cur;
3985        path->locks[level] = 1;
3986
3987        if (btrfs_header_generation(cur) < min_trans) {
3988                ret = 1;
3989                goto out;
3990        }
3991        while (1) {
3992                nritems = btrfs_header_nritems(cur);
3993                level = btrfs_header_level(cur);
3994                sret = bin_search(cur, min_key, level, &slot);
3995
3996                /* at the lowest level, we're done, setup the path and exit */
3997                if (level == path->lowest_level) {
3998                        if (slot >= nritems)
3999                                goto find_next_key;
4000                        ret = 0;
4001                        path->slots[level] = slot;
4002                        btrfs_item_key_to_cpu(cur, &found_key, slot);
4003                        goto out;
4004                }
4005                if (sret && slot > 0)
4006                        slot--;
4007                /*
4008                 * check this node pointer against the cache_only and
4009                 * min_trans parameters.  If it isn't in cache or is too
4010                 * old, skip to the next one.
4011                 */
4012                while (slot < nritems) {
4013                        u64 blockptr;
4014                        u64 gen;
4015                        struct extent_buffer *tmp;
4016                        struct btrfs_disk_key disk_key;
4017
4018                        blockptr = btrfs_node_blockptr(cur, slot);
4019                        gen = btrfs_node_ptr_generation(cur, slot);
4020                        if (gen < min_trans) {
4021                                slot++;
4022                                continue;
4023                        }
4024                        if (!cache_only)
4025                                break;
4026
4027                        if (max_key) {
4028                                btrfs_node_key(cur, &disk_key, slot);
4029                                if (comp_keys(&disk_key, max_key) >= 0) {
4030                                        ret = 1;
4031                                        goto out;
4032                                }
4033                        }
4034
4035                        tmp = btrfs_find_tree_block(root, blockptr,
4036                                            btrfs_level_size(root, level - 1));
4037
4038                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
4039                                free_extent_buffer(tmp);
4040                                break;
4041                        }
4042                        if (tmp)
4043                                free_extent_buffer(tmp);
4044                        slot++;
4045                }
4046find_next_key:
4047                /*
4048                 * we didn't find a candidate key in this node, walk forward
4049                 * and find another one
4050                 */
4051                if (slot >= nritems) {
4052                        path->slots[level] = slot;
4053                        btrfs_set_path_blocking(path);
4054                        sret = btrfs_find_next_key(root, path, min_key, level,
4055                                                  cache_only, min_trans);
4056                        if (sret == 0) {
4057                                btrfs_release_path(root, path);
4058                                goto again;
4059                        } else {
4060                                goto out;
4061                        }
4062                }
4063                /* save our key for returning back */
4064                btrfs_node_key_to_cpu(cur, &found_key, slot);
4065                path->slots[level] = slot;
4066                if (level == path->lowest_level) {
4067                        ret = 0;
4068                        unlock_up(path, level, 1);
4069                        goto out;
4070                }
4071                btrfs_set_path_blocking(path);
4072                cur = read_node_slot(root, cur, slot);
4073
4074                btrfs_tree_lock(cur);
4075
4076                path->locks[level - 1] = 1;
4077                path->nodes[level - 1] = cur;
4078                unlock_up(path, level, 1);
4079                btrfs_clear_path_blocking(path, NULL);
4080        }
4081out:
4082        if (ret == 0)
4083                memcpy(min_key, &found_key, sizeof(found_key));
4084        btrfs_set_path_blocking(path);
4085        return ret;
4086}
4087
4088/*
4089 * this is similar to btrfs_next_leaf, but does not try to preserve
4090 * and fixup the path.  It looks for and returns the next key in the
4091 * tree based on the current path and the cache_only and min_trans
4092 * parameters.
4093 *
4094 * 0 is returned if another key is found, < 0 if there are any errors
4095 * and 1 is returned if there are no higher keys in the tree
4096 *
4097 * path->keep_locks should be set to 1 on the search made before
4098 * calling this function.
4099 */
4100int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4101                        struct btrfs_key *key, int lowest_level,
4102                        int cache_only, u64 min_trans)
4103{
4104        int level = lowest_level;
4105        int slot;
4106        struct extent_buffer *c;
4107
4108        WARN_ON(!path->keep_locks);
4109        while (level < BTRFS_MAX_LEVEL) {
4110                if (!path->nodes[level])
4111                        return 1;
4112
4113                slot = path->slots[level] + 1;
4114                c = path->nodes[level];
4115next:
4116                if (slot >= btrfs_header_nritems(c)) {
4117                        level++;
4118                        if (level == BTRFS_MAX_LEVEL)
4119                                return 1;
4120                        continue;
4121                }
4122                if (level == 0)
4123                        btrfs_item_key_to_cpu(c, key, slot);
4124                else {
4125                        u64 blockptr = btrfs_node_blockptr(c, slot);
4126                        u64 gen = btrfs_node_ptr_generation(c, slot);
4127
4128                        if (cache_only) {
4129                                struct extent_buffer *cur;
4130                                cur = btrfs_find_tree_block(root, blockptr,
4131                                            btrfs_level_size(root, level - 1));
4132                                if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
4133                                        slot++;
4134                                        if (cur)
4135                                                free_extent_buffer(cur);
4136                                        goto next;
4137                                }
4138                                free_extent_buffer(cur);
4139                        }
4140                        if (gen < min_trans) {
4141                                slot++;
4142                                goto next;
4143                        }
4144                        btrfs_node_key_to_cpu(c, key, slot);
4145                }
4146                return 0;
4147        }
4148        return 1;
4149}
4150
4151/*
4152 * search the tree again to find a leaf with greater keys
4153 * returns 0 if it found something or 1 if there are no greater leaves.
4154 * returns < 0 on io errors.
4155 */
4156int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4157{
4158        int slot;
4159        int level;
4160        struct extent_buffer *c;
4161        struct extent_buffer *next;
4162        struct btrfs_key key;
4163        u32 nritems;
4164        int ret;
4165        int old_spinning = path->leave_spinning;
4166        int force_blocking = 0;
4167
4168        nritems = btrfs_header_nritems(path->nodes[0]);
4169        if (nritems == 0)
4170                return 1;
4171
4172        /*
4173         * we take the blocks in an order that upsets lockdep.  Using
4174         * blocking mode is the only way around it.
4175         */
4176#ifdef CONFIG_DEBUG_LOCK_ALLOC
4177        force_blocking = 1;
4178#endif
4179
4180        btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4181again:
4182        level = 1;
4183        next = NULL;
4184        btrfs_release_path(root, path);
4185
4186        path->keep_locks = 1;
4187
4188        if (!force_blocking)
4189                path->leave_spinning = 1;
4190
4191        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4192        path->keep_locks = 0;
4193
4194        if (ret < 0)
4195                return ret;
4196
4197        nritems = btrfs_header_nritems(path->nodes[0]);
4198        /*
4199         * by releasing the path above we dropped all our locks.  A balance
4200         * could have added more items next to the key that used to be
4201         * at the very end of the block.  So, check again here and
4202         * advance the path if there are now more items available.
4203         */
4204        if (nritems > 0 && path->slots[0] < nritems - 1) {
4205                path->slots[0]++;
4206                ret = 0;
4207                goto done;
4208        }
4209
4210        while (level < BTRFS_MAX_LEVEL) {
4211                if (!path->nodes[level]) {
4212                        ret = 1;
4213                        goto done;
4214                }
4215
4216                slot = path->slots[level] + 1;
4217                c = path->nodes[level];
4218                if (slot >= btrfs_header_nritems(c)) {
4219                        level++;
4220                        if (level == BTRFS_MAX_LEVEL) {
4221                                ret = 1;
4222                                goto done;
4223                        }
4224                        continue;
4225                }
4226
4227                if (next) {
4228                        btrfs_tree_unlock(next);
4229                        free_extent_buffer(next);
4230                }
4231
4232                next = c;
4233                ret = read_block_for_search(NULL, root, path, &next, level,
4234                                            slot, &key);
4235                if (ret == -EAGAIN)
4236                        goto again;
4237
4238                if (ret < 0) {
4239                        btrfs_release_path(root, path);
4240                        goto done;
4241                }
4242
4243                if (!path->skip_locking) {
4244                        ret = btrfs_try_spin_lock(next);
4245                        if (!ret) {
4246                                btrfs_set_path_blocking(path);
4247                                btrfs_tree_lock(next);
4248                                if (!force_blocking)
4249                                        btrfs_clear_path_blocking(path, next);
4250                        }
4251                        if (force_blocking)
4252                                btrfs_set_lock_blocking(next);
4253                }
4254                break;
4255        }
4256        path->slots[level] = slot;
4257        while (1) {
4258                level--;
4259                c = path->nodes[level];
4260                if (path->locks[level])
4261                        btrfs_tree_unlock(c);
4262
4263                free_extent_buffer(c);
4264                path->nodes[level] = next;
4265                path->slots[level] = 0;
4266                if (!path->skip_locking)
4267                        path->locks[level] = 1;
4268
4269                if (!level)
4270                        break;
4271
4272                ret = read_block_for_search(NULL, root, path, &next, level,
4273                                            0, &key);
4274                if (ret == -EAGAIN)
4275                        goto again;
4276
4277                if (ret < 0) {
4278                        btrfs_release_path(root, path);
4279                        goto done;
4280                }
4281
4282                if (!path->skip_locking) {
4283                        btrfs_assert_tree_locked(path->nodes[level]);
4284                        ret = btrfs_try_spin_lock(next);
4285                        if (!ret) {
4286                                btrfs_set_path_blocking(path);
4287                                btrfs_tree_lock(next);
4288                                if (!force_blocking)
4289                                        btrfs_clear_path_blocking(path, next);
4290                        }
4291                        if (force_blocking)
4292                                btrfs_set_lock_blocking(next);
4293                }
4294        }
4295        ret = 0;
4296done:
4297        unlock_up(path, 0, 1);
4298        path->leave_spinning = old_spinning;
4299        if (!old_spinning)
4300                btrfs_set_path_blocking(path);
4301
4302        return ret;
4303}
4304
4305/*
4306 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4307 * searching until it gets past min_objectid or finds an item of 'type'
4308 *
4309 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4310 */
4311int btrfs_previous_item(struct btrfs_root *root,
4312                        struct btrfs_path *path, u64 min_objectid,
4313                        int type)
4314{
4315        struct btrfs_key found_key;
4316        struct extent_buffer *leaf;
4317        u32 nritems;
4318        int ret;
4319
4320        while (1) {
4321                if (path->slots[0] == 0) {
4322                        btrfs_set_path_blocking(path);
4323                        ret = btrfs_prev_leaf(root, path);
4324                        if (ret != 0)
4325                                return ret;
4326                } else {
4327                        path->slots[0]--;
4328                }
4329                leaf = path->nodes[0];
4330                nritems = btrfs_header_nritems(leaf);
4331                if (nritems == 0)
4332                        return 1;
4333                if (path->slots[0] == nritems)
4334                        path->slots[0]--;
4335
4336                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4337                if (found_key.type == type)
4338                        return 0;
4339                if (found_key.objectid < min_objectid)
4340                        break;
4341                if (found_key.objectid == min_objectid &&
4342                    found_key.type < type)
4343                        break;
4344        }
4345        return 1;
4346}
4347
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.