/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/**
 ** old_item_num
 ** old_entry_num
 ** set_entry_sizes
 ** create_virtual_node
 ** check_left
 ** check_right
 ** directory_part_size
 ** get_num_ver
 ** set_parameters
 ** is_leaf_removable
 ** are_leaves_removable
 ** get_empty_nodes
 ** get_lfree
 ** get_rfree
 ** is_left_neighbor_in_cache
 ** decrement_key
 ** get_far_parent
 ** get_parents
 ** can_node_be_removed
 ** ip_check_balance
 ** dc_check_balance_internal
 ** dc_check_balance_leaf
 ** dc_check_balance
 ** check_balance
 ** get_direct_parent
 ** get_neighbors
 ** fix_nodes
 **
 **
 **/

#include <linux/time.h>
#include <linux/string.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>

/* To make any change in the tree we find a node that contains the item
   to be changed/deleted, or the position in the node where a new item
   will be inserted. We call this node S. To do balancing we need to
   decide what we will shift to the left/right neighbor, or to a new
   node where the new item will go, etc. To make this analysis simpler
   we build a virtual node. The virtual node is an array of items that
   will replace the items of node S. (For instance, if we are going to
   delete an item, the virtual node does not contain it.) The virtual
   node keeps information about item sizes and types, mergeability of
   the first and last items, and the sizes of all entries in a directory
   item. We use this array of items when calculating what we can shift
   to the neighbors and how many nodes we need if we do no shifting,
   if we shift to the left/right neighbor, or to both. */
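
/* For example, when deleting one of N items from a leaf, the virtual node
   describes the N - 1 items that would remain; when inserting, it describes
   the N + 1 items the leaf would contain, including the item that does not
   exist in the buffer yet (illustrative summary, see create_virtual_node()
   below). */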

/* Given an item's number in the virtual node, return the number that item
   has in the source buffer. */
static inline int old_item_num(int new_num, int affected_item_num, int mode)
{
	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
		return new_num;

	if (mode == M_INSERT) {

		RFALSE(new_num == 0,
		       "vs-8005: for INSERT mode and item number of inserted item");

		return new_num - 1;
	}

	RFALSE(mode != M_DELETE,
	       "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
	       mode);
	/* delete mode */
	return new_num + 1;
}
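
/* For example, with mode == M_INSERT and affected_item_num == 2, virtual
   items 0 and 1 map to source items 0 and 1, virtual item 2 is the item
   being inserted (it has no source), and virtual item 3 maps to source
   item 2.  With mode == M_DELETE and affected_item_num == 2, virtual item 2
   maps to source item 3, since source item 2 is going away.  (Illustrative
   numbers only.) */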

static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes the array of virtual items is not created */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* number of items in the virtual node */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = B_N_PITEM_HEAD(Sh, 0);

	/* define the mergeability of the 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/* go through all items that remain in the virtual node (except for the new (inserted) one) */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get the item number in the source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = B_I_PITEM(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		// FIXME: there is no check that the item operation did not
		// consume too much memory
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb,
				       "vs-8030: create_virtual_node: "
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
		}
	}

	/* the virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == NULL,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/* to set the right merge flag we take the right delimiting key and check whether it is mergeable with the last item */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/* we delete the last item and it could be merged with the right neighbor's first item */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
				/* the node contains more than 1 item, or the item is not a directory item, or this item contains more than 1 entry */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb,
					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			}
		}
#endif

	}
}

/* using the virtual node, check how many items can be shifted to the left
   neighbor */
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	vi = vn->vn_vi;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fit into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* the first item may be merged with the last item in the left neighbor */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	tb->lnum[0] = 0;
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/* check whether L[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {
			/* cannot shift even a part of the current item */
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count the partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}
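
/* Worked example with illustrative numbers (vi_item_len includes IH_SIZE):
   suppose cur_free is 100 and the first two virtual items, neither of them
   left-mergeable, have vi_item_len of 60 and 80.  The first item fits
   entirely (cur_free drops to 40); the second does not, but since the
   remaining 40 bytes still exceed IH_SIZE, op_check_left() is asked how many
   units of the second item fit into the 40 - IH_SIZE bytes left for its
   body, and lnum[0]/lbytes are set accordingly. */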

/* using the virtual node, check how many items can be shifted to the right
   neighbor */
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	vi = vn->vn_vi + vn->vn_nr_item - 1;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fit into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* the last item may be merged with the first item in the right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	tb->rnum[0] = 0;
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/* check whether R[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
			tb->rbytes = -1;
			return;
		}

		/* R[0] can hold the header of the item and at least one byte of its body */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count the partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}

/*
 * from - number of items which are shifted to the left neighbor entirely
 * to - number of items which are shifted to the right neighbor entirely
 * from_bytes - number of bytes of the boundary item (or directory entries) which are shifted to the left neighbor
 * to_bytes - number of bytes of the boundary item (or directory entries) which are shifted to the right neighbor */
static int get_num_ver(int mode, struct tree_balance *tb, int h,
		       int from, int from_bytes,
		       int to, int to_bytes, short *snum012, int flow)
{
	int i;
	int cur_free;
	//    int bytes;
	int units;
	struct virtual_node *vn = tb->tb_vn;
	//    struct virtual_item * vi;

	int total_node_size, max_node_size, current_item_size;
	int needed_nodes;
	int start_item,		/* position of the item we start filling the node from */
	 end_item,		/* position of the item we finish filling the node with */
	 start_bytes,		/* number of first bytes (entries for directories) of the start_item-th item
				   we do not include into the node that is being filled */
	 end_bytes;		/* number of last bytes (entries for directories) of the end_item-th item
				   we do not include into the node that is being filled */
	int split_item_positions[2];	/* these are positions in the virtual item of the
					   items that are split between S[0] and
					   S1new and S1new and S2new */

	split_item_positions[0] = -1;
	split_item_positions[1] = -1;

	/* We only create additional nodes if we are in insert or paste mode
	   or we are in replace mode at the internal level. If h is 0 and
	   the mode is M_REPLACE then in fix_nodes we change the mode to
	   paste or insert before we get here in the code.  */
	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
	       "vs-8100: insert_size < 0 in overflow");

	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));

	/* snum012 [0-2] - number of items that go to S[0], the first new node
	   and the second new node */
	snum012[3] = -1;	/* s1bytes */
	snum012[4] = -1;	/* s2bytes */

	/* internal level */
	if (h > 0) {
		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
		if (i == max_node_size)
			return 1;
		return (i / max_node_size + 1);
	}

	/* leaf level */
	needed_nodes = 1;
	total_node_size = 0;
	cur_free = max_node_size;

	// start from the 'from'-th item
	start_item = from;
	// skip its first 'start_bytes' units
	start_bytes = ((from_bytes != -1) ? from_bytes : 0);

	// the last included item is the 'end_item'-th one
	end_item = vn->vn_nr_item - to - 1;
	// do not count the last 'end_bytes' units of the 'end_item'-th item
	end_bytes = (to_bytes != -1) ? to_bytes : 0;

	/* go through all items beginning with the start_item-th item and ending with
	   the end_item-th item. Do not count the first 'start_bytes' units of the
	   'start_item'-th item and the last 'end_bytes' of the 'end_item'-th item */

	for (i = start_item; i <= end_item; i++) {
		struct virtual_item *vi = vn->vn_vi + i;
		int skip_from_end = ((i == end_item) ? end_bytes : 0);

		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");

		/* get the size of the current item */
		current_item_size = vi->vi_item_len;

		/* do not count the head part (from_bytes) of the from-th item */
		current_item_size -=
		    op_part_size(vi, 0 /*from start */ , start_bytes);

		/* do not count the tail part of the last item */
		current_item_size -=
		    op_part_size(vi, 1 /*from end */ , skip_from_end);

		/* if the item fits into the current node entirely */
		if (total_node_size + current_item_size <= max_node_size) {
			snum012[needed_nodes - 1]++;
			total_node_size += current_item_size;
			start_bytes = 0;
			continue;
		}

		if (current_item_size > max_node_size) {
			/* the virtual item length is longer than the max size of an item in
			   a node. This is impossible for a direct item */
			RFALSE(is_direct_le_ih(vi->vi_ih),
			       "vs-8110: "
			       "direct item length is %d. It can not be longer than %d",
			       current_item_size, max_node_size);
			/* we will try to split it */
			flow = 1;
		}

		if (!flow) {
			/* as we do not split items, take a new node and continue */
			needed_nodes++;
			i--;
			total_node_size = 0;
			continue;
		}
		// calculate the number of item units which fit into the node being
		// filled
		{
			int free_space;

			free_space = max_node_size - total_node_size - IH_SIZE;
			units =
			    op_check_left(vi, free_space, start_bytes,
					  skip_from_end);
			if (units == -1) {
				/* nothing fits into the current node, take a new node and continue */
				needed_nodes++, i--, total_node_size = 0;
				continue;
			}
		}

		/* something fits into the current node */
		//if (snum012[3] != -1 || needed_nodes != 1)
		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
		start_bytes += units;
		snum012[needed_nodes - 1 + 3] = units;

		if (needed_nodes > 2)
			reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
					 "split_item_position is out of boundary");
		snum012[needed_nodes - 1]++;
		split_item_positions[needed_nodes - 1] = i;
		needed_nodes++;
		/* continue from the same item with start_bytes != -1 */
		start_item = i;
		i--;
		total_node_size = 0;
	}

	// snum012[4] (if it is not -1) contains the number of units which are
	// to be in S1new, snum012[3] - the number to be in S0. They are supposed
	// to be S1bytes and S2bytes correspondingly, so recalculate
	if (snum012[4] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S1new;

		split_item_num = split_item_positions[1];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S1new =
		    ((split_item_positions[0] ==
		      split_item_positions[1]) ? snum012[3] : 0);

		// s2bytes
		snum012[4] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
		    bytes_to_r - bytes_to_l - bytes_to_S1new;

		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
			reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
					 "directory or indirect item");
	}

	/* now we know S2bytes, calculate S1bytes */
	if (snum012[3] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S2new;

		split_item_num = split_item_positions[0];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S2new =
		    ((split_item_positions[0] == split_item_positions[1]
		      && snum012[4] != -1) ? snum012[4] : 0);

		// s1bytes
		snum012[3] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
		    bytes_to_r - bytes_to_l - bytes_to_S2new;
	}

	return needed_nodes;
}
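
/* On return, snum012[0], [1] and [2] hold the numbers of items that end up
   in S[0], the first new node and the second new node, while snum012[3] and
   snum012[4] hold s1bytes and s2bytes - the units (bytes, or entries for a
   directory item) of a split item that flow into the first and second new
   node, or -1 if no item is split (descriptive summary of the layout
   consumed by set_parameters() below). */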

#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance *cur_tb;
#endif

/* Set parameters for balancing.
 * Performs write of results of analysis of balancing into structure tb,
 * where it will later be used by the functions that actually do the balancing.
 * Parameters:
 *      tb      tree_balance structure;
 *      h       current level of the node;
 *      lnum    number of items from S[h] that must be shifted to L[h];
 *      rnum    number of items from S[h] that must be shifted to R[h];
 *      blk_num number of blocks that S[h] will be split into;
 *      s012    number of items that fall into the split nodes.
 *      lbytes  number of bytes which flow to the left neighbor from the item that is
 *              not shifted entirely
 *      rbytes  number of bytes which flow to the right neighbor from the item that is
 *              not shifted entirely
 *      s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in the s012 array)
 */

static void set_parameters(struct tree_balance *tb, int h, int lnum,
			   int rnum, int blk_num, short *s012, int lb, int rb)
{

	tb->lnum[h] = lnum;
	tb->rnum[h] = rnum;
	tb->blknum[h] = blk_num;

	if (h == 0) {		/* only for leaf level */
		if (s012 != NULL) {
			tb->s0num = *s012++,
			    tb->s1num = *s012++, tb->s2num = *s012++;
			tb->s1bytes = *s012++;
			tb->s2bytes = *s012;
		}
		tb->lbytes = lb;
		tb->rbytes = rb;
	}
	PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
	PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);

	PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
}
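
/* For example, set_parameters(tb, h, 0, 0, 1, NULL, -1, -1) records that
   nothing is shifted to either neighbor and that S[h] remains a single
   block; this is the call made when the new item fits into S[h] without any
   shifting (see can_node_be_removed() and ip_check_balance() below). */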

/* check whether the node disappears if we shift tb->lnum[0] items to the
   left neighbor and tb->rnum[0] to the right one. */
static int is_leaf_removable(struct tree_balance *tb)
{
	struct virtual_node *vn = tb->tb_vn;
	int to_left, to_right;
	int size;
	int remain_items;

	/* number of items that will be shifted to the left (right) neighbor
	   entirely */
	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
	remain_items = vn->vn_nr_item;

	/* how many items remain in S[0] after shifting to the neighbors */
	remain_items -= (to_left + to_right);

	if (remain_items < 1) {
		/* all contents of the node can be shifted to the neighbors */
		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
			       NULL, -1, -1);
		return 1;
	}

	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
		/* S[0] is not removable */
		return 0;

	/* check whether we can divide the one remaining item between the neighbors */

	/* get the size of the remaining item (in item units) */
	size = op_unit_num(&(vn->vn_vi[to_left]));

	if (tb->lbytes + tb->rbytes >= size) {
		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
			       tb->lbytes, -1);
		return 1;
	}

	return 0;
}
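
/* Illustrative numbers: with vn_nr_item == 5, lnum[0] == 3 and lbytes == 100
   (two whole items plus part of a third go left), rnum[0] == 3 and
   rbytes == 60, exactly one item remains in the middle.  If that item has
   op_unit_num() == 150 units, then 100 + 60 >= 150, so the remaining item
   can be divided between the neighbors and S[0] becomes removable. */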

/* check whether L, S, R can be joined in one node */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	ih_size = 0;
	if (vn->vn_nr_item) {
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		ih = B_N_PITEM_HEAD(S0, 0);
		if (tb->CFR[0]
		    && !comp_short_le_keys(&(ih->ih_key),
					   B_N_PDELIM_KEY(tb->CFR[0],
							  tb->rkey[0])))
			if (is_direntry_le_ih(ih)) {
				/* The directory must be in a correct state here: that is,
				   somewhere to the left the first directory item should
				   exist. The item being deleted cannot be that first one,
				   because its right neighbor is an item of the same
				   directory. (The first item always gets deleted last.)
				   So the neighbors of the deleted item can be merged,
				   and we can save ih_size */
				ih_size = IH_SIZE;

				/* we might check that the left neighbor exists and is of the
				   same directory */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}

/* when we do not split an item, lnum and rnum are numbers of entire items */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
   int to_l;\
   \
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
	      (MAX_NR_KEY(Sh) + 1 - lpar);\
	      \
	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
   if (lset==LEFT_SHIFT_FLOW)\
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
		     tb->lbytes, -1);\
   else\
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
		     -1, -1);\
}

#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
   int to_r;\
   \
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
   \
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
   if (rset==RIGHT_SHIFT_FLOW)\
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
		  -1, tb->rbytes);\
   else\
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
		  -1, -1);\
}
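
/* The internal-level branch of these macros balances by pointer count rather
   than by bytes.  Illustrative numbers: if MAX_NR_KEY(Sh) + 1 == 10 and
   lpar == 4, the left neighbor holds roughly 10 - 4 = 6 pointers; with
   vn->vn_nr_item + 1 == 8 pointers in S[h], to_l = (6 + 8) / 2 - 6 = 1, so
   one pointer is shifted and both nodes end up holding about half of the
   combined total (compare the similar calculation in ip_check_balance()). */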

static void free_buffers_in_tb(struct tree_balance *p_s_tb)
{
	int n_counter;

	decrement_counters_in_path(p_s_tb->tb_path);

	for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
		decrement_bcount(p_s_tb->L[n_counter]);
		p_s_tb->L[n_counter] = NULL;
		decrement_bcount(p_s_tb->R[n_counter]);
		p_s_tb->R[n_counter] = NULL;
		decrement_bcount(p_s_tb->FL[n_counter]);
		p_s_tb->FL[n_counter] = NULL;
		decrement_bcount(p_s_tb->FR[n_counter]);
		p_s_tb->FR[n_counter] = NULL;
		decrement_bcount(p_s_tb->CFL[n_counter]);
		p_s_tb->CFL[n_counter] = NULL;
		decrement_bcount(p_s_tb->CFR[n_counter]);
		p_s_tb->CFR[n_counter] = NULL;
	}
}

/* Get new buffers for storing new nodes that are created while balancing.
 * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *		CARRY_ON - schedule didn't occur while the function worked;
 *		NO_DISK_SPACE - no disk space.
 */
/* The function is NOT SCHEDULE-SAFE! */
static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_new_bh,
	    *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
	b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int n_counter, n_number_of_freeblk, n_amount_needed,	/* number of needed empty blocks */
	 n_retval = CARRY_ON;
	struct super_block *p_s_sb = p_s_tb->tb_sb;

	/* number_of_freeblk is the number of empty blocks which have been
	   acquired for use by the balancing algorithm minus the number of
	   empty blocks used in the previous levels of the analysis.
	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
	   occurs after empty blocks are acquired and the balancing analysis
	   is then restarted.  amount_needed is the number needed by this
	   level (n_h) of the balancing analysis.

	   Note that for systems with many processes writing, it would be
	   more layout optimal to calculate the total number needed by all
	   levels and then to run reiserfs_new_blocks to get all of them at once.  */

	/* Initiate number_of_freeblk to the amount acquired prior to the restart of
	   the analysis or 0 if not restarted, then subtract the amount needed
	   by all of the levels of the tree below n_h. */
	/* blknum includes S[n_h], so we subtract 1 in this calculation */
	for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
	     n_counter < n_h; n_counter++)
		n_number_of_freeblk -=
		    (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
						   1) : 0;

	/* Allocate missing empty blocks. */
	/* if p_s_Sh == 0 then we are getting a new root */
	n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
	/* Amount_needed = the amount that we need more than the amount that we have. */
	if (n_amount_needed > n_number_of_freeblk)
		n_amount_needed -= n_number_of_freeblk;
	else			/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/* No need to check quota - quota is not allocated for blocks used for formatted nodes */
	if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
				       n_amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each block number we just got, get a buffer and stick it on FEB */
	for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
	     n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {

		RFALSE(!*p_n_blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
		RFALSE(buffer_dirty(p_s_new_bh) ||
		       buffer_journaled(p_s_new_bh) ||
		       buffer_journal_dirty(p_s_new_bh),
		       "PAP-8140: journaled or dirty buffer %b for the new block",
		       p_s_new_bh);

		/* Put empty buffers into the array. */
		RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(p_s_new_bh);
		p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
	}

	if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
		n_retval = REPEAT_SEARCH;

	return n_retval;
}

/* Get free space of the left neighbor, which is stored in the parent
 * node of the left neighbor.  */
static int get_lfree(struct tree_balance *tb, int h)
{
	struct buffer_head *l, *f;
	int order;

	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
	    (l = tb->FL[h]) == NULL)
		return 0;

	if (f == l)
		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
	else {
		order = B_NR_ITEMS(l);
		f = l;
	}

	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
}

/* Get free space of the right neighbor,
 * which is stored in the parent node of the right neighbor.
 */
static int get_rfree(struct tree_balance *tb, int h)
{
	struct buffer_head *r, *f;
	int order;

	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
	    (r = tb->FR[h]) == NULL)
		return 0;

	if (f == r)
		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
	else {
		order = 0;
		f = r;
	}

	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));

}
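
/* Note that neither helper reads the neighbor's buffer itself: the free
   space is derived from the parent's disk-child entry for the neighbor, as
   MAX_CHILD_SIZE() of the parent minus the dc_size recorded in that entry
   (descriptive note; the neighbor buffer may not even be in memory, cf.
   is_left_neighbor_in_cache() below). */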

/* Check whether the left neighbor is in memory. */
static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_father, *left;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	b_blocknr_t n_left_neighbor_blocknr;
	int n_left_neighbor_position;

	if (!p_s_tb->FL[n_h])	/* Father of the left neighbor does not exist. */
		return 0;

	/* Calculate father of the node to be balanced. */
	p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);

	RFALSE(!p_s_father ||
	       !B_IS_IN_TREE(p_s_father) ||
	       !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
	       !buffer_uptodate(p_s_father) ||
	       !buffer_uptodate(p_s_tb->FL[n_h]),
	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
	       p_s_father, p_s_tb->FL[n_h]);

	/* Get position of the pointer to the left neighbor in the left father. */
	n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
	    p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
	/* Get left neighbor block number. */
	n_left_neighbor_blocknr =
	    B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
	/* Look for the left neighbor in the cache. */
	if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {

		RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
		       "vs-8170: left neighbor (%b %z) is not in the tree",
		       left, left);
		put_bh(left);
		return 1;
	}

	return 0;
}

#define LEFT_PARENTS  'l'
#define RIGHT_PARENTS 'r'

static void decrement_key(struct cpu_key *p_s_key)
{
	// call the item-specific function for this key
	item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
}

/* Calculate the far left/right parent of the left/right neighbor of the current node,
 * that is, calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 * Calculate the left/right common parent of the current node and L[h]/R[h].
 * Calculate the left/right delimiting key position.
 * Returns:	PATH_INCORRECT    - path in the tree is not correct;
 *		SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *		CARRY_ON          - schedule didn't occur while the function worked;
 */
static int get_far_parent(struct tree_balance *p_s_tb,
			  int n_h,
			  struct buffer_head **pp_s_father,
			  struct buffer_head **pp_s_com_father, char c_lr_par)
{
	struct buffer_head *p_s_parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct treepath *p_s_path = p_s_tb->tb_path;
	struct cpu_key s_lr_father_key;
	int n_counter,
	    n_position = INT_MAX,
	    n_first_last_position = 0,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);

	/* Starting from F[n_h], go upwards in the tree and look for the common
	   ancestor of F[n_h] and its neighbor l/r, which should be obtained. */

	n_counter = n_path_offset;

	RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
		/* Check whether the parent of the current buffer in the path is really its parent in the tree. */
		if (!B_IS_IN_TREE
		    (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
			return REPEAT_SEARCH;
		/* Check whether the position in the parent is correct. */
		if ((n_position =
		     PATH_OFFSET_POSITION(p_s_path,
					  n_counter - 1)) >
		    B_NR_ITEMS(p_s_parent))
			return REPEAT_SEARCH;
		/* Check whether the parent at the path really points to the child. */
		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
		    PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
			return REPEAT_SEARCH;
		/* Return the delimiting key if the position in the parent is not equal to the first/last one. */
		if (c_lr_par == RIGHT_PARENTS)
			n_first_last_position = B_NR_ITEMS(p_s_parent);
		if (n_position != n_first_last_position) {
			*pp_s_com_father = p_s_parent;
			get_bh(*pp_s_com_father);
			/*(*pp_s_com_father = p_s_parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
		/* Check whether the first buffer in the path is the root of the tree. */
		if (PATH_OFFSET_PBUFFER
		    (p_s_tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			*pp_s_father = *pp_s_com_father = NULL;
			return CARRY_ON;
		}
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pp_s_com_father, *pp_s_com_father);

	/* Check whether the common parent is locked. */

	if (buffer_locked(*pp_s_com_father)) {
		__wait_on_buffer(*pp_s_com_father);
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(*pp_s_com_father);
			return REPEAT_SEARCH;
		}
	}

	/* So, we got the common parent of the current node and its left/right neighbor.
	   Now we are getting the parent of the left/right neighbor. */

	/* Form the key to get the parent of the left/right neighbor. */
	le_key2cpu_key(&s_lr_father_key,
		       B_N_PDELIM_KEY(*pp_s_com_father,
				      (c_lr_par ==
				       LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
							n_position -
							1) : (p_s_tb->rkey[n_h -
									   1] =
							      n_position)));

	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     n_h + 1) == IO_ERROR)
		// path is released
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
		decrement_counters_in_path(&s_path_to_neighbor_father);
		decrement_bcount(*pp_s_com_father);
		return REPEAT_SEARCH;
	}

	*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
	       "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	s_path_to_neighbor_father.path_length--;
	decrement_counters_in_path(&s_path_to_neighbor_father);
	return CARRY_ON;
}

/* Get parents of neighbors of the node in the path (S[n_path_offset]) and common parents of
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
 * Calculate numbers of left and right delimiting key positions: lkey[n_path_offset], rkey[n_path_offset].
 * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *		CARRY_ON - schedule didn't occur while the function worked;
 */
static int get_parents(struct tree_balance *p_s_tb, int n_h)
{
	struct treepath *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_ret_value,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
	struct buffer_head *p_s_curf, *p_s_curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/* The root can not have parents.
		   Release nodes which previously were obtained as parents of the current node's neighbors. */
		decrement_bcount(p_s_tb->FL[n_h]);
		decrement_bcount(p_s_tb->CFL[n_h]);
		decrement_bcount(p_s_tb->FR[n_h]);
		decrement_bcount(p_s_tb->CFR[n_h]);
		p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
		    p_s_tb->CFR[n_h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[n_path_offset] of L[n_path_offset]. */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* Current node is not the first child of its parent. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->lkey[n_h] = n_position - 1;
	} else {
		/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
		   Calculate current common parent of L[n_path_offset] and the current node. Note that
		   CFL[n_path_offset] is not equal to FL[n_path_offset] and CFL[n_path_offset] is not equal to F[n_path_offset].
		   Calculate lkey[n_path_offset]. */
		if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
						  &p_s_curcf,
						  LEFT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	}

	decrement_bcount(p_s_tb->FL[n_h]);
	p_s_tb->FL[n_h] = p_s_curf;	/* New initialization of FL[n_h]. */
	decrement_bcount(p_s_tb->CFL[n_h]);
	p_s_tb->CFL[n_h] = p_s_curcf;	/* New initialization of CFL[n_h]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);

	/* Get parent FR[n_h] of R[n_h]. */

	/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
	if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
		/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
		   Calculate current common parent of R[n_h] and the current node. Note that CFR[n_h]
		   is not equal to FR[n_path_offset] and CFR[n_h] is not equal to F[n_h]. */
		if ((n_ret_value =
		     get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	} else {
		/* Current node is not the last child of its parent F[n_h]. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->rkey[n_h] = n_position;
	}

	decrement_bcount(p_s_tb->FR[n_h]);
	p_s_tb->FR[n_h] = p_s_curf;	/* New initialization of FR[n_path_offset]. */

	decrement_bcount(p_s_tb->CFR[n_h]);
	p_s_tb->CFR[n_h] = p_s_curcf;	/* New initialization of CFR[n_path_offset]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);

	return CARRY_ON;
}

/* it is possible to remove the node as a result of shifting to the
   neighbors even when we insert or paste an item. */
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
				      struct tree_balance *tb, int h)
{
	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	int levbytes = tb->insert_size[h];
	struct item_head *ih;
	struct reiserfs_key *r_key = NULL;

	ih = B_N_PITEM_HEAD(Sh, 0);
	if (tb->CFR[h])
		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);

	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
	    /* shifting may merge items which might save space */
	    -
	    ((!h
	      && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
	    -
	    ((!h && r_key
	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
	    + ((h) ? KEY_SIZE : 0)) {
		/* node can not be removed */
		if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
			if (!h)
				tb->s0num =
				    B_NR_ITEMS(Sh) +
				    ((mode == M_INSERT) ? 1 : 0);
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
	}
	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
	return !NO_BALANCING_NEEDED;
}
1197
1198/* Check whether current node S[h] is balanced when increasing its size by
1199 * Inserting or Pasting.
1200 * Calculate parameters for balancing for current level h.
1201 * Parameters:
1202 *      tb      tree_balance structure;
1203 *      h       current level of the node;
1204 *      inum    item number in S[h];
1205 *      mode    i - insert, p - paste;
1206 * Returns:     1 - schedule occurred; 
1207 *              0 - balancing for higher levels needed;
1208 *             -1 - no balancing for higher levels needed;
1209 *             -2 - no disk space.
1210 */
1211/* ip means Inserting or Pasting */
1212static int ip_check_balance(struct tree_balance *tb, int h)
1213{
1214        struct virtual_node *vn = tb->tb_vn;
1215        int levbytes,           /* Number of bytes that must be inserted into (value
1216                                   is negative if bytes are deleted) buffer which
1217                                   contains node being balanced.  The mnemonic is
1218                                   that the attempted change in node space used level
1219                                   is levbytes bytes. */
1220         n_ret_value;
1221
1222        int lfree, sfree, rfree /* free space in L, S and R */ ;
1223
1224        /* nver is short for number of vertixes, and lnver is the number if
1225           we shift to the left, rnver is the number if we shift to the
1226           right, and lrnver is the number if we shift in both directions.
1227           The goal is to minimize first the number of vertixes, and second,
1228           the number of vertixes whose contents are changed by shifting,
1229           and third the number of uncached vertixes whose contents are
1230           changed by shifting and must be read from disk.  */
1231        int nver, lnver, rnver, lrnver;
1232
1233        /* used at leaf level only, S0 = S[0] is the node being balanced,
1234           sInum [ I = 0,1,2 ] is the number of items that will
1235           remain in node SI after balancing.  S1 and S2 are new
1236           nodes that might be created. */
1237
1238        /* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
1239           where 4th parameter is s1bytes and 5th - s2bytes
1240         */
1241        short snum012[40] = { 0, };     /* s0num, s1num, s2num for 8 cases 
1242                                           0,1 - do not shift and do not shift but bottle
1243                                           2 - shift only whole item to left
1244                                           3 - shift to left and bottle as much as possible
1245                                           4,5 - shift to right (whole items and as much as possible
1246                                           6,7 - shift to both directions (whole items and as much as possible)
1247                                         */
1248
1249        /* Sh is the node whose balance is currently being checked */
1250        struct buffer_head *Sh;
1251
1252        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1253        levbytes = tb->insert_size[h];
1254
1255        /* Calculate balance parameters for creating new root. */
1256        if (!Sh) {
1257                if (!h)
1258                        reiserfs_panic(tb->tb_sb,
1259                                       "vs-8210: ip_check_balance: S[0] can not be 0");
1260                switch (n_ret_value = get_empty_nodes(tb, h)) {
1261                case CARRY_ON:
1262                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1263                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1264
1265                case NO_DISK_SPACE:
1266                case REPEAT_SEARCH:
1267                        return n_ret_value;
1268                default:
1269                        reiserfs_panic(tb->tb_sb,
1270                                       "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
1271                }
1272        }
1273
1274        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)     /* get parents of S[h] neighbors. */
1275                return n_ret_value;
1276
1277        sfree = B_FREE_SPACE(Sh);
1278
1279        /* get free space of neighbors */
1280        rfree = get_rfree(tb, h);
1281        lfree = get_lfree(tb, h);
1282
1283        if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1284            NO_BALANCING_NEEDED)
1285                /* and new item fits into node S[h] without any shifting */
1286                return NO_BALANCING_NEEDED;
1287
1288        create_virtual_node(tb, h);
1289
1290        /*
1291           determine the maximal number of items we can shift to the left neighbor (in the tb structure)
1292           and the maximal number of bytes that can flow to the left neighbor
1293           from the leftmost liquid item that cannot be shifted from S[0] entirely (returned value)
1294         */
1295        check_left(tb, h, lfree);
1296
1297        /*
1298           determine the maximal number of items we can shift to the right neighbor (in the tb structure)
1299           and the maximal number of bytes that can flow to the right neighbor
1300           from the rightmost liquid item that cannot be shifted from S[0] entirely (returned value)
1301         */
1302        check_right(tb, h, rfree);
1303
1304        /* all contents of internal node S[h] can be moved into its
1305           neighbors, S[h] will be removed after balancing */
1306        if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1307                int to_r;
1308
1309                /* Since we are working on internal nodes, and our internal
1310                   nodes have fixed size entries, we can balance by the
1311                   number of items rather than the space they consume.  In this
1312                   routine we set the left node equal to the right node,
1313                   allowing a difference of less than or equal to 1 child
1314                   pointer. */
1315                to_r =
1316                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1317                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1318                                                tb->rnum[h]);
1319                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1320                               -1, -1);
1321                return CARRY_ON;
1322        }
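
        /*
         * Worked example (made-up numbers, my reading of the formula above):
         * with MAX_NR_KEY(Sh) = 7, lnum[h] = rnum[h] = 5 and vn_nr_item = 8,
         * to_r = ((14 + 2 - 5 - 5 + 8 + 1) / 2) - (8 - 5) = 7 - 3 = 4,
         * so 4 items go to R[h] and vn_nr_item + 1 - to_r = 5 go to L[h],
         * leaving the two neighbors within one child pointer of each other.
         */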
1323
1324        /* this checks the balance condition that no two neighboring nodes can fit into one node */
1325        RFALSE(h &&
1326               (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1327                tb->rnum[h] >= vn->vn_nr_item + 1),
1328               "vs-8220: tree is not balanced on internal level");
1329        RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1330                      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1331               "vs-8225: tree is not balanced on leaf level");
1332
1333        /* all contents of S[0] can be moved into its neighbors;
1334           S[0] will be removed after balancing. */
1335        if (!h && is_leaf_removable(tb))
1336                return CARRY_ON;
1337
1338        /* why do we perform this check here rather than earlier?
1339           Answer: we can win 1 node in some of the cases above. Moreover we
1340           effectively checked it above, when we checked that S[0] is not
1341           removable in principle */
1342        if (sfree >= levbytes) {        /* new item fits into node S[h] without any shifting */
1343                if (!h)
1344                        tb->s0num = vn->vn_nr_item;
1345                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1346                return NO_BALANCING_NEEDED;
1347        }
1348
1349        {
1350                int lpar, rpar, nset, lset, rset, lrset;
1351                /* 
1352                 * regular overflowing of the node
1353                 */
1354
1355                /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1356                   lpar, rpar - number of items we can shift to the left/right neighbor (including the item being split)
1357                   nset, lset, rset, lrset - show whether flowing the items gives better packing
1358                 */
1359#define FLOW 1
1360#define NO_FLOW 0               /* do no splitting at all */
1361
1362                /* we choose one of the following */
1363#define NOTHING_SHIFT_NO_FLOW   0
1364#define NOTHING_SHIFT_FLOW      5
1365#define LEFT_SHIFT_NO_FLOW      10
1366#define LEFT_SHIFT_FLOW         15
1367#define RIGHT_SHIFT_NO_FLOW     20
1368#define RIGHT_SHIFT_FLOW        25
1369#define LR_SHIFT_NO_FLOW        30
1370#define LR_SHIFT_FLOW           35
1371
1372                lpar = tb->lnum[h];
1373                rpar = tb->rnum[h];
1374
1375                /* calculate the number of blocks S[h] must be split into when
1376                   nothing is shifted to the neighbors,
1377                   as well as the number of items in each part of the split node (s012 numbers),
1378                   and the number of bytes (s1bytes) of the shared drop which flows to S1, if any */
1379                nset = NOTHING_SHIFT_NO_FLOW;
1380                nver = get_num_ver(vn->vn_mode, tb, h,
1381                                   0, -1, h ? vn->vn_nr_item : 0, -1,
1382                                   snum012, NO_FLOW);
1383
1384                if (!h) {
1385                        int nver1;
1386
1387                        /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1388                        nver1 = get_num_ver(vn->vn_mode, tb, h,
1389                                            0, -1, 0, -1,
1390                                            snum012 + NOTHING_SHIFT_FLOW, FLOW);
1391                        if (nver > nver1)
1392                                nset = NOTHING_SHIFT_FLOW, nver = nver1;
1393                }
1394
1395                /* calculate the number of blocks S[h] must be split into when
1396                   l_shift_num first items and l_shift_bytes of the rightmost
1397                   liquid item to be shifted are shifted to the left neighbor,
1398                   as well as the number of items in each part of the split node (s012 numbers),
1399                   and the number of bytes (s1bytes) of the shared drop which flows to S1, if any
1400                 */
1401                lset = LEFT_SHIFT_NO_FLOW;
1402                lnver = get_num_ver(vn->vn_mode, tb, h,
1403                                    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1404                                    -1, h ? vn->vn_nr_item : 0, -1,
1405                                    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1406                if (!h) {
1407                        int lnver1;
1408
1409                        lnver1 = get_num_ver(vn->vn_mode, tb, h,
1410                                             lpar -
1411                                             ((tb->lbytes != -1) ? 1 : 0),
1412                                             tb->lbytes, 0, -1,
1413                                             snum012 + LEFT_SHIFT_FLOW, FLOW);
1414                        if (lnver > lnver1)
1415                                lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1416                }
1417
1418                /* calculate the number of blocks S[h] must be split into when
1419                   r_shift_num first items and r_shift_bytes of the leftmost
1420                   liquid item to be shifted are shifted to the right neighbor,
1421                   as well as the number of items in each part of the split node (s012 numbers),
1422                   and the number of bytes (s1bytes) of the shared drop which flows to S1, if any
1423                 */
1424                rset = RIGHT_SHIFT_NO_FLOW;
1425                rnver = get_num_ver(vn->vn_mode, tb, h,
1426                                    0, -1,
1427                                    h ? (vn->vn_nr_item - rpar)
1428                                      : (rpar -
1429                                         ((tb->rbytes != -1) ? 1 : 0)),
1430                                    -1,
1431                                    snum012 + RIGHT_SHIFT_NO_FLOW,
1432                                    NO_FLOW);
1433                if (!h) {
1434                        int rnver1;
1435
1436                        rnver1 = get_num_ver(vn->vn_mode, tb, h,
1437                                             0, -1,
1438                                             (rpar -
1439                                              ((tb->rbytes != -1) ? 1 : 0)),
1440                                             tb->rbytes,
1441                                             snum012 + RIGHT_SHIFT_FLOW, FLOW);
1442
1443                        if (rnver > rnver1)
1444                                rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1445                }
1446
1447                /* calculate the number of blocks S[h] must be split into when
1448                   items are shifted in both directions,
1449                   as well as the number of items in each part of the split node (s012 numbers),
1450                   and the number of bytes (s1bytes) of the shared drop which flows to S1, if any
1451                 */
1452                lrset = LR_SHIFT_NO_FLOW;
1453                lrnver = get_num_ver(vn->vn_mode, tb, h,
1454                                     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1455                                     -1,
1456                                     h ? (vn->vn_nr_item - rpar)
1457                                       : (rpar -
1458                                          ((tb->rbytes != -1) ? 1 : 0)),
1459                                     -1,
1460                                     snum012 + LR_SHIFT_NO_FLOW,
1461                                     NO_FLOW);
1462                if (!h) {
1463                        int lrnver1;
1464
1465                        lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1466                                              lpar -
1467                                              ((tb->lbytes != -1) ? 1 : 0),
1468                                              tb->lbytes,
1469                                              (rpar -
1470                                               ((tb->rbytes != -1) ? 1 : 0)),
1471                                              tb->rbytes,
1472                                              snum012 + LR_SHIFT_FLOW, FLOW);
1473                        if (lrnver > lrnver1)
1474                                lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1475                }
1476
1477                /* Our general shifting strategy is:
1478                   1) to minimize the number of new nodes;
1479                   2) to minimize the number of neighbors involved in shifting;
1480                   3) to minimize the number of disk reads (see the sketch after this function); */
1481
1482                /* we can win TWO or ONE nodes by shifting in both directions */
1483                if (lrnver < lnver && lrnver < rnver) {
1484                        RFALSE(h &&
1485                               (tb->lnum[h] != 1 ||
1486                                tb->rnum[h] != 1 ||
1487                                lrnver != 1 || rnver != 2 || lnver != 2
1488                                || h != 1), "vs-8230: bad h");
1489                        if (lrset == LR_SHIFT_FLOW)
1490                                set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1491                                               lrnver, snum012 + lrset,
1492                                               tb->lbytes, tb->rbytes);
1493                        else
1494                                set_parameters(tb, h,
1495                                               tb->lnum[h] -
1496                                               ((tb->lbytes == -1) ? 0 : 1),
1497                                               tb->rnum[h] -
1498                                               ((tb->rbytes == -1) ? 0 : 1),
1499                                               lrnver, snum012 + lrset, -1, -1);
1500
1501                        return CARRY_ON;
1502                }
1503
1504                /* if shifting doesn't lead to better packing then don't shift */
1505                if (nver == lrnver) {
1506                        set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1507                                       -1);
1508                        return CARRY_ON;
1509                }
1510
1511                /* now we know that for better packing shifting in only one
1512                   direction either to the left or to the right is required */
1513
1514                /*  if shifting to the left is better than shifting to the right */
1515                if (lnver < rnver) {
1516                        SET_PAR_SHIFT_LEFT;
1517                        return CARRY_ON;
1518                }
1519
1520                /* if shifting to the right is better than shifting to the left */
1521                if (lnver > rnver) {
1522                        SET_PAR_SHIFT_RIGHT;
1523                        return CARRY_ON;
1524                }
1525
1526                /* now shifting in either direction gives the same number
1527                   of nodes and we can make use of the cached neighbors */
1528                if (is_left_neighbor_in_cache(tb, h)) {
1529                        SET_PAR_SHIFT_LEFT;
1530                        return CARRY_ON;
1531                }
1532
1533                /* shift to the right regardless of whether the right neighbor is in the cache or not */
1534                SET_PAR_SHIFT_RIGHT;
1535                return CARRY_ON;
1536        }
1537}
1538
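/*
 * Hypothetical sketch (not reiserfs code): the shift-direction choice made at
 * the end of ip_check_balance() above, reduced to plain integers.  nver,
 * lnver, rnver and lrnver are the node counts produced by the get_num_ver()
 * calls; "cached_left" stands in for is_left_neighbor_in_cache().
 */
enum shift_choice { SHIFT_NONE, SHIFT_LEFT, SHIFT_RIGHT, SHIFT_BOTH };

static enum shift_choice pick_shift(int nver, int lnver, int rnver, int lrnver,
                                    int cached_left)
{
        if (lrnver < lnver && lrnver < rnver)
                return SHIFT_BOTH;      /* shifting both ways wins one or two nodes */
        if (nver == lrnver)
                return SHIFT_NONE;      /* shifting does not improve packing */
        if (lnver != rnver)
                return (lnver < rnver) ? SHIFT_LEFT : SHIFT_RIGHT;
        /* tie: prefer the neighbor that is already in the buffer cache */
        return cached_left ? SHIFT_LEFT : SHIFT_RIGHT;
}
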
1539/* Check whether current node S[h] is balanced when Decreasing its size by
1540 * Deleting or Cutting for INTERNAL node of S+tree.
1541 * Calculate parameters for balancing for current level h.
1542 * Parameters:
1543 *      tb      tree_balance structure;
1544 *      h       current level of the node;
1545 *      inum    item number in S[h];
1546 *      mode    d - delete, c - cut;
1547 * Returns:     1 - schedule occurred; 
1548 *              0 - balancing for higher levels needed;
1549 *             -1 - no balancing for higher levels needed;
1550 *             -2 - no disk space.
1551 *
1552 * Note: Items of internal nodes have fixed size, so the balance condition for
1553 * the internal part of S+tree is as for the B-trees.
1554 */
1555static int dc_check_balance_internal(struct tree_balance *tb, int h)
1556{
1557        struct virtual_node *vn = tb->tb_vn;
1558
1559        /* Sh is the node whose balance is currently being checked,
1560           and Fh is its father.  */
1561        struct buffer_head *Sh, *Fh;
1562        int maxsize, n_ret_value;
1563        int lfree, rfree /* free space in L and R */ ;
1564
1565        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1566        Fh = PATH_H_PPARENT(tb->tb_path, h);
1567
1568        maxsize = MAX_CHILD_SIZE(Sh);
1569
1570/*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1571/*   new_nr_item = number of items the node would have if the operation were */
1572/*      performed without balancing; */
1573        create_virtual_node(tb, h);
1574
1575        if (!Fh) {              /* S[h] is the root. */
1576                if (vn->vn_nr_item > 0) {
1577                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1578                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1579                }
1580                /* new_nr_item == 0.
1581                 * Current root will be deleted resulting in
1582                 * decrementing the tree height. */
1583                set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1584                return CARRY_ON;
1585        }
1586
1587        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1588                return n_ret_value;
1589
1590        /* get free space of neighbors */
1591        rfree = get_rfree(tb, h);
1592        lfree = get_lfree(tb, h);
1593
1594        /* determine maximal number of items we can fit into neighbors */
1595        check_left(tb, h, lfree);
1596        check_right(tb, h, rfree);
1597
1598        if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1599                                                 * In this case we balance only if it leads to better packing. */
1600                if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1601                                                         * which is impossible with greater values of new_nr_item. */
1602                        if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1603                                /* All contents of S[h] can be moved to L[h]. */
1604                                int n;
1605                                int order_L;
1606
1607                                order_L =
1608                                    ((n =
1609                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1610                                                          h)) ==
1611                                     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1612                                n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1613                                    (DC_SIZE + KEY_SIZE);
1614                                set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1615                                               -1);
1616                                return CARRY_ON;
1617                        }
1618
1619                        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1620                                /* All contents of S[h] can be moved to R[h]. */
1621                                int n;
1622                                int order_R;
1623
1624                                order_R =
1625                                    ((n =
1626                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1627                                                          h)) ==
1628                                     B_NR_ITEMS(Fh)) ? 0 : n + 1;
1629                                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1630                                    (DC_SIZE + KEY_SIZE);
1631                                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1632                                               -1);
1633                                return CARRY_ON;
1634                        }
1635                }
1636
1637                if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1638                        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1639                        int to_r;
1640
1641                        to_r =
1642                            ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1643                             tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1644                            (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1645                        set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1646                                       0, NULL, -1, -1);
1647                        return CARRY_ON;
1648                }
1649
1650                /* Balancing does not lead to better packing. */
1651                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1652                return NO_BALANCING_NEEDED;
1653        }
1654
1655        /* The current node contains an insufficient number of items. Balancing is required. */
1656        /* Check whether we can merge S[h] with left neighbor. */
1657        if (tb->lnum[h] >= vn->vn_nr_item + 1)
1658                if (is_left_neighbor_in_cache(tb, h)
1659                    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1660                        int n;
1661                        int order_L;
1662
1663                        order_L =
1664                            ((n =
1665                              PATH_H_B_ITEM_ORDER(tb->tb_path,
1666                                                  h)) ==
1667                             0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1668                        n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1669                                                                      KEY_SIZE);
1670                        set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1671                        return CARRY_ON;
1672                }
1673
1674        /* Check whether we can merge S[h] with right neighbor. */
1675        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1676                int n;
1677                int order_R;
1678
1679                order_R =
1680                    ((n =
1681                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1682                                          h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1683                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1684                                                              KEY_SIZE);
1685                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1686                return CARRY_ON;
1687        }
1688
1689        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1690        if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1691                int to_r;
1692
1693                to_r =
1694                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1695                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1696                                                tb->rnum[h]);
1697                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1698                               -1, -1);
1699                return CARRY_ON;
1700        }
1701
1702        /* For internal nodes try to borrow items from a neighbor */
1703        RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1704
1705        /* Borrow one or two items from the cached neighbor */
1706        if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1707                int from_l;
1708
1709                from_l =
1710                    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1711                     1) / 2 - (vn->vn_nr_item + 1);
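
                /*
                 * Worked example (made-up numbers): with MAX_NR_KEY(Sh) = 7,
                 * lnum[h] = 1 and vn_nr_item = 2, the formula above gives
                 * from_l = (7 + 1 - 1 + 2 + 1) / 2 - (2 + 1) = 5 - 3 = 2,
                 * i.e. two items are borrowed from the left neighbor, which
                 * (as I read it) evens out the children between L[h] and S[h].
                 */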
1712                set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1713                return CARRY_ON;
1714        }
1715
1716        set_parameters(tb, h, 0,
1717                       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1718                          1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1719        return CARRY_ON;
1720}
1721
1722/* Check whether current node S[h] is balanced when Decreasing its size by
1723 * Deleting or Truncating for LEAF node of S+tree.
1724 * Calculate parameters for balancing for current level h.
1725 * Parameters:
1726 *      tb      tree_balance structure;
1727 *      h       current level of the node;
1728 *      inum    item number in S[h];
1729 *      mode    d - delete, c - cut (truncate);
1730 * Returns:     1 - schedule occurred; 
1731 *              0 - balancing for higher levels needed;
1732 *             -1 - no balancing for higher levels needed;
1733 *             -2 - no disk space.
1734 */
1735static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1736{
1737        struct virtual_node *vn = tb->tb_vn;
1738
1739        /* Number of bytes that must be deleted from
1740           (the value is negative since bytes are being deleted) the buffer
1741           which contains the node being balanced.  The mnemonic is that the
1742           attempted change in node space used at this level is levbytes bytes. */
1743        int levbytes;
1744        /* the maximal item size */
1745        int maxsize, n_ret_value;
1746        /* S0 is the node whose balance is currently being checked,
1747           and F0 is its father.  */
1748        struct buffer_head *S0, *F0;
1749        int lfree, rfree /* free space in L and R */ ;
1750
1751        S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1752        F0 = PATH_H_PPARENT(tb->tb_path, 0);
1753
1754        levbytes = tb->insert_size[h];
1755
1756        maxsize = MAX_CHILD_SIZE(S0);   /* maximal possible size of an item */
1757
1758        if (!F0) {              /* S[0] is the root now. */
1759
1760                RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1761                       "vs-8240: attempt to create empty buffer tree");
1762
1763                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1764                return NO_BALANCING_NEEDED;
1765        }
1766
1767        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1768                return n_ret_value;
1769
1770        /* get free space of neighbors */
1771        rfree = get_rfree(tb, h);
1772        lfree = get_lfree(tb, h);
1773
1774        create_virtual_node(tb, h);
1775
1776        /* if 3 leaves can be merged into one, set parameters and return */
1777        if (are_leaves_removable(tb, lfree, rfree))
1778                return CARRY_ON;
1779
1780        /* determine the maximal number of items we can shift to the left/right neighbor
1781           and the maximal number of bytes that can flow to the left/right neighbor
1782           from the left/rightmost liquid item that cannot be shifted from S[0] entirely
1783         */
1784        check_left(tb, h, lfree);
1785        check_right(tb, h, rfree);
1786
1787        /* check whether we can merge S with left neighbor. */
1788        if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1789                if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||      /* S can not be merged with R */
1790                    !tb->FR[h]) {
1791
1792                        RFALSE(!tb->FL[h],
1793                               "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1794
1795                        /* set parameter to merge S[0] with its left neighbor */
1796                        set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1797                        return CARRY_ON;
1798                }
1799
1800        /* check whether we can merge S[0] with right neighbor. */
1801        if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1802                set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1803                return CARRY_ON;
1804        }
1805
1806        /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1807        if (is_leaf_removable(tb))
1808                return CARRY_ON;
1809
1810        /* Balancing is not required. */
1811        tb->s0num = vn->vn_nr_item;
1812        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1813        return NO_BALANCING_NEEDED;
1814}
1815
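/*
 * Hypothetical sketch (not reiserfs code): the order of preference applied by
 * dc_check_balance_leaf() above, with each test reduced to a boolean.
 * "merge_three" stands for are_leaves_removable(), "fits_left"/"fits_right"
 * for the lnum/lbytes and rnum/rbytes merge checks (the left case also folds
 * in the cached-neighbor preference), and "spread_both" for
 * is_leaf_removable().
 */
enum leaf_dc_choice {
        LEAF_MERGE_THREE,       /* three leaves collapse into one           */
        LEAF_MERGE_LEFT,        /* S[0] fits entirely into L[0]             */
        LEAF_MERGE_RIGHT,       /* S[0] fits entirely into R[0]             */
        LEAF_SPREAD_BOTH,       /* S[0] is emptied into L[0] and R[0]       */
        LEAF_KEEP               /* balancing is not required                */
};

static enum leaf_dc_choice leaf_dc_pick(int merge_three, int fits_left,
                                        int fits_right, int spread_both)
{
        if (merge_three)
                return LEAF_MERGE_THREE;
        if (fits_left)
                return LEAF_MERGE_LEFT;
        if (fits_right)
                return LEAF_MERGE_RIGHT;
        if (spread_both)
                return LEAF_SPREAD_BOTH;
        return LEAF_KEEP;
}
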
1816/* Check whether current node S[h] is balanced when Decreasing its size by
1817 * Deleting or Cutting.
1818 * Calculate parameters for balancing for current level h.
1819 * Parameters:
1820 *      tb      tree_balance structure;
1821 *      h       current level of the node;
1822 *      inum    item number in S[h];
1823 *      mode    d - delete, c - cut.
1824 * Returns:     1 - schedule occurred; 
1825 *              0 - balancing for higher levels needed;
1826 *             -1 - no balancing for higher levels needed;
1827 *             -2 - no disk space.
1828 */
1829static int dc_check_balance(struct tree_balance *tb, int h)
1830{
1831        RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1832               "vs-8250: S is not initialized");
1833
1834        if (h)
1835                return dc_check_balance_internal(tb, h);
1836        else
1837                return dc_check_balance_leaf(tb, h);
1838}
1839
1840/* Check whether current node S[h] is balanced.
1841 * Calculate parameters for balancing for current level h.
1842 * Parameters:
1843 *
1844 *      tb      tree_balance structure:
1845 *
1846 *              tb is a large structure that must be read about in the header file
1847 *              at the same time as this procedure if the reader is to successfully
1848 *              understand this procedure
1849 *
1850 *      h       current level of the node;
1851 *      inum    item number in S[h];
1852 *      mode    i - insert, p - paste, d - delete, c - cut.
1853 * Returns:     1 - schedule occurred; 
1854 *              0 - balancing for higher levels needed;
1855 *             -1 - no balancing for higher levels needed;
1856 *             -2 - no disk space.
1857 */
1858static int check_balance(int mode,
1859                         struct tree_balance *tb,
1860                         int h,
1861                         int inum,
1862                         int pos_in_item,
1863                         struct item_head *ins_ih, const void *data)
1864{
1865        struct virtual_node *vn;
1866
1867        vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1868        vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1869        vn->vn_mode = mode;
1870        vn->vn_affected_item_num = inum;
1871        vn->vn_pos_in_item = pos_in_item;
1872        vn->vn_ins_ih = ins_ih;
1873        vn->vn_data = data;
1874
1875        RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1876               "vs-8255: ins_ih can not be 0 in insert mode");
1877
1878        if (tb->insert_size[h] > 0)
1879                /* Calculate balance parameters when size of node is increasing. */
1880                return ip_check_balance(tb, h);
1881
1882        /* Calculate balance parameters when  size of node is decreasing. */
1883        return dc_check_balance(tb, h);
1884}
1885
1886/* Check whether the parent in the path is really the parent of the current node. */
1887static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
1888{
1889        struct buffer_head *p_s_bh;
1890        struct treepath *p_s_path = p_s_tb->tb_path;
1891        int n_position,
1892            n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1893
1894        /* We are in the root or in the new root. */
1895        if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1896
1897                RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1898                       "PAP-8260: invalid offset in the path");
1899
1900                if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
1901                    b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1902                        /* Root is not changed. */
1903                        PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1904                        PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1905                        return CARRY_ON;
1906                }
1907                return REPEAT_SEARCH;   /* Root is changed and we must recalculate the path. */
1908        }
1909
1910        if (!B_IS_IN_TREE
1911            (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
1912                return REPEAT_SEARCH;   /* Parent in the path is not in the tree. */
1913
1914        if ((n_position =
1915             PATH_OFFSET_POSITION(p_s_path,
1916                                  n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
1917                return REPEAT_SEARCH;
1918
1919        if (B_N_CHILD_NUM(p_s_bh, n_position) !=
1920            PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
1921                /* Parent in the path is not parent of the current node in the tree. */
1922                return REPEAT_SEARCH;
1923
1924        if (buffer_locked(p_s_bh)) {
1925                __wait_on_buffer(p_s_bh);
1926                if (FILESYSTEM_CHANGED_TB(p_s_tb))
1927                        return REPEAT_SEARCH;
1928        }
1929
1930        return CARRY_ON;        /* Parent in the path is unlocked and really the parent of the current node.  */
1931}
1932
1933/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1934 * of S[n_h] we need in order to balance S[n_h], and get them if
1935 * necessary.
1936 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
1937 *              CARRY_ON - schedule didn't occur while the function worked;
1938 */
1939static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
1940{
1941        int n_child_position,
1942            n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
1943        unsigned long n_son_number;
1944        struct super_block *p_s_sb = p_s_tb->tb_sb;
1945        struct buffer_head *p_s_bh;
1946
1947        PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);
1948
1949        if (p_s_tb->lnum[n_h]) {
1950                /* We need left neighbor to balance S[n_h]. */
1951                PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
1952                p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1953
1954                RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
1955                       !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
1956                       "PAP-8270: invalid position in the parent");
1957
1958                n_child_position =
1959                    (p_s_bh == p_s_tb->FL[n_h]) ?
1960                        p_s_tb->lkey[n_h] :
1961                        B_NR_ITEMS(p_s_tb->FL[n_h]);
1962                n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
1963                p_s_bh = sb_bread(p_s_sb, n_son_number);
1964                if (!p_s_bh)
1965                        return IO_ERROR;
1966                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1967                        decrement_bcount(p_s_bh);
1968                        PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1969                        return REPEAT_SEARCH;
1970                }
1971
1972                RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
1973                       n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
1974                       B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
1975                       p_s_bh->b_blocknr, "PAP-8275: invalid parent");
1976                RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
1977                RFALSE(!n_h &&
1978                       B_FREE_SPACE(p_s_bh) !=
1979                       MAX_CHILD_SIZE(p_s_bh) -
1980                       dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
1981                       "PAP-8290: invalid child size of left neighbor");
1982
1983                decrement_bcount(p_s_tb->L[n_h]);
1984                p_s_tb->L[n_h] = p_s_bh;
1985        }
1986
1987        if (p_s_tb->rnum[n_h]) {        /* We need right neighbor to balance S[n_h]. */
1988                PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
1989                p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1990
1991                RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
1992                       PATH_OFFSET_POSITION(p_s_tb->tb_path,
1993                                            n_path_offset) >=
1994                       B_NR_ITEMS(p_s_bh),
1995                       "PAP-8295: invalid position in the parent");
1996
1997                n_child_position =
1998                    (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
1999                n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
2000                p_s_bh = sb_bread(p_s_sb, n_son_number);
2001                if (!p_s_bh)
2002                        return IO_ERROR;
2003                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2004                        decrement_bcount(p_s_bh);
2005                        PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
2006                        return REPEAT_SEARCH;
2007                }
2008                decrement_bcount(p_s_tb->R[n_h]);
2009                p_s_tb->R[n_h] = p_s_bh;
2010
2011                RFALSE(!n_h
2012                       && B_FREE_SPACE(p_s_bh) !=
2013                       MAX_CHILD_SIZE(p_s_bh) -
2014                       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
2015                       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2016                       B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
2017                       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));
2018
2019        }
2020        return CARRY_ON;
2021}
2022
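/* The virtual node buffer must cover the worst case contents of a node:
 * either the block packed with as many minimal items as fit (one struct
 * virtual_item each), or a single directory item packed with one-character
 * names (one virtual_item plus a direntry_uarea carrying a __u16 size per
 * entry).  sizeof(struct virtual_node) is added on top for the header itself. */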
2023static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2024{
2025        int max_num_of_items;
2026        int max_num_of_entries;
2027        unsigned long blocksize = sb->s_blocksize;
2028
2029#define MIN_NAME_LEN 1
2030
2031        max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2032        max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2033            (DEH_SIZE + MIN_NAME_LEN);
2034
2035        return sizeof(struct virtual_node) +
2036            max(max_num_of_items * sizeof(struct virtual_item),
2037                sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2038                (max_num_of_entries - 1) * sizeof(__u16));
2039}
2040
2041/* maybe we should fail the balancing we are going to perform when kmalloc
2042   fails several times. But for now it will loop until kmalloc gets the
2043   required memory */
2044static int get_mem_for_virtual_node(struct tree_balance *tb)
2045{
2046        int check_fs = 0;
2047        int size;
2048        char *buf;
2049
2050        size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2051
2052        if (size > tb->vn_buf_size) {
2053                /* we have to allocate more memory for virtual node */
2054                if (tb->vn_buf) {
2055                        /* free memory allocated before */
2056                        kfree(tb->vn_buf);
2057                        /* this is not needed if kfree is atomic */
2058                        check_fs = 1;
2059                }
2060
2061                /* the virtual node now requires more memory */
2062                tb->vn_buf_size = size;
2063
2064                /* get memory for the virtual node */
2065                buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2066                if (!buf) {
2067                        /* getting memory with GFP_KERNEL priority may involve
2068                           balancing now (due to indirect_to_direct conversion on
2069                           dcache shrinking). So, release path and collected
2070                           resources here */
2071                        free_buffers_in_tb(tb);
2072                        buf = kmalloc(size, GFP_NOFS);
2073                        if (!buf) {
2074                                tb->vn_buf_size = 0;
2075                        }
2076                        tb->vn_buf = buf;
2077                        schedule();
2078                        return REPEAT_SEARCH;
2079                }
2080
2081                tb->vn_buf = buf;
2082        }
2083
2084        if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2085                return REPEAT_SEARCH;
2086
2087        return CARRY_ON;
2088}
2089
2090#ifdef CONFIG_REISERFS_CHECK
2091static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2092                                   struct buffer_head *p_s_bh,
2093                                   const char *descr, int level)
2094{
2095        if (p_s_bh) {
2096                if (atomic_read(&(p_s_bh->b_count)) <= 0) {
2097
2098                        reiserfs_panic(p_s_sb,
2099                                       "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
2100                                       descr, level, p_s_bh);
2101                }
2102
2103                if (!buffer_uptodate(p_s_bh)) {
2104                        reiserfs_panic(p_s_sb,
2105                                       "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
2106                                       descr, level, p_s_bh);
2107                }
2108
2109                if (!B_IS_IN_TREE(p_s_bh)) {
2110                        reiserfs_panic(p_s_sb,
2111                                       "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
2112                                       descr, level, p_s_bh);
2113                }
2114
2115                if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2116                        reiserfs_panic(p_s_sb,
2117                                       "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
2118                                       descr, level, p_s_bh);
2119                }
2120
2121                if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2122                        reiserfs_panic(p_s_sb,
2123                                       "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
2124                                       descr, level, p_s_bh);
2125                }
2126
2127                if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2128                        reiserfs_panic(p_s_sb,
2129                                       "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2130                                       descr, level, p_s_bh);
2131                }
2132        }
2133}
2134#else
2135static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2136                                   struct buffer_head *p_s_bh,
2137                                   const char *descr, int level)
2138{;
2139}
2140#endif
2141
2142static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2143{
2144        return reiserfs_prepare_for_journal(s, bh, 0);
2145}
2146
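/* Walk every buffer this balance may touch (the path, L/FL/CFL, R/FR/CFR and
 * the FEB array) and try to prepare each of them for the journal without
 * blocking.  If some buffer turns out to be locked, wait on it and rescan from
 * the beginning; if the filesystem changed while we slept, return
 * REPEAT_SEARCH so the caller can redo the search. */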
2147static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2148{
2149        struct buffer_head *locked;
2150#ifdef CONFIG_REISERFS_CHECK
2151        int repeat_counter = 0;
2152#endif
2153        int i;
2154
2155        do {
2156
2157                locked = NULL;
2158
2159                for (i = p_s_tb->tb_path->path_length;
2160                     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2161                        if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2162                                /* if I understand correctly, we can only be sure the last buffer
2163                                 ** in the path is in the tree --clm
2164                                 */
2165#ifdef CONFIG_REISERFS_CHECK
2166                                if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2167                                    PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2168                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2169                                                               PATH_OFFSET_PBUFFER
2170                                                               (p_s_tb->tb_path,
2171                                                                i), "S",
2172                                                               p_s_tb->tb_path->
2173                                                               path_length - i);
2174                                }
2175#endif
2176                                if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2177                                                          PATH_OFFSET_PBUFFER
2178                                                          (p_s_tb->tb_path,
2179                                                           i))) {
2180                                        locked =
2181                                            PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
2182                                                                i);
2183                                }
2184                        }
2185                }
2186
2187                for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
2188                     i++) {
2189
2190                        if (p_s_tb->lnum[i]) {
2191
2192                                if (p_s_tb->L[i]) {
2193                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2194                                                               p_s_tb->L[i],
2195                                                               "L", i);
2196                                        if (!clear_all_dirty_bits
2197                                            (p_s_tb->tb_sb, p_s_tb->L[i]))
2198                                                locked = p_s_tb->L[i];
2199                                }
2200
2201                                if (!locked && p_s_tb->FL[i]) {
2202                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2203                                                               p_s_tb->FL[i],
2204                                                               "FL", i);
2205                                        if (!clear_all_dirty_bits
2206                                            (p_s_tb->tb_sb, p_s_tb->FL[i]))
2207                                                locked = p_s_tb->FL[i];
2208                                }
2209
2210                                if (!locked && p_s_tb->CFL[i]) {
2211                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2212                                                               p_s_tb->CFL[i],
2213                                                               "CFL", i);
2214                                        if (!clear_all_dirty_bits
2215                                            (p_s_tb->tb_sb, p_s_tb->CFL[i]))
2216                                                locked = p_s_tb->CFL[i];
2217                                }
2218
2219                        }
2220
2221                        if (!locked && (p_s_tb->rnum[i])) {
2222
2223                                if (p_s_tb->R[i]) {
2224                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2225                                                               p_s_tb->R[i],
2226                                                               "R", i);
2227                                        if (!clear_all_dirty_bits
2228                                            (p_s_tb->tb_sb, p_s_tb->R[i]))
2229                                                locked = p_s_tb->R[i];
2230                                }
2231
2232                                if (!locked && p_s_tb->FR[i]) {
2233                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2234                                                               p_s_tb->FR[i],
2235                                                               "FR", i);
2236                                        if (!clear_all_dirty_bits
2237                                            (p_s_tb->tb_sb, p_s_tb->FR[i]))
2238                                                locked = p_s_tb->FR[i];
2239                                }
2240
2241                                if (!locked && p_s_tb->CFR[i]) {
2242                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2243                                                               p_s_tb->CFR[i],
2244                                                               "CFR", i);
2245                                        if (!clear_all_dirty_bits
2246                                            (p_s_tb->tb_sb, p_s_tb->CFR[i]))
2247                                                locked = p_s_tb->CFR[i];
2248                                }
2249                        }
2250                }
2251                /* as far as I can tell, this is not required.  The FEB list seems
2252                 ** to be full of newly allocated nodes, which will never be locked,
2253                 ** dirty, or anything else.
2254                 ** To be safe, I'm putting the checks and waits in.  For the moment,
2255                 ** they are needed to keep the code in journal.c from complaining
2256                 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
2257                 ** --clm
2258                 */
2259                for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2260                        if (p_s_tb->FEB[i]) {
2261                                if (!clear_all_dirty_bits
2262                                    (p_s_tb->tb_sb, p_s_tb->FEB[i]))
2263                                        locked = p_s_tb->FEB[i];
2264                        }
2265                }
2266
2267                if (locked) {
2268#ifdef CONFIG_REISERFS_CHECK
2269                        repeat_counter++;
2270                        if ((repeat_counter % 10000) == 0) {
2271                                reiserfs_warning(p_s_tb->tb_sb,
2272                                                 "wait_tb_buffers_until_unlocked(): too many "
2273                                                 "iterations waiting for buffer to unlock "
2274                                                 "(%b)", locked);
2275
2276                                /* Don't loop forever.  Try to recover from possible error. */
2277
2278                                return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
2279                                    REPEAT_SEARCH : CARRY_ON;
2280                        }
2281#endif
2282                        __wait_on_buffer(locked);
2283                        if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2284                                return REPEAT_SEARCH;
2285                        }
2286                }
2287
2288        } while (locked);
2289
2290        return CARRY_ON;
2291}
2292
2293/* Prepare for balancing, that is
2294 *      get all necessary parents, and neighbors;
2295 *      analyze what and where should be moved;
2296 *      get sufficient number of new nodes;
2297 * Balancing will start only after all needed resources have been collected.
2298 * 
2299 * When ported to SMP kernels, only at the last moment after all needed nodes
2300 * are collected in cache, will the resources be locked using the usual
2301 * textbook ordered lock acquisition algorithms.  Note that ensuring that
2302 * this code neither write locks what it does not need to write lock nor locks out of order
2303 * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2304 * 
2305 * fix is meant in the sense of render unchanging
2306 * 
2307 * Latency might be improved by first gathering a list of what buffers are needed
2308 * and then getting as many of them in parallel as possible? -Hans
2309 *
2310 * Parameters:
2311 *      op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2312 *      tb      tree_balance structure;
2313 *      inum    item number in S[h];
2314 *      pos_in_item - comment this if you can
2315 *      ins_ih & ins_sd are used when inserting
2316 * Returns:     1 - schedule occurred while the function worked;
2317 *              0 - schedule didn't occur while the function worked;
2318 *             -1 - if no_disk_space 
2319 */
2320
2321int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted
2322              const void *data  // inserted item or data to be pasted
2323    )
2324{
2325        int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2326        int n_pos_in_item;
2327
2328        /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2329         ** during wait_tb_buffers_until_unlocked
2330         */
2331        int wait_tb_buffers_run = 0;
2332        struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2333
2334        ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;
2335
2336        n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2337
2338        p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);
2339
2340        /* we prepare and log the super here so it will already be in the
2341         ** transaction when do_balance needs to change it.
2342         ** This way do_balance won't have to schedule when trying to prepare
2343         ** the super for logging
2344         */
2345        reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2346                                     SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
2347        journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2348                           SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
2349        if (FILESYSTEM_CHANGED_TB(p_s_tb))
2350                return REPEAT_SEARCH;
2351
2352        /* this is possible during an indirect_to_direct conversion */
2353        if (buffer_locked(p_s_tbS0)) {
2354                __wait_on_buffer(p_s_tbS0);
2355                if (FILESYSTEM_CHANGED_TB(p_s_tb))
2356                        return REPEAT_SEARCH;
2357        }
2358#ifdef CONFIG_REISERFS_CHECK
2359        if (cur_tb) {
2360                print_cur_tb("fix_nodes");
2361                reiserfs_panic(p_s_tb->tb_sb,
2362                               "PAP-8305: fix_nodes:  there is pending do_balance");
2363        }
2364
2365        if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
2366                reiserfs_panic(p_s_tb->tb_sb,
2367                               "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2368                               "at the beginning of fix_nodes or not in tree (mode %c)",
2369                               p_s_tbS0, p_s_tbS0, n_op_mode);
2370        }
2371
2372        /* Check parameters. */
2373        switch (n_op_mode) {
2374        case M_INSERT:
2375                if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
2376                        reiserfs_panic(p_s_tb->tb_sb,
2377                                       "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2378                                       n_item_num, B_NR_ITEMS(p_s_tbS0));
2379                break;
2380        case M_PASTE:
2381        case M_DELETE:
2382        case M_CUT:
2383                if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
2384                        print_block(p_s_tbS0, 0, -1, -1);
2385                        reiserfs_panic(p_s_tb->tb_sb,
2386                                       "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
2387                                       n_item_num, n_op_mode,
2388                                       p_s_tb->insert_size[0]);
2389                }
2390                break;
2391        default:
2392                reiserfs_panic(p_s_tb->tb_sb,
2393                               "PAP-8340: fix_nodes: Incorrect mode of operation");
2394        }
2395#endif
2396
2397        if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
2398                // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2399                return REPEAT_SEARCH;
2400
2401        /* Starting from the leaf level; for all levels n_h of the tree. */
2402        for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
2403                if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
2404                        goto repeat;
2405                }
2406
2407                if ((n_ret_value =
2408                     check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
2409                                   n_pos_in_item, p_s_ins_ih,
2410                                   data)) != CARRY_ON) {
2411                        if (n_ret_value == NO_BALANCING_NEEDED) {
2412                                /* No balancing for higher levels needed. */
2413                                if ((n_ret_value =
2414                                     get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2415                                        goto repeat;
2416                                }
2417                                if (n_h != MAX_HEIGHT - 1)
2418                                        p_s_tb->insert_size[n_h + 1] = 0;
2419                                /* ok, analysis and resource gathering are complete */
2420                                break;
2421                        }
2422                        goto repeat;
2423                }
2424
2425                if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2426                        goto repeat;
2427                }
2428
2429                if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
2430                        goto repeat;    /* No disk space, or schedule occurred and
2431                                           analysis may be invalid and needs to be redone. */
2432                }
2433
2434                if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
2435                        /* We have a positive insert size but no node exists on this
2436                           level; this means that we are creating a new root. */
2437
2438                        RFALSE(p_s_tb->blknum[n_h] != 1,
2439                               "PAP-8350: creating new empty root");
2440
2441                        if (n_h < MAX_HEIGHT - 1)
2442                                p_s_tb->insert_size[n_h + 1] = 0;
2443                } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
2444                        if (p_s_tb->blknum[n_h] > 1) {
2445                                /* The tree needs to be grown, so this node S[n_h]
2446                                   which is the root node is split into two nodes,
2447                                   and a new node (S[n_h+1]) will be created to
2448                                   become the root node.  */
2449
2450                                RFALSE(n_h == MAX_HEIGHT - 1,
2451                                       "PAP-8355: attempt to create too high of a tree");
2452
2453                                p_s_tb->insert_size[n_h + 1] =
2454                                    (DC_SIZE +
2455                                     KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
2456                                    DC_SIZE;
2457                        } else if (n_h < MAX_HEIGHT - 1)
2458                                p_s_tb->insert_size[n_h + 1] = 0;
2459                } else
2460                        p_s_tb->insert_size[n_h + 1] =
2461                            (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
2462        }
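        /* Worked example for the insert_size propagation above (numbers are
           illustrative only): if the leaf has to be split into blknum[0] == 3
           nodes, its parent must gain two new (key, disk_child) pairs - one for
           each node beyond the first - so insert_size[1] becomes
           2 * (DC_SIZE + KEY_SIZE).  If that leaf was the root, a new root is
           created instead; it needs the same two pairs plus one more disk_child
           for the original node, hence the extra DC_SIZE in the root-growing
           branch. */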
2463
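        /* Everything do_balance() needs is now pinned, but some of the buffers
           may still be locked (e.g. under i/o).  wait_tb_buffers_until_unlocked()
           prepares them for the journal (clearing their dirty bits) and sleeps
           until none of them is locked; because sleeping gives other processes a
           chance to modify the tree, the generation is checked once more
           afterwards.  After that call has run, wait_tb_buffers_run is set on
           every failure path so that the cleanup below uses the *_restore
           variants to put the cleared dirty bits back. */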
2464        if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
2465                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2466                        wait_tb_buffers_run = 1;
2467                        n_ret_value = REPEAT_SEARCH;
2468                        goto repeat;
2469                } else {
2470                        return CARRY_ON;
2471                }
2472        } else {
2473                wait_tb_buffers_run = 1;
2474                goto repeat;
2475        }
2476
2477      repeat:
2478        // fix_nodes was unable to perform its calculation because the
2479        // filesystem changed under us, we ran out of free disk space, or an
2480        // i/o failure occurred.  In the first case the search will simply be
2481        // repeated.  For now, free all resources acquired so far, except for
2482        // the newly allocated nodes.
2483        {
2484                int i;
2485
2486                /* Release path buffers. */
2487                if (wait_tb_buffers_run) {
2488                        pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
2489                } else {
2490                        pathrelse(p_s_tb->tb_path);
2491                }
2492                /* brelse all resources collected for balancing */
2493                for (i = 0; i < MAX_HEIGHT; i++) {
2494                        if (wait_tb_buffers_run) {
2495                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2496                                                                 p_s_tb->L[i]);
2497                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2498                                                                 p_s_tb->R[i]);
2499                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2500                                                                 p_s_tb->FL[i]);
2501                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2502                                                                 p_s_tb->FR[i]);
2503                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2504                                                                 p_s_tb->
2505                                                                 CFL[i]);
2506                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2507                                                                 p_s_tb->
2508                                                                 CFR[i]);
2509                        }
2510
2511                        brelse(p_s_tb->L[i]);
2512                        p_s_tb->L[i] = NULL;
2513                        brelse(p_s_tb->R[i]);
2514                        p_s_tb->R[i] = NULL;
2515                        brelse(p_s_tb->FL[i]);
2516                        p_s_tb->FL[i] = NULL;
2517                        brelse(p_s_tb->FR[i]);
2518                        p_s_tb->FR[i] = NULL;
2519                        brelse(p_s_tb->CFL[i]);
2520                        p_s_tb->CFL[i] = NULL;
2521                        brelse(p_s_tb->CFR[i]);
2522                        p_s_tb->CFR[i] = NULL;
2523                }
2524
2525                if (wait_tb_buffers_run) {
2526                        for (i = 0; i < MAX_FEB_SIZE; i++) {
2527                                if (p_s_tb->FEB[i]) {
2528                                        reiserfs_restore_prepared_buffer
2529                                            (p_s_tb->tb_sb, p_s_tb->FEB[i]);
2530                                }
2531                        }
2532                }
2533                return n_ret_value;
2534        }
2535
2536}
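/* A rough sketch of how fix_nodes() is typically driven (callers such as
 * reiserfs_insert_item() in stree.c follow this pattern; details differ per
 * operation, and tb.tb_path is assumed to point at the search path used below):
 *
 *	while ((retval = fix_nodes(M_INSERT, &tb, ih, body)) == REPEAT_SEARCH) {
 *		// the tree changed under us - redo the search and try again
 *		if (search_item(sb, key, path) == IO_ERROR)
 *			goto error_out;
 *	}
 *	if (retval == CARRY_ON)
 *		do_balance(&tb, ih, body, M_INSERT);	// consumes the resources
 *	else
 *		unfix_nodes(&tb);	// NO_DISK_SPACE or IO_ERROR: give everything back
 *
 * On CARRY_ON the gathered buffers and preallocated blocks are handed to
 * do_balance(), which releases them when it completes; on any other return
 * value they must be released with unfix_nodes() below. */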
2537
2538/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2539   wanted to make lines shorter */
2540void unfix_nodes(struct tree_balance *tb)
2541{
2542        int i;
2543
2544        /* Release path buffers. */
2545        pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2546
2547        /* brelse all resources collected for balancing */
2548        for (i = 0; i < MAX_HEIGHT; i++) {
2549                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2550                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2551                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2552                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2553                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2554                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2555
2556                brelse(tb->L[i]);
2557                brelse(tb->R[i]);
2558                brelse(tb->FL[i]);
2559                brelse(tb->FR[i]);
2560                brelse(tb->CFL[i]);
2561                brelse(tb->CFR[i]);
2562        }
2563
2564        /* deal with list of allocated (used and unused) nodes */
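        /* FEB[] holds blocks that were preallocated by get_empty_nodes() but
           never consumed, so both the buffer and the on-disk block are given
           back; used[] holds the buffers that do_balance() actually turned into
           new tree nodes, so only the extra buffer reference is dropped here. */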
2565        for (i = 0; i < MAX_FEB_SIZE; i++) {
2566                if (tb->FEB[i]) {
2567                        b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2568                        /* de-allocate the block, which was not used by balancing,
2569                           and release the buffer that refers to it */
2570                        brelse(tb->FEB[i]);
2571                        reiserfs_free_block(tb->transaction_handle, NULL,
2572                                            blocknr, 0);
2573                }
2574                if (tb->used[i]) {
2575                        /* release buffers that were used as new nodes, including a new root */
2576                        brelse(tb->used[i]);
2577                }
2578        }
2579
2580        kfree(tb->vn_buf);
2581
2582}
2583