1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/rbtree.h>
22#include "ctree.h"
23#include "disk-io.h"
24#include "transaction.h"
25#include "print-tree.h"
26#include "locking.h"
27
28static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
29 *root, struct btrfs_path *path, int level);
30static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
31 *root, struct btrfs_key *ins_key,
32 struct btrfs_path *path, int data_size, int extend);
33static int push_node_left(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root, struct extent_buffer *dst,
35 struct extent_buffer *src, int empty);
36static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root,
38 struct extent_buffer *dst_buf,
39 struct extent_buffer *src_buf);
40static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
41 struct btrfs_path *path, int level, int slot,
42 int tree_mod_log);
43static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
44 struct extent_buffer *eb);
45struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
46 u32 blocksize, u64 parent_transid,
47 u64 time_seq);
48struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
49 u64 bytenr, u32 blocksize,
50 u64 time_seq);
51
52struct btrfs_path *btrfs_alloc_path(void)
53{
54 struct btrfs_path *path;
55 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
56 return path;
57}
58
59
60
61
62
63noinline void btrfs_set_path_blocking(struct btrfs_path *p)
64{
65 int i;
66 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
67 if (!p->nodes[i] || !p->locks[i])
68 continue;
69 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
70 if (p->locks[i] == BTRFS_READ_LOCK)
71 p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
72 else if (p->locks[i] == BTRFS_WRITE_LOCK)
73 p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
74 }
75}
76
77
78
79
80
81
82
83
84
85noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
86 struct extent_buffer *held, int held_rw)
87{
88 int i;
89
90#ifdef CONFIG_DEBUG_LOCK_ALLOC
91
92
93
94
95
96
97 if (held) {
98 btrfs_set_lock_blocking_rw(held, held_rw);
99 if (held_rw == BTRFS_WRITE_LOCK)
100 held_rw = BTRFS_WRITE_LOCK_BLOCKING;
101 else if (held_rw == BTRFS_READ_LOCK)
102 held_rw = BTRFS_READ_LOCK_BLOCKING;
103 }
104 btrfs_set_path_blocking(p);
105#endif
106
107 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
108 if (p->nodes[i] && p->locks[i]) {
109 btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
110 if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
111 p->locks[i] = BTRFS_WRITE_LOCK;
112 else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
113 p->locks[i] = BTRFS_READ_LOCK;
114 }
115 }
116
117#ifdef CONFIG_DEBUG_LOCK_ALLOC
118 if (held)
119 btrfs_clear_lock_blocking_rw(held, held_rw);
120#endif
121}
122
123
124void btrfs_free_path(struct btrfs_path *p)
125{
126 if (!p)
127 return;
128 btrfs_release_path(p);
129 kmem_cache_free(btrfs_path_cachep, p);
130}
131
132
133
134
135
136
137
138noinline void btrfs_release_path(struct btrfs_path *p)
139{
140 int i;
141
142 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
143 p->slots[i] = 0;
144 if (!p->nodes[i])
145 continue;
146 if (p->locks[i]) {
147 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
148 p->locks[i] = 0;
149 }
150 free_extent_buffer(p->nodes[i]);
151 p->nodes[i] = NULL;
152 }
153}
154
155
156
157
158
159
160
161
162
163
164
165struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
166{
167 struct extent_buffer *eb;
168
169 while (1) {
170 rcu_read_lock();
171 eb = rcu_dereference(root->node);
172
173
174
175
176
177
178
179 if (atomic_inc_not_zero(&eb->refs)) {
180 rcu_read_unlock();
181 break;
182 }
183 rcu_read_unlock();
184 synchronize_rcu();
185 }
186 return eb;
187}
188
189
190
191
192
193struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
194{
195 struct extent_buffer *eb;
196
197 while (1) {
198 eb = btrfs_root_node(root);
199 btrfs_tree_lock(eb);
200 if (eb == root->node)
201 break;
202 btrfs_tree_unlock(eb);
203 free_extent_buffer(eb);
204 }
205 return eb;
206}
207
208
209
210
211
212struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
213{
214 struct extent_buffer *eb;
215
216 while (1) {
217 eb = btrfs_root_node(root);
218 btrfs_tree_read_lock(eb);
219 if (eb == root->node)
220 break;
221 btrfs_tree_read_unlock(eb);
222 free_extent_buffer(eb);
223 }
224 return eb;
225}
226
227
228
229
230
231static void add_root_to_dirty_list(struct btrfs_root *root)
232{
233 spin_lock(&root->fs_info->trans_lock);
234 if (root->track_dirty && list_empty(&root->dirty_list)) {
235 list_add(&root->dirty_list,
236 &root->fs_info->dirty_cowonly_roots);
237 }
238 spin_unlock(&root->fs_info->trans_lock);
239}
240
241
242
243
244
245
246int btrfs_copy_root(struct btrfs_trans_handle *trans,
247 struct btrfs_root *root,
248 struct extent_buffer *buf,
249 struct extent_buffer **cow_ret, u64 new_root_objectid)
250{
251 struct extent_buffer *cow;
252 int ret = 0;
253 int level;
254 struct btrfs_disk_key disk_key;
255
256 WARN_ON(root->ref_cows && trans->transid !=
257 root->fs_info->running_transaction->transid);
258 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
259
260 level = btrfs_header_level(buf);
261 if (level == 0)
262 btrfs_item_key(buf, &disk_key, 0);
263 else
264 btrfs_node_key(buf, &disk_key, 0);
265
266 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
267 new_root_objectid, &disk_key, level,
268 buf->start, 0);
269 if (IS_ERR(cow))
270 return PTR_ERR(cow);
271
272 copy_extent_buffer(cow, buf, 0, 0, cow->len);
273 btrfs_set_header_bytenr(cow, cow->start);
274 btrfs_set_header_generation(cow, trans->transid);
275 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
276 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
277 BTRFS_HEADER_FLAG_RELOC);
278 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
279 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
280 else
281 btrfs_set_header_owner(cow, new_root_objectid);
282
283 write_extent_buffer(cow, root->fs_info->fsid,
284 (unsigned long)btrfs_header_fsid(cow),
285 BTRFS_FSID_SIZE);
286
287 WARN_ON(btrfs_header_generation(buf) > trans->transid);
288 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
289 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
290 else
291 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
292
293 if (ret)
294 return ret;
295
296 btrfs_mark_buffer_dirty(cow);
297 *cow_ret = cow;
298 return 0;
299}
300
301enum mod_log_op {
302 MOD_LOG_KEY_REPLACE,
303 MOD_LOG_KEY_ADD,
304 MOD_LOG_KEY_REMOVE,
305 MOD_LOG_KEY_REMOVE_WHILE_FREEING,
306 MOD_LOG_KEY_REMOVE_WHILE_MOVING,
307 MOD_LOG_MOVE_KEYS,
308 MOD_LOG_ROOT_REPLACE,
309};
310
311struct tree_mod_move {
312 int dst_slot;
313 int nr_items;
314};
315
316struct tree_mod_root {
317 u64 logical;
318 u8 level;
319};
320
321struct tree_mod_elem {
322 struct rb_node node;
323 u64 index;
324 u64 seq;
325 enum mod_log_op op;
326
327
328 int slot;
329
330
331 u64 generation;
332
333
334 struct btrfs_disk_key key;
335 u64 blockptr;
336
337
338 struct tree_mod_move move;
339
340
341 struct tree_mod_root old_root;
342};
343
344static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
345{
346 read_lock(&fs_info->tree_mod_log_lock);
347}
348
349static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
350{
351 read_unlock(&fs_info->tree_mod_log_lock);
352}
353
354static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
355{
356 write_lock(&fs_info->tree_mod_log_lock);
357}
358
359static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
360{
361 write_unlock(&fs_info->tree_mod_log_lock);
362}
363
364
365
366
367
368
369
370
371
372u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
373 struct seq_list *elem)
374{
375 u64 seq;
376
377 tree_mod_log_write_lock(fs_info);
378 spin_lock(&fs_info->tree_mod_seq_lock);
379 if (!elem->seq) {
380 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
381 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
382 }
383 seq = btrfs_inc_tree_mod_seq(fs_info);
384 spin_unlock(&fs_info->tree_mod_seq_lock);
385 tree_mod_log_write_unlock(fs_info);
386
387 return seq;
388}
389
390void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
391 struct seq_list *elem)
392{
393 struct rb_root *tm_root;
394 struct rb_node *node;
395 struct rb_node *next;
396 struct seq_list *cur_elem;
397 struct tree_mod_elem *tm;
398 u64 min_seq = (u64)-1;
399 u64 seq_putting = elem->seq;
400
401 if (!seq_putting)
402 return;
403
404 spin_lock(&fs_info->tree_mod_seq_lock);
405 list_del(&elem->list);
406 elem->seq = 0;
407
408 list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
409 if (cur_elem->seq < min_seq) {
410 if (seq_putting > cur_elem->seq) {
411
412
413
414
415 spin_unlock(&fs_info->tree_mod_seq_lock);
416 return;
417 }
418 min_seq = cur_elem->seq;
419 }
420 }
421 spin_unlock(&fs_info->tree_mod_seq_lock);
422
423
424
425
426
427 tree_mod_log_write_lock(fs_info);
428 tm_root = &fs_info->tree_mod_log;
429 for (node = rb_first(tm_root); node; node = next) {
430 next = rb_next(node);
431 tm = container_of(node, struct tree_mod_elem, node);
432 if (tm->seq > min_seq)
433 continue;
434 rb_erase(node, tm_root);
435 kfree(tm);
436 }
437 tree_mod_log_write_unlock(fs_info);
438}
439
440
441
442
443
444
445
446
447
448static noinline int
449__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
450{
451 struct rb_root *tm_root;
452 struct rb_node **new;
453 struct rb_node *parent = NULL;
454 struct tree_mod_elem *cur;
455
456 BUG_ON(!tm || !tm->seq);
457
458 tm_root = &fs_info->tree_mod_log;
459 new = &tm_root->rb_node;
460 while (*new) {
461 cur = container_of(*new, struct tree_mod_elem, node);
462 parent = *new;
463 if (cur->index < tm->index)
464 new = &((*new)->rb_left);
465 else if (cur->index > tm->index)
466 new = &((*new)->rb_right);
467 else if (cur->seq < tm->seq)
468 new = &((*new)->rb_left);
469 else if (cur->seq > tm->seq)
470 new = &((*new)->rb_right);
471 else {
472 kfree(tm);
473 return -EEXIST;
474 }
475 }
476
477 rb_link_node(&tm->node, parent, new);
478 rb_insert_color(&tm->node, tm_root);
479 return 0;
480}
481
482
483
484
485
486
487
488static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
489 struct extent_buffer *eb) {
490 smp_mb();
491 if (list_empty(&(fs_info)->tree_mod_seq_list))
492 return 1;
493 if (eb && btrfs_header_level(eb) == 0)
494 return 1;
495
496 tree_mod_log_write_lock(fs_info);
497 if (list_empty(&fs_info->tree_mod_seq_list)) {
498
499
500
501
502 tree_mod_log_write_unlock(fs_info);
503 return 1;
504 }
505
506 return 0;
507}
508
509
510
511
512
513
514
515static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
516 struct tree_mod_elem **tm_ret)
517{
518 struct tree_mod_elem *tm;
519
520
521
522
523
524 tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
525 if (!tm)
526 return -ENOMEM;
527
528 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
529 return tm->seq;
530}
531
532static inline int
533__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
534 struct extent_buffer *eb, int slot,
535 enum mod_log_op op, gfp_t flags)
536{
537 int ret;
538 struct tree_mod_elem *tm;
539
540 ret = tree_mod_alloc(fs_info, flags, &tm);
541 if (ret < 0)
542 return ret;
543
544 tm->index = eb->start >> PAGE_CACHE_SHIFT;
545 if (op != MOD_LOG_KEY_ADD) {
546 btrfs_node_key(eb, &tm->key, slot);
547 tm->blockptr = btrfs_node_blockptr(eb, slot);
548 }
549 tm->op = op;
550 tm->slot = slot;
551 tm->generation = btrfs_node_ptr_generation(eb, slot);
552
553 return __tree_mod_log_insert(fs_info, tm);
554}
555
556static noinline int
557tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
558 struct extent_buffer *eb, int slot,
559 enum mod_log_op op, gfp_t flags)
560{
561 int ret;
562
563 if (tree_mod_dont_log(fs_info, eb))
564 return 0;
565
566 ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
567
568 tree_mod_log_write_unlock(fs_info);
569 return ret;
570}
571
572static noinline int
573tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
574 int slot, enum mod_log_op op)
575{
576 return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
577}
578
579static noinline int
580tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
581 struct extent_buffer *eb, int slot,
582 enum mod_log_op op)
583{
584 return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
585}
586
587static noinline int
588tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
589 struct extent_buffer *eb, int dst_slot, int src_slot,
590 int nr_items, gfp_t flags)
591{
592 struct tree_mod_elem *tm;
593 int ret;
594 int i;
595
596 if (tree_mod_dont_log(fs_info, eb))
597 return 0;
598
599
600
601
602
603
604 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
605 ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
606 MOD_LOG_KEY_REMOVE_WHILE_MOVING);
607 BUG_ON(ret < 0);
608 }
609
610 ret = tree_mod_alloc(fs_info, flags, &tm);
611 if (ret < 0)
612 goto out;
613
614 tm->index = eb->start >> PAGE_CACHE_SHIFT;
615 tm->slot = src_slot;
616 tm->move.dst_slot = dst_slot;
617 tm->move.nr_items = nr_items;
618 tm->op = MOD_LOG_MOVE_KEYS;
619
620 ret = __tree_mod_log_insert(fs_info, tm);
621out:
622 tree_mod_log_write_unlock(fs_info);
623 return ret;
624}
625
626static inline void
627__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
628{
629 int i;
630 u32 nritems;
631 int ret;
632
633 if (btrfs_header_level(eb) == 0)
634 return;
635
636 nritems = btrfs_header_nritems(eb);
637 for (i = nritems - 1; i >= 0; i--) {
638 ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
639 MOD_LOG_KEY_REMOVE_WHILE_FREEING);
640 BUG_ON(ret < 0);
641 }
642}
643
644static noinline int
645tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
646 struct extent_buffer *old_root,
647 struct extent_buffer *new_root, gfp_t flags)
648{
649 struct tree_mod_elem *tm;
650 int ret;
651
652 if (tree_mod_dont_log(fs_info, NULL))
653 return 0;
654
655 ret = tree_mod_alloc(fs_info, flags, &tm);
656 if (ret < 0)
657 goto out;
658
659 tm->index = new_root->start >> PAGE_CACHE_SHIFT;
660 tm->old_root.logical = old_root->start;
661 tm->old_root.level = btrfs_header_level(old_root);
662 tm->generation = btrfs_header_generation(old_root);
663 tm->op = MOD_LOG_ROOT_REPLACE;
664
665 ret = __tree_mod_log_insert(fs_info, tm);
666out:
667 tree_mod_log_write_unlock(fs_info);
668 return ret;
669}
670
671static struct tree_mod_elem *
672__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
673 int smallest)
674{
675 struct rb_root *tm_root;
676 struct rb_node *node;
677 struct tree_mod_elem *cur = NULL;
678 struct tree_mod_elem *found = NULL;
679 u64 index = start >> PAGE_CACHE_SHIFT;
680
681 tree_mod_log_read_lock(fs_info);
682 tm_root = &fs_info->tree_mod_log;
683 node = tm_root->rb_node;
684 while (node) {
685 cur = container_of(node, struct tree_mod_elem, node);
686 if (cur->index < index) {
687 node = node->rb_left;
688 } else if (cur->index > index) {
689 node = node->rb_right;
690 } else if (cur->seq < min_seq) {
691 node = node->rb_left;
692 } else if (!smallest) {
693
694 if (found)
695 BUG_ON(found->seq > cur->seq);
696 found = cur;
697 node = node->rb_left;
698 } else if (cur->seq > min_seq) {
699
700 if (found)
701 BUG_ON(found->seq < cur->seq);
702 found = cur;
703 node = node->rb_right;
704 } else {
705 found = cur;
706 break;
707 }
708 }
709 tree_mod_log_read_unlock(fs_info);
710
711 return found;
712}
713
714
715
716
717
718
719static struct tree_mod_elem *
720tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
721 u64 min_seq)
722{
723 return __tree_mod_log_search(fs_info, start, min_seq, 1);
724}
725
726
727
728
729
730
731static struct tree_mod_elem *
732tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
733{
734 return __tree_mod_log_search(fs_info, start, min_seq, 0);
735}
736
737static noinline void
738tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
739 struct extent_buffer *src, unsigned long dst_offset,
740 unsigned long src_offset, int nr_items)
741{
742 int ret;
743 int i;
744
745 if (tree_mod_dont_log(fs_info, NULL))
746 return;
747
748 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
749 tree_mod_log_write_unlock(fs_info);
750 return;
751 }
752
753 for (i = 0; i < nr_items; i++) {
754 ret = tree_mod_log_insert_key_locked(fs_info, src,
755 i + src_offset,
756 MOD_LOG_KEY_REMOVE);
757 BUG_ON(ret < 0);
758 ret = tree_mod_log_insert_key_locked(fs_info, dst,
759 i + dst_offset,
760 MOD_LOG_KEY_ADD);
761 BUG_ON(ret < 0);
762 }
763
764 tree_mod_log_write_unlock(fs_info);
765}
766
767static inline void
768tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
769 int dst_offset, int src_offset, int nr_items)
770{
771 int ret;
772 ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
773 nr_items, GFP_NOFS);
774 BUG_ON(ret < 0);
775}
776
777static noinline void
778tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
779 struct extent_buffer *eb,
780 struct btrfs_disk_key *disk_key, int slot, int atomic)
781{
782 int ret;
783
784 ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
785 MOD_LOG_KEY_REPLACE,
786 atomic ? GFP_ATOMIC : GFP_NOFS);
787 BUG_ON(ret < 0);
788}
789
790static noinline void
791tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
792{
793 if (tree_mod_dont_log(fs_info, eb))
794 return;
795
796 __tree_mod_log_free_eb(fs_info, eb);
797
798 tree_mod_log_write_unlock(fs_info);
799}
800
801static noinline void
802tree_mod_log_set_root_pointer(struct btrfs_root *root,
803 struct extent_buffer *new_root_node)
804{
805 int ret;
806 ret = tree_mod_log_insert_root(root->fs_info, root->node,
807 new_root_node, GFP_NOFS);
808 BUG_ON(ret < 0);
809}
810
811
812
813
814int btrfs_block_can_be_shared(struct btrfs_root *root,
815 struct extent_buffer *buf)
816{
817
818
819
820
821
822
823 if (root->ref_cows &&
824 buf != root->node && buf != root->commit_root &&
825 (btrfs_header_generation(buf) <=
826 btrfs_root_last_snapshot(&root->root_item) ||
827 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
828 return 1;
829#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
830 if (root->ref_cows &&
831 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
832 return 1;
833#endif
834 return 0;
835}
836
837static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
838 struct btrfs_root *root,
839 struct extent_buffer *buf,
840 struct extent_buffer *cow,
841 int *last_ref)
842{
843 u64 refs;
844 u64 owner;
845 u64 flags;
846 u64 new_flags = 0;
847 int ret;
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866 if (btrfs_block_can_be_shared(root, buf)) {
867 ret = btrfs_lookup_extent_info(trans, root, buf->start,
868 buf->len, &refs, &flags);
869 if (ret)
870 return ret;
871 if (refs == 0) {
872 ret = -EROFS;
873 btrfs_std_error(root->fs_info, ret);
874 return ret;
875 }
876 } else {
877 refs = 1;
878 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
879 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
880 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
881 else
882 flags = 0;
883 }
884
885 owner = btrfs_header_owner(buf);
886 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
887 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
888
889 if (refs > 1) {
890 if ((owner == root->root_key.objectid ||
891 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
892 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
893 ret = btrfs_inc_ref(trans, root, buf, 1, 1);
894 BUG_ON(ret);
895
896 if (root->root_key.objectid ==
897 BTRFS_TREE_RELOC_OBJECTID) {
898 ret = btrfs_dec_ref(trans, root, buf, 0, 1);
899 BUG_ON(ret);
900 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
901 BUG_ON(ret);
902 }
903 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
904 } else {
905
906 if (root->root_key.objectid ==
907 BTRFS_TREE_RELOC_OBJECTID)
908 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
909 else
910 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
911 BUG_ON(ret);
912 }
913 if (new_flags != 0) {
914 ret = btrfs_set_disk_extent_flags(trans, root,
915 buf->start,
916 buf->len,
917 new_flags, 0);
918 if (ret)
919 return ret;
920 }
921 } else {
922 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
923 if (root->root_key.objectid ==
924 BTRFS_TREE_RELOC_OBJECTID)
925 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
926 else
927 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
928 BUG_ON(ret);
929 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
930 BUG_ON(ret);
931 }
932 tree_mod_log_free_eb(root->fs_info, buf);
933 clean_tree_block(trans, root, buf);
934 *last_ref = 1;
935 }
936 return 0;
937}
938
939
940
941
942
943
944
945
946
947
948
949
950
951static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
952 struct btrfs_root *root,
953 struct extent_buffer *buf,
954 struct extent_buffer *parent, int parent_slot,
955 struct extent_buffer **cow_ret,
956 u64 search_start, u64 empty_size)
957{
958 struct btrfs_disk_key disk_key;
959 struct extent_buffer *cow;
960 int level, ret;
961 int last_ref = 0;
962 int unlock_orig = 0;
963 u64 parent_start;
964
965 if (*cow_ret == buf)
966 unlock_orig = 1;
967
968 btrfs_assert_tree_locked(buf);
969
970 WARN_ON(root->ref_cows && trans->transid !=
971 root->fs_info->running_transaction->transid);
972 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
973
974 level = btrfs_header_level(buf);
975
976 if (level == 0)
977 btrfs_item_key(buf, &disk_key, 0);
978 else
979 btrfs_node_key(buf, &disk_key, 0);
980
981 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
982 if (parent)
983 parent_start = parent->start;
984 else
985 parent_start = 0;
986 } else
987 parent_start = 0;
988
989 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
990 root->root_key.objectid, &disk_key,
991 level, search_start, empty_size);
992 if (IS_ERR(cow))
993 return PTR_ERR(cow);
994
995
996
997 copy_extent_buffer(cow, buf, 0, 0, cow->len);
998 btrfs_set_header_bytenr(cow, cow->start);
999 btrfs_set_header_generation(cow, trans->transid);
1000 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
1001 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
1002 BTRFS_HEADER_FLAG_RELOC);
1003 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1004 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
1005 else
1006 btrfs_set_header_owner(cow, root->root_key.objectid);
1007
1008 write_extent_buffer(cow, root->fs_info->fsid,
1009 (unsigned long)btrfs_header_fsid(cow),
1010 BTRFS_FSID_SIZE);
1011
1012 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
1013 if (ret) {
1014 btrfs_abort_transaction(trans, root, ret);
1015 return ret;
1016 }
1017
1018 if (root->ref_cows)
1019 btrfs_reloc_cow_block(trans, root, buf, cow);
1020
1021 if (buf == root->node) {
1022 WARN_ON(parent && parent != buf);
1023 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1024 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1025 parent_start = buf->start;
1026 else
1027 parent_start = 0;
1028
1029 extent_buffer_get(cow);
1030 tree_mod_log_set_root_pointer(root, cow);
1031 rcu_assign_pointer(root->node, cow);
1032
1033 btrfs_free_tree_block(trans, root, buf, parent_start,
1034 last_ref);
1035 free_extent_buffer(buf);
1036 add_root_to_dirty_list(root);
1037 } else {
1038 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1039 parent_start = parent->start;
1040 else
1041 parent_start = 0;
1042
1043 WARN_ON(trans->transid != btrfs_header_generation(parent));
1044 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
1045 MOD_LOG_KEY_REPLACE);
1046 btrfs_set_node_blockptr(parent, parent_slot,
1047 cow->start);
1048 btrfs_set_node_ptr_generation(parent, parent_slot,
1049 trans->transid);
1050 btrfs_mark_buffer_dirty(parent);
1051 btrfs_free_tree_block(trans, root, buf, parent_start,
1052 last_ref);
1053 }
1054 if (unlock_orig)
1055 btrfs_tree_unlock(buf);
1056 free_extent_buffer_stale(buf);
1057 btrfs_mark_buffer_dirty(cow);
1058 *cow_ret = cow;
1059 return 0;
1060}
1061
1062
1063
1064
1065
1066static struct tree_mod_elem *
1067__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1068 struct btrfs_root *root, u64 time_seq)
1069{
1070 struct tree_mod_elem *tm;
1071 struct tree_mod_elem *found = NULL;
1072 u64 root_logical = root->node->start;
1073 int looped = 0;
1074
1075 if (!time_seq)
1076 return 0;
1077
1078
1079
1080
1081
1082
1083 while (1) {
1084 tm = tree_mod_log_search_oldest(fs_info, root_logical,
1085 time_seq);
1086 if (!looped && !tm)
1087 return 0;
1088
1089
1090
1091
1092
1093 if (!tm)
1094 break;
1095
1096
1097
1098
1099
1100
1101 if (tm->op != MOD_LOG_ROOT_REPLACE)
1102 break;
1103
1104 found = tm;
1105 root_logical = tm->old_root.logical;
1106 BUG_ON(root_logical == root->node->start);
1107 looped = 1;
1108 }
1109
1110
1111 if (!found)
1112 found = tm;
1113
1114 return found;
1115}
1116
1117
1118
1119
1120
1121
1122static void
1123__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1124 struct tree_mod_elem *first_tm)
1125{
1126 u32 n;
1127 struct rb_node *next;
1128 struct tree_mod_elem *tm = first_tm;
1129 unsigned long o_dst;
1130 unsigned long o_src;
1131 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1132
1133 n = btrfs_header_nritems(eb);
1134 while (tm && tm->seq >= time_seq) {
1135
1136
1137
1138
1139
1140 switch (tm->op) {
1141 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1142 BUG_ON(tm->slot < n);
1143 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
1144 case MOD_LOG_KEY_REMOVE:
1145 btrfs_set_node_key(eb, &tm->key, tm->slot);
1146 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1147 btrfs_set_node_ptr_generation(eb, tm->slot,
1148 tm->generation);
1149 n++;
1150 break;
1151 case MOD_LOG_KEY_REPLACE:
1152 BUG_ON(tm->slot >= n);
1153 btrfs_set_node_key(eb, &tm->key, tm->slot);
1154 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1155 btrfs_set_node_ptr_generation(eb, tm->slot,
1156 tm->generation);
1157 break;
1158 case MOD_LOG_KEY_ADD:
1159
1160 n--;
1161 break;
1162 case MOD_LOG_MOVE_KEYS:
1163 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1164 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1165 memmove_extent_buffer(eb, o_dst, o_src,
1166 tm->move.nr_items * p_size);
1167 break;
1168 case MOD_LOG_ROOT_REPLACE:
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178 break;
1179 }
1180 next = rb_next(&tm->node);
1181 if (!next)
1182 break;
1183 tm = container_of(next, struct tree_mod_elem, node);
1184 if (tm->index != first_tm->index)
1185 break;
1186 }
1187 btrfs_set_header_nritems(eb, n);
1188}
1189
1190static struct extent_buffer *
1191tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1192 u64 time_seq)
1193{
1194 struct extent_buffer *eb_rewin;
1195 struct tree_mod_elem *tm;
1196
1197 if (!time_seq)
1198 return eb;
1199
1200 if (btrfs_header_level(eb) == 0)
1201 return eb;
1202
1203 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1204 if (!tm)
1205 return eb;
1206
1207 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1208 BUG_ON(tm->slot != 0);
1209 eb_rewin = alloc_dummy_extent_buffer(eb->start,
1210 fs_info->tree_root->nodesize);
1211 BUG_ON(!eb_rewin);
1212 btrfs_set_header_bytenr(eb_rewin, eb->start);
1213 btrfs_set_header_backref_rev(eb_rewin,
1214 btrfs_header_backref_rev(eb));
1215 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
1216 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
1217 } else {
1218 eb_rewin = btrfs_clone_extent_buffer(eb);
1219 BUG_ON(!eb_rewin);
1220 }
1221
1222 extent_buffer_get(eb_rewin);
1223 free_extent_buffer(eb);
1224
1225 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1226 WARN_ON(btrfs_header_nritems(eb_rewin) >
1227 BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
1228
1229 return eb_rewin;
1230}
1231
1232
1233
1234
1235
1236
1237
1238
1239static inline struct extent_buffer *
1240get_old_root(struct btrfs_root *root, u64 time_seq)
1241{
1242 struct tree_mod_elem *tm;
1243 struct extent_buffer *eb;
1244 struct extent_buffer *old;
1245 struct tree_mod_root *old_root = NULL;
1246 u64 old_generation = 0;
1247 u64 logical;
1248 u32 blocksize;
1249
1250 eb = btrfs_read_lock_root_node(root);
1251 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1252 if (!tm)
1253 return root->node;
1254
1255 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1256 old_root = &tm->old_root;
1257 old_generation = tm->generation;
1258 logical = old_root->logical;
1259 } else {
1260 logical = root->node->start;
1261 }
1262
1263 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1264 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1265 btrfs_tree_read_unlock(root->node);
1266 free_extent_buffer(root->node);
1267 blocksize = btrfs_level_size(root, old_root->level);
1268 old = read_tree_block(root, logical, blocksize, 0);
1269 if (!old) {
1270 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
1271 logical);
1272 WARN_ON(1);
1273 } else {
1274 eb = btrfs_clone_extent_buffer(old);
1275 free_extent_buffer(old);
1276 }
1277 } else if (old_root) {
1278 btrfs_tree_read_unlock(root->node);
1279 free_extent_buffer(root->node);
1280 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1281 } else {
1282 eb = btrfs_clone_extent_buffer(root->node);
1283 btrfs_tree_read_unlock(root->node);
1284 free_extent_buffer(root->node);
1285 }
1286
1287 if (!eb)
1288 return NULL;
1289 extent_buffer_get(eb);
1290 btrfs_tree_read_lock(eb);
1291 if (old_root) {
1292 btrfs_set_header_bytenr(eb, eb->start);
1293 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1294 btrfs_set_header_owner(eb, root->root_key.objectid);
1295 btrfs_set_header_level(eb, old_root->level);
1296 btrfs_set_header_generation(eb, old_generation);
1297 }
1298 if (tm)
1299 __tree_mod_log_rewind(eb, time_seq, tm);
1300 else
1301 WARN_ON(btrfs_header_level(eb) != 0);
1302 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
1303
1304 return eb;
1305}
1306
1307int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1308{
1309 struct tree_mod_elem *tm;
1310 int level;
1311
1312 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1313 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1314 level = tm->old_root.level;
1315 } else {
1316 rcu_read_lock();
1317 level = btrfs_header_level(root->node);
1318 rcu_read_unlock();
1319 }
1320
1321 return level;
1322}
1323
1324static inline int should_cow_block(struct btrfs_trans_handle *trans,
1325 struct btrfs_root *root,
1326 struct extent_buffer *buf)
1327{
1328
1329 smp_rmb();
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342 if (btrfs_header_generation(buf) == trans->transid &&
1343 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1344 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
1345 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
1346 !root->force_cow)
1347 return 0;
1348 return 1;
1349}
1350
1351
1352
1353
1354
1355
1356noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
1357 struct btrfs_root *root, struct extent_buffer *buf,
1358 struct extent_buffer *parent, int parent_slot,
1359 struct extent_buffer **cow_ret)
1360{
1361 u64 search_start;
1362 int ret;
1363
1364 if (trans->transaction != root->fs_info->running_transaction) {
1365 printk(KERN_CRIT "trans %llu running %llu\n",
1366 (unsigned long long)trans->transid,
1367 (unsigned long long)
1368 root->fs_info->running_transaction->transid);
1369 WARN_ON(1);
1370 }
1371 if (trans->transid != root->fs_info->generation) {
1372 printk(KERN_CRIT "trans %llu running %llu\n",
1373 (unsigned long long)trans->transid,
1374 (unsigned long long)root->fs_info->generation);
1375 WARN_ON(1);
1376 }
1377
1378 if (!should_cow_block(trans, root, buf)) {
1379 *cow_ret = buf;
1380 return 0;
1381 }
1382
1383 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
1384
1385 if (parent)
1386 btrfs_set_lock_blocking(parent);
1387 btrfs_set_lock_blocking(buf);
1388
1389 ret = __btrfs_cow_block(trans, root, buf, parent,
1390 parent_slot, cow_ret, search_start, 0);
1391
1392 trace_btrfs_cow_block(root, buf, *cow_ret);
1393
1394 return ret;
1395}
1396
1397
1398
1399
1400
1401static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
1402{
1403 if (blocknr < other && other - (blocknr + blocksize) < 32768)
1404 return 1;
1405 if (blocknr > other && blocknr - (other + blocksize) < 32768)
1406 return 1;
1407 return 0;
1408}
1409
1410
1411
1412
1413static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
1414{
1415 struct btrfs_key k1;
1416
1417 btrfs_disk_key_to_cpu(&k1, disk);
1418
1419 return btrfs_comp_cpu_keys(&k1, k2);
1420}
1421
1422
1423
1424
1425int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
1426{
1427 if (k1->objectid > k2->objectid)
1428 return 1;
1429 if (k1->objectid < k2->objectid)
1430 return -1;
1431 if (k1->type > k2->type)
1432 return 1;
1433 if (k1->type < k2->type)
1434 return -1;
1435 if (k1->offset > k2->offset)
1436 return 1;
1437 if (k1->offset < k2->offset)
1438 return -1;
1439 return 0;
1440}
1441
1442
1443
1444
1445
1446
1447int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1448 struct btrfs_root *root, struct extent_buffer *parent,
1449 int start_slot, int cache_only, u64 *last_ret,
1450 struct btrfs_key *progress)
1451{
1452 struct extent_buffer *cur;
1453 u64 blocknr;
1454 u64 gen;
1455 u64 search_start = *last_ret;
1456 u64 last_block = 0;
1457 u64 other;
1458 u32 parent_nritems;
1459 int end_slot;
1460 int i;
1461 int err = 0;
1462 int parent_level;
1463 int uptodate;
1464 u32 blocksize;
1465 int progress_passed = 0;
1466 struct btrfs_disk_key disk_key;
1467
1468 parent_level = btrfs_header_level(parent);
1469 if (cache_only && parent_level != 1)
1470 return 0;
1471
1472 if (trans->transaction != root->fs_info->running_transaction)
1473 WARN_ON(1);
1474 if (trans->transid != root->fs_info->generation)
1475 WARN_ON(1);
1476
1477 parent_nritems = btrfs_header_nritems(parent);
1478 blocksize = btrfs_level_size(root, parent_level - 1);
1479 end_slot = parent_nritems;
1480
1481 if (parent_nritems == 1)
1482 return 0;
1483
1484 btrfs_set_lock_blocking(parent);
1485
1486 for (i = start_slot; i < end_slot; i++) {
1487 int close = 1;
1488
1489 btrfs_node_key(parent, &disk_key, i);
1490 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1491 continue;
1492
1493 progress_passed = 1;
1494 blocknr = btrfs_node_blockptr(parent, i);
1495 gen = btrfs_node_ptr_generation(parent, i);
1496 if (last_block == 0)
1497 last_block = blocknr;
1498
1499 if (i > 0) {
1500 other = btrfs_node_blockptr(parent, i - 1);
1501 close = close_blocks(blocknr, other, blocksize);
1502 }
1503 if (!close && i < end_slot - 2) {
1504 other = btrfs_node_blockptr(parent, i + 1);
1505 close = close_blocks(blocknr, other, blocksize);
1506 }
1507 if (close) {
1508 last_block = blocknr;
1509 continue;
1510 }
1511
1512 cur = btrfs_find_tree_block(root, blocknr, blocksize);
1513 if (cur)
1514 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
1515 else
1516 uptodate = 0;
1517 if (!cur || !uptodate) {
1518 if (cache_only) {
1519 free_extent_buffer(cur);
1520 continue;
1521 }
1522 if (!cur) {
1523 cur = read_tree_block(root, blocknr,
1524 blocksize, gen);
1525 if (!cur)
1526 return -EIO;
1527 } else if (!uptodate) {
1528 err = btrfs_read_buffer(cur, gen);
1529 if (err) {
1530 free_extent_buffer(cur);
1531 return err;
1532 }
1533 }
1534 }
1535 if (search_start == 0)
1536 search_start = last_block;
1537
1538 btrfs_tree_lock(cur);
1539 btrfs_set_lock_blocking(cur);
1540 err = __btrfs_cow_block(trans, root, cur, parent, i,
1541 &cur, search_start,
1542 min(16 * blocksize,
1543 (end_slot - i) * blocksize));
1544 if (err) {
1545 btrfs_tree_unlock(cur);
1546 free_extent_buffer(cur);
1547 break;
1548 }
1549 search_start = cur->start;
1550 last_block = cur->start;
1551 *last_ret = search_start;
1552 btrfs_tree_unlock(cur);
1553 free_extent_buffer(cur);
1554 }
1555 return err;
1556}
1557
1558
1559
1560
1561
1562
1563static inline unsigned int leaf_data_end(struct btrfs_root *root,
1564 struct extent_buffer *leaf)
1565{
1566 u32 nr = btrfs_header_nritems(leaf);
1567 if (nr == 0)
1568 return BTRFS_LEAF_DATA_SIZE(root);
1569 return btrfs_item_offset_nr(leaf, nr - 1);
1570}
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583static noinline int generic_bin_search(struct extent_buffer *eb,
1584 unsigned long p,
1585 int item_size, struct btrfs_key *key,
1586 int max, int *slot)
1587{
1588 int low = 0;
1589 int high = max;
1590 int mid;
1591 int ret;
1592 struct btrfs_disk_key *tmp = NULL;
1593 struct btrfs_disk_key unaligned;
1594 unsigned long offset;
1595 char *kaddr = NULL;
1596 unsigned long map_start = 0;
1597 unsigned long map_len = 0;
1598 int err;
1599
1600 while (low < high) {
1601 mid = (low + high) / 2;
1602 offset = p + mid * item_size;
1603
1604 if (!kaddr || offset < map_start ||
1605 (offset + sizeof(struct btrfs_disk_key)) >
1606 map_start + map_len) {
1607
1608 err = map_private_extent_buffer(eb, offset,
1609 sizeof(struct btrfs_disk_key),
1610 &kaddr, &map_start, &map_len);
1611
1612 if (!err) {
1613 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1614 map_start);
1615 } else {
1616 read_extent_buffer(eb, &unaligned,
1617 offset, sizeof(unaligned));
1618 tmp = &unaligned;
1619 }
1620
1621 } else {
1622 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1623 map_start);
1624 }
1625 ret = comp_keys(tmp, key);
1626
1627 if (ret < 0)
1628 low = mid + 1;
1629 else if (ret > 0)
1630 high = mid;
1631 else {
1632 *slot = mid;
1633 return 0;
1634 }
1635 }
1636 *slot = low;
1637 return 1;
1638}
1639
1640
1641
1642
1643
1644static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1645 int level, int *slot)
1646{
1647 if (level == 0)
1648 return generic_bin_search(eb,
1649 offsetof(struct btrfs_leaf, items),
1650 sizeof(struct btrfs_item),
1651 key, btrfs_header_nritems(eb),
1652 slot);
1653 else
1654 return generic_bin_search(eb,
1655 offsetof(struct btrfs_node, ptrs),
1656 sizeof(struct btrfs_key_ptr),
1657 key, btrfs_header_nritems(eb),
1658 slot);
1659}
1660
1661int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1662 int level, int *slot)
1663{
1664 return bin_search(eb, key, level, slot);
1665}
1666
1667static void root_add_used(struct btrfs_root *root, u32 size)
1668{
1669 spin_lock(&root->accounting_lock);
1670 btrfs_set_root_used(&root->root_item,
1671 btrfs_root_used(&root->root_item) + size);
1672 spin_unlock(&root->accounting_lock);
1673}
1674
1675static void root_sub_used(struct btrfs_root *root, u32 size)
1676{
1677 spin_lock(&root->accounting_lock);
1678 btrfs_set_root_used(&root->root_item,
1679 btrfs_root_used(&root->root_item) - size);
1680 spin_unlock(&root->accounting_lock);
1681}
1682
1683
1684
1685
1686
1687static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
1688 struct extent_buffer *parent, int slot)
1689{
1690 int level = btrfs_header_level(parent);
1691 if (slot < 0)
1692 return NULL;
1693 if (slot >= btrfs_header_nritems(parent))
1694 return NULL;
1695
1696 BUG_ON(level == 0);
1697
1698 return read_tree_block(root, btrfs_node_blockptr(parent, slot),
1699 btrfs_level_size(root, level - 1),
1700 btrfs_node_ptr_generation(parent, slot));
1701}
1702
1703
1704
1705
1706
1707
1708static noinline int balance_level(struct btrfs_trans_handle *trans,
1709 struct btrfs_root *root,
1710 struct btrfs_path *path, int level)
1711{
1712 struct extent_buffer *right = NULL;
1713 struct extent_buffer *mid;
1714 struct extent_buffer *left = NULL;
1715 struct extent_buffer *parent = NULL;
1716 int ret = 0;
1717 int wret;
1718 int pslot;
1719 int orig_slot = path->slots[level];
1720 u64 orig_ptr;
1721
1722 if (level == 0)
1723 return 0;
1724
1725 mid = path->nodes[level];
1726
1727 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
1728 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
1729 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1730
1731 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1732
1733 if (level < BTRFS_MAX_LEVEL - 1) {
1734 parent = path->nodes[level + 1];
1735 pslot = path->slots[level + 1];
1736 }
1737
1738
1739
1740
1741
1742 if (!parent) {
1743 struct extent_buffer *child;
1744
1745 if (btrfs_header_nritems(mid) != 1)
1746 return 0;
1747
1748
1749 child = read_node_slot(root, mid, 0);
1750 if (!child) {
1751 ret = -EROFS;
1752 btrfs_std_error(root->fs_info, ret);
1753 goto enospc;
1754 }
1755
1756 btrfs_tree_lock(child);
1757 btrfs_set_lock_blocking(child);
1758 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
1759 if (ret) {
1760 btrfs_tree_unlock(child);
1761 free_extent_buffer(child);
1762 goto enospc;
1763 }
1764
1765 tree_mod_log_free_eb(root->fs_info, root->node);
1766 tree_mod_log_set_root_pointer(root, child);
1767 rcu_assign_pointer(root->node, child);
1768
1769 add_root_to_dirty_list(root);
1770 btrfs_tree_unlock(child);
1771
1772 path->locks[level] = 0;
1773 path->nodes[level] = NULL;
1774 clean_tree_block(trans, root, mid);
1775 btrfs_tree_unlock(mid);
1776
1777 free_extent_buffer(mid);
1778
1779 root_sub_used(root, mid->len);
1780 btrfs_free_tree_block(trans, root, mid, 0, 1);
1781
1782 free_extent_buffer_stale(mid);
1783 return 0;
1784 }
1785 if (btrfs_header_nritems(mid) >
1786 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1787 return 0;
1788
1789 left = read_node_slot(root, parent, pslot - 1);
1790 if (left) {
1791 btrfs_tree_lock(left);
1792 btrfs_set_lock_blocking(left);
1793 wret = btrfs_cow_block(trans, root, left,
1794 parent, pslot - 1, &left);
1795 if (wret) {
1796 ret = wret;
1797 goto enospc;
1798 }
1799 }
1800 right = read_node_slot(root, parent, pslot + 1);
1801 if (right) {
1802 btrfs_tree_lock(right);
1803 btrfs_set_lock_blocking(right);
1804 wret = btrfs_cow_block(trans, root, right,
1805 parent, pslot + 1, &right);
1806 if (wret) {
1807 ret = wret;
1808 goto enospc;
1809 }
1810 }
1811
1812
1813 if (left) {
1814 orig_slot += btrfs_header_nritems(left);
1815 wret = push_node_left(trans, root, left, mid, 1);
1816 if (wret < 0)
1817 ret = wret;
1818 }
1819
1820
1821
1822
1823 if (right) {
1824 wret = push_node_left(trans, root, mid, right, 1);
1825 if (wret < 0 && wret != -ENOSPC)
1826 ret = wret;
1827 if (btrfs_header_nritems(right) == 0) {
1828 clean_tree_block(trans, root, right);
1829 btrfs_tree_unlock(right);
1830 del_ptr(trans, root, path, level + 1, pslot + 1, 1);
1831 root_sub_used(root, right->len);
1832 btrfs_free_tree_block(trans, root, right, 0, 1);
1833 free_extent_buffer_stale(right);
1834 right = NULL;
1835 } else {
1836 struct btrfs_disk_key right_key;
1837 btrfs_node_key(right, &right_key, 0);
1838 tree_mod_log_set_node_key(root->fs_info, parent,
1839 &right_key, pslot + 1, 0);
1840 btrfs_set_node_key(parent, &right_key, pslot + 1);
1841 btrfs_mark_buffer_dirty(parent);
1842 }
1843 }
1844 if (btrfs_header_nritems(mid) == 1) {
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854 if (!left) {
1855 ret = -EROFS;
1856 btrfs_std_error(root->fs_info, ret);
1857 goto enospc;
1858 }
1859 wret = balance_node_right(trans, root, mid, left);
1860 if (wret < 0) {
1861 ret = wret;
1862 goto enospc;
1863 }
1864 if (wret == 1) {
1865 wret = push_node_left(trans, root, left, mid, 1);
1866 if (wret < 0)
1867 ret = wret;
1868 }
1869 BUG_ON(wret == 1);
1870 }
1871 if (btrfs_header_nritems(mid) == 0) {
1872 clean_tree_block(trans, root, mid);
1873 btrfs_tree_unlock(mid);
1874 del_ptr(trans, root, path, level + 1, pslot, 1);
1875 root_sub_used(root, mid->len);
1876 btrfs_free_tree_block(trans, root, mid, 0, 1);
1877 free_extent_buffer_stale(mid);
1878 mid = NULL;
1879 } else {
1880
1881 struct btrfs_disk_key mid_key;
1882 btrfs_node_key(mid, &mid_key, 0);
1883 tree_mod_log_set_node_key(root->fs_info, parent, &mid_key,
1884 pslot, 0);
1885 btrfs_set_node_key(parent, &mid_key, pslot);
1886 btrfs_mark_buffer_dirty(parent);
1887 }
1888
1889
1890 if (left) {
1891 if (btrfs_header_nritems(left) > orig_slot) {
1892 extent_buffer_get(left);
1893
1894 path->nodes[level] = left;
1895 path->slots[level + 1] -= 1;
1896 path->slots[level] = orig_slot;
1897 if (mid) {
1898 btrfs_tree_unlock(mid);
1899 free_extent_buffer(mid);
1900 }
1901 } else {
1902 orig_slot -= btrfs_header_nritems(left);
1903 path->slots[level] = orig_slot;
1904 }
1905 }
1906
1907 if (orig_ptr !=
1908 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1909 BUG();
1910enospc:
1911 if (right) {
1912 btrfs_tree_unlock(right);
1913 free_extent_buffer(right);
1914 }
1915 if (left) {
1916 if (path->nodes[level] != left)
1917 btrfs_tree_unlock(left);
1918 free_extent_buffer(left);
1919 }
1920 return ret;
1921}
1922
1923
1924
1925
1926
1927static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1928 struct btrfs_root *root,
1929 struct btrfs_path *path, int level)
1930{
1931 struct extent_buffer *right = NULL;
1932 struct extent_buffer *mid;
1933 struct extent_buffer *left = NULL;
1934 struct extent_buffer *parent = NULL;
1935 int ret = 0;
1936 int wret;
1937 int pslot;
1938 int orig_slot = path->slots[level];
1939
1940 if (level == 0)
1941 return 1;
1942
1943 mid = path->nodes[level];
1944 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1945
1946 if (level < BTRFS_MAX_LEVEL - 1) {
1947 parent = path->nodes[level + 1];
1948 pslot = path->slots[level + 1];
1949 }
1950
1951 if (!parent)
1952 return 1;
1953
1954 left = read_node_slot(root, parent, pslot - 1);
1955
1956
1957 if (left) {
1958 u32 left_nr;
1959
1960 btrfs_tree_lock(left);
1961 btrfs_set_lock_blocking(left);
1962
1963 left_nr = btrfs_header_nritems(left);
1964 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1965 wret = 1;
1966 } else {
1967 ret = btrfs_cow_block(trans, root, left, parent,
1968 pslot - 1, &left);
1969 if (ret)
1970 wret = 1;
1971 else {
1972 wret = push_node_left(trans, root,
1973 left, mid, 0);
1974 }
1975 }
1976 if (wret < 0)
1977 ret = wret;
1978 if (wret == 0) {
1979 struct btrfs_disk_key disk_key;
1980 orig_slot += left_nr;
1981 btrfs_node_key(mid, &disk_key, 0);
1982 tree_mod_log_set_node_key(root->fs_info, parent,
1983 &disk_key, pslot, 0);
1984 btrfs_set_node_key(parent, &disk_key, pslot);
1985 btrfs_mark_buffer_dirty(parent);
1986 if (btrfs_header_nritems(left) > orig_slot) {
1987 path->nodes[level] = left;
1988 path->slots[level + 1] -= 1;
1989 path->slots[level] = orig_slot;
1990 btrfs_tree_unlock(mid);
1991 free_extent_buffer(mid);
1992 } else {
1993 orig_slot -=
1994 btrfs_header_nritems(left);
1995 path->slots[level] = orig_slot;
1996 btrfs_tree_unlock(left);
1997 free_extent_buffer(left);
1998 }
1999 return 0;
2000 }
2001 btrfs_tree_unlock(left);
2002 free_extent_buffer(left);
2003 }
2004 right = read_node_slot(root, parent, pslot + 1);
2005
2006
2007
2008
2009 if (right) {
2010 u32 right_nr;
2011
2012 btrfs_tree_lock(right);
2013 btrfs_set_lock_blocking(right);
2014
2015 right_nr = btrfs_header_nritems(right);
2016 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
2017 wret = 1;
2018 } else {
2019 ret = btrfs_cow_block(trans, root, right,
2020 parent, pslot + 1,
2021 &right);
2022 if (ret)
2023 wret = 1;
2024 else {
2025 wret = balance_node_right(trans, root,
2026 right, mid);
2027 }
2028 }
2029 if (wret < 0)
2030 ret = wret;
2031 if (wret == 0) {
2032 struct btrfs_disk_key disk_key;
2033
2034 btrfs_node_key(right, &disk_key, 0);
2035 tree_mod_log_set_node_key(root->fs_info, parent,
2036 &disk_key, pslot + 1, 0);
2037 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2038 btrfs_mark_buffer_dirty(parent);
2039
2040 if (btrfs_header_nritems(mid) <= orig_slot) {
2041 path->nodes[level] = right;
2042 path->slots[level + 1] += 1;
2043 path->slots[level] = orig_slot -
2044 btrfs_header_nritems(mid);
2045 btrfs_tree_unlock(mid);
2046 free_extent_buffer(mid);
2047 } else {
2048 btrfs_tree_unlock(right);
2049 free_extent_buffer(right);
2050 }
2051 return 0;
2052 }
2053 btrfs_tree_unlock(right);
2054 free_extent_buffer(right);
2055 }
2056 return 1;
2057}
2058
2059
2060
2061
2062
2063static void reada_for_search(struct btrfs_root *root,
2064 struct btrfs_path *path,
2065 int level, int slot, u64 objectid)
2066{
2067 struct extent_buffer *node;
2068 struct btrfs_disk_key disk_key;
2069 u32 nritems;
2070 u64 search;
2071 u64 target;
2072 u64 nread = 0;
2073 u64 gen;
2074 int direction = path->reada;
2075 struct extent_buffer *eb;
2076 u32 nr;
2077 u32 blocksize;
2078 u32 nscan = 0;
2079
2080 if (level != 1)
2081 return;
2082
2083 if (!path->nodes[level])
2084 return;
2085
2086 node = path->nodes[level];
2087
2088 search = btrfs_node_blockptr(node, slot);
2089 blocksize = btrfs_level_size(root, level - 1);
2090 eb = btrfs_find_tree_block(root, search, blocksize);
2091 if (eb) {
2092 free_extent_buffer(eb);
2093 return;
2094 }
2095
2096 target = search;
2097
2098 nritems = btrfs_header_nritems(node);
2099 nr = slot;
2100
2101 while (1) {
2102 if (direction < 0) {
2103 if (nr == 0)
2104 break;
2105 nr--;
2106 } else if (direction > 0) {
2107 nr++;
2108 if (nr >= nritems)
2109 break;
2110 }
2111 if (path->reada < 0 && objectid) {
2112 btrfs_node_key(node, &disk_key, nr);
2113 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2114 break;
2115 }
2116 search = btrfs_node_blockptr(node, nr);
2117 if ((search <= target && target - search <= 65536) ||
2118 (search > target && search - target <= 65536)) {
2119 gen = btrfs_node_ptr_generation(node, nr);
2120 readahead_tree_block(root, search, blocksize, gen);
2121 nread += blocksize;
2122 }
2123 nscan++;
2124 if ((nread > 65536 || nscan > 32))
2125 break;
2126 }
2127}
2128
2129
2130
2131
2132
2133static noinline int reada_for_balance(struct btrfs_root *root,
2134 struct btrfs_path *path, int level)
2135{
2136 int slot;
2137 int nritems;
2138 struct extent_buffer *parent;
2139 struct extent_buffer *eb;
2140 u64 gen;
2141 u64 block1 = 0;
2142 u64 block2 = 0;
2143 int ret = 0;
2144 int blocksize;
2145
2146 parent = path->nodes[level + 1];
2147 if (!parent)
2148 return 0;
2149
2150 nritems = btrfs_header_nritems(parent);
2151 slot = path->slots[level + 1];
2152 blocksize = btrfs_level_size(root, level);
2153
2154 if (slot > 0) {
2155 block1 = btrfs_node_blockptr(parent, slot - 1);
2156 gen = btrfs_node_ptr_generation(parent, slot - 1);
2157 eb = btrfs_find_tree_block(root, block1, blocksize);
2158
2159
2160
2161
2162
2163 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
2164 block1 = 0;
2165 free_extent_buffer(eb);
2166 }
2167 if (slot + 1 < nritems) {
2168 block2 = btrfs_node_blockptr(parent, slot + 1);
2169 gen = btrfs_node_ptr_generation(parent, slot + 1);
2170 eb = btrfs_find_tree_block(root, block2, blocksize);
2171 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
2172 block2 = 0;
2173 free_extent_buffer(eb);
2174 }
2175 if (block1 || block2) {
2176 ret = -EAGAIN;
2177
2178
2179 btrfs_release_path(path);
2180
2181
2182 if (block1)
2183 readahead_tree_block(root, block1, blocksize, 0);
2184 if (block2)
2185 readahead_tree_block(root, block2, blocksize, 0);
2186
2187 if (block1) {
2188 eb = read_tree_block(root, block1, blocksize, 0);
2189 free_extent_buffer(eb);
2190 }
2191 if (block2) {
2192 eb = read_tree_block(root, block2, blocksize, 0);
2193 free_extent_buffer(eb);
2194 }
2195 }
2196 return ret;
2197}
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213static noinline void unlock_up(struct btrfs_path *path, int level,
2214 int lowest_unlock, int min_write_lock_level,
2215 int *write_lock_level)
2216{
2217 int i;
2218 int skip_level = level;
2219 int no_skips = 0;
2220 struct extent_buffer *t;
2221
2222 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2223 if (!path->nodes[i])
2224 break;
2225 if (!path->locks[i])
2226 break;
2227 if (!no_skips && path->slots[i] == 0) {
2228 skip_level = i + 1;
2229 continue;
2230 }
2231 if (!no_skips && path->keep_locks) {
2232 u32 nritems;
2233 t = path->nodes[i];
2234 nritems = btrfs_header_nritems(t);
2235 if (nritems < 1 || path->slots[i] >= nritems - 1) {
2236 skip_level = i + 1;
2237 continue;
2238 }
2239 }
2240 if (skip_level < i && i >= lowest_unlock)
2241 no_skips = 1;
2242
2243 t = path->nodes[i];
2244 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
2245 btrfs_tree_unlock_rw(t, path->locks[i]);
2246 path->locks[i] = 0;
2247 if (write_lock_level &&
2248 i > min_write_lock_level &&
2249 i <= *write_lock_level) {
2250 *write_lock_level = i - 1;
2251 }
2252 }
2253 }
2254}
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
2266{
2267 int i;
2268
2269 if (path->keep_locks)
2270 return;
2271
2272 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2273 if (!path->nodes[i])
2274 continue;
2275 if (!path->locks[i])
2276 continue;
2277 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
2278 path->locks[i] = 0;
2279 }
2280}
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290static int
2291read_block_for_search(struct btrfs_trans_handle *trans,
2292 struct btrfs_root *root, struct btrfs_path *p,
2293 struct extent_buffer **eb_ret, int level, int slot,
2294 struct btrfs_key *key, u64 time_seq)
2295{
2296 u64 blocknr;
2297 u64 gen;
2298 u32 blocksize;
2299 struct extent_buffer *b = *eb_ret;
2300 struct extent_buffer *tmp;
2301 int ret;
2302
2303 blocknr = btrfs_node_blockptr(b, slot);
2304 gen = btrfs_node_ptr_generation(b, slot);
2305 blocksize = btrfs_level_size(root, level - 1);
2306
2307 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
2308 if (tmp) {
2309
2310 if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
2311 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
2312
2313
2314
2315
2316
2317 *eb_ret = tmp;
2318 return 0;
2319 }
2320
2321
2322
2323
2324
2325
2326 free_extent_buffer(tmp);
2327 btrfs_set_path_blocking(p);
2328
2329
2330 tmp = read_tree_block(root, blocknr, blocksize, gen);
2331 if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
2332 *eb_ret = tmp;
2333 return 0;
2334 }
2335 free_extent_buffer(tmp);
2336 btrfs_release_path(p);
2337 return -EIO;
2338 }
2339 }
2340
2341
2342
2343
2344
2345
2346
2347
2348 btrfs_unlock_up_safe(p, level + 1);
2349 btrfs_set_path_blocking(p);
2350
2351 free_extent_buffer(tmp);
2352 if (p->reada)
2353 reada_for_search(root, p, level, slot, key->objectid);
2354
2355 btrfs_release_path(p);
2356
2357 ret = -EAGAIN;
2358 tmp = read_tree_block(root, blocknr, blocksize, 0);
2359 if (tmp) {
2360
2361
2362
2363
2364
2365
2366 if (!btrfs_buffer_uptodate(tmp, 0, 0))
2367 ret = -EIO;
2368 free_extent_buffer(tmp);
2369 }
2370 return ret;
2371}
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382static int
2383setup_nodes_for_search(struct btrfs_trans_handle *trans,
2384 struct btrfs_root *root, struct btrfs_path *p,
2385 struct extent_buffer *b, int level, int ins_len,
2386 int *write_lock_level)
2387{
2388 int ret;
2389 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
2390 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
2391 int sret;
2392
2393 if (*write_lock_level < level + 1) {
2394 *write_lock_level = level + 1;
2395 btrfs_release_path(p);
2396 goto again;
2397 }
2398
2399 sret = reada_for_balance(root, p, level);
2400 if (sret)
2401 goto again;
2402
2403 btrfs_set_path_blocking(p);
2404 sret = split_node(trans, root, p, level);
2405 btrfs_clear_path_blocking(p, NULL, 0);
2406
2407 BUG_ON(sret > 0);
2408 if (sret) {
2409 ret = sret;
2410 goto done;
2411 }
2412 b = p->nodes[level];
2413 } else if (ins_len < 0 && btrfs_header_nritems(b) <
2414 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
2415 int sret;
2416
2417 if (*write_lock_level < level + 1) {
2418 *write_lock_level = level + 1;
2419 btrfs_release_path(p);
2420 goto again;
2421 }
2422
2423 sret = reada_for_balance(root, p, level);
2424 if (sret)
2425 goto again;
2426
2427 btrfs_set_path_blocking(p);
2428 sret = balance_level(trans, root, p, level);
2429 btrfs_clear_path_blocking(p, NULL, 0);
2430
2431 if (sret) {
2432 ret = sret;
2433 goto done;
2434 }
2435 b = p->nodes[level];
2436 if (!b) {
2437 btrfs_release_path(p);
2438 goto again;
2439 }
2440 BUG_ON(btrfs_header_nritems(b) == 1);
2441 }
2442 return 0;
2443
2444again:
2445 ret = -EAGAIN;
2446done:
2447 return ret;
2448}
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2464 *root, struct btrfs_key *key, struct btrfs_path *p, int
2465 ins_len, int cow)
2466{
2467 struct extent_buffer *b;
2468 int slot;
2469 int ret;
2470 int err;
2471 int level;
2472 int lowest_unlock = 1;
2473 int root_lock;
2474
2475 int write_lock_level = 0;
2476 u8 lowest_level = 0;
2477 int min_write_lock_level;
2478
2479 lowest_level = p->lowest_level;
2480 WARN_ON(lowest_level && ins_len > 0);
2481 WARN_ON(p->nodes[0] != NULL);
2482
2483 if (ins_len < 0) {
2484 lowest_unlock = 2;
2485
2486
2487
2488
2489
2490 write_lock_level = 2;
2491 } else if (ins_len > 0) {
2492
2493
2494
2495
2496 write_lock_level = 1;
2497 }
2498
2499 if (!cow)
2500 write_lock_level = -1;
2501
2502 if (cow && (p->keep_locks || p->lowest_level))
2503 write_lock_level = BTRFS_MAX_LEVEL;
2504
2505 min_write_lock_level = write_lock_level;
2506
2507again:
2508
2509
2510
2511 root_lock = BTRFS_READ_LOCK;
2512 level = 0;
2513 if (p->search_commit_root) {
2514
2515
2516
2517
2518 b = root->commit_root;
2519 extent_buffer_get(b);
2520 level = btrfs_header_level(b);
2521 if (!p->skip_locking)
2522 btrfs_tree_read_lock(b);
2523 } else {
2524 if (p->skip_locking) {
2525 b = btrfs_root_node(root);
2526 level = btrfs_header_level(b);
2527 } else {
2528
2529
2530
2531 b = btrfs_read_lock_root_node(root);
2532 level = btrfs_header_level(b);
2533 if (level <= write_lock_level) {
2534
2535 btrfs_tree_read_unlock(b);
2536 free_extent_buffer(b);
2537 b = btrfs_lock_root_node(root);
2538 root_lock = BTRFS_WRITE_LOCK;
2539
2540
2541 level = btrfs_header_level(b);
2542 }
2543 }
2544 }
2545 p->nodes[level] = b;
2546 if (!p->skip_locking)
2547 p->locks[level] = root_lock;
2548
2549 while (b) {
2550 level = btrfs_header_level(b);
2551
2552
2553
2554
2555
2556 if (cow) {
2557
2558
2559
2560
2561
2562 if (!should_cow_block(trans, root, b))
2563 goto cow_done;
2564
2565 btrfs_set_path_blocking(p);
2566
2567
2568
2569
2570
2571 if (level + 1 > write_lock_level) {
2572 write_lock_level = level + 1;
2573 btrfs_release_path(p);
2574 goto again;
2575 }
2576
2577 err = btrfs_cow_block(trans, root, b,
2578 p->nodes[level + 1],
2579 p->slots[level + 1], &b);
2580 if (err) {
2581 ret = err;
2582 goto done;
2583 }
2584 }
2585cow_done:
2586 BUG_ON(!cow && ins_len);
2587
2588 p->nodes[level] = b;
2589 btrfs_clear_path_blocking(p, NULL, 0);
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602 if (!cow)
2603 btrfs_unlock_up_safe(p, level + 1);
2604
2605 ret = bin_search(b, key, level, &slot);
2606
2607 if (level != 0) {
2608 int dec = 0;
2609 if (ret && slot > 0) {
2610 dec = 1;
2611 slot -= 1;
2612 }
2613 p->slots[level] = slot;
2614 err = setup_nodes_for_search(trans, root, p, b, level,
2615 ins_len, &write_lock_level);
2616 if (err == -EAGAIN)
2617 goto again;
2618 if (err) {
2619 ret = err;
2620 goto done;
2621 }
2622 b = p->nodes[level];
2623 slot = p->slots[level];
2624
2625
2626
2627
2628
2629
2630
2631 if (slot == 0 && cow &&
2632 write_lock_level < level + 1) {
2633 write_lock_level = level + 1;
2634 btrfs_release_path(p);
2635 goto again;
2636 }
2637
2638 unlock_up(p, level, lowest_unlock,
2639 min_write_lock_level, &write_lock_level);
2640
2641 if (level == lowest_level) {
2642 if (dec)
2643 p->slots[level]++;
2644 goto done;
2645 }
2646
2647 err = read_block_for_search(trans, root, p,
2648 &b, level, slot, key, 0);
2649 if (err == -EAGAIN)
2650 goto again;
2651 if (err) {
2652 ret = err;
2653 goto done;
2654 }
2655
2656 if (!p->skip_locking) {
2657 level = btrfs_header_level(b);
2658 if (level <= write_lock_level) {
2659 err = btrfs_try_tree_write_lock(b);
2660 if (!err) {
2661 btrfs_set_path_blocking(p);
2662 btrfs_tree_lock(b);
2663 btrfs_clear_path_blocking(p, b,
2664 BTRFS_WRITE_LOCK);
2665 }
2666 p->locks[level] = BTRFS_WRITE_LOCK;
2667 } else {
2668 err = btrfs_try_tree_read_lock(b);
2669 if (!err) {
2670 btrfs_set_path_blocking(p);
2671 btrfs_tree_read_lock(b);
2672 btrfs_clear_path_blocking(p, b,
2673 BTRFS_READ_LOCK);
2674 }
2675 p->locks[level] = BTRFS_READ_LOCK;
2676 }
2677 p->nodes[level] = b;
2678 }
2679 } else {
2680 p->slots[level] = slot;
2681 if (ins_len > 0 &&
2682 btrfs_leaf_free_space(root, b) < ins_len) {
2683 if (write_lock_level < 1) {
2684 write_lock_level = 1;
2685 btrfs_release_path(p);
2686 goto again;
2687 }
2688
2689 btrfs_set_path_blocking(p);
2690 err = split_leaf(trans, root, key,
2691 p, ins_len, ret == 0);
2692 btrfs_clear_path_blocking(p, NULL, 0);
2693
2694 BUG_ON(err > 0);
2695 if (err) {
2696 ret = err;
2697 goto done;
2698 }
2699 }
2700 if (!p->search_for_split)
2701 unlock_up(p, level, lowest_unlock,
2702 min_write_lock_level, &write_lock_level);
2703 goto done;
2704 }
2705 }
2706 ret = 1;
2707done:
2708
2709
2710
2711
2712 if (!p->leave_spinning)
2713 btrfs_set_path_blocking(p);
2714 if (ret < 0)
2715 btrfs_release_path(p);
2716 return ret;
2717}
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2731 struct btrfs_path *p, u64 time_seq)
2732{
2733 struct extent_buffer *b;
2734 int slot;
2735 int ret;
2736 int err;
2737 int level;
2738 int lowest_unlock = 1;
2739 u8 lowest_level = 0;
2740
2741 lowest_level = p->lowest_level;
2742 WARN_ON(p->nodes[0] != NULL);
2743
2744 if (p->search_commit_root) {
2745 BUG_ON(time_seq);
2746 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2747 }
2748
2749again:
2750 b = get_old_root(root, time_seq);
2751 level = btrfs_header_level(b);
2752 p->locks[level] = BTRFS_READ_LOCK;
2753
2754 while (b) {
2755 level = btrfs_header_level(b);
2756 p->nodes[level] = b;
2757 btrfs_clear_path_blocking(p, NULL, 0);
2758
2759
2760
2761
2762
2763
2764
2765 btrfs_unlock_up_safe(p, level + 1);
2766
2767 ret = bin_search(b, key, level, &slot);
2768
2769 if (level != 0) {
2770 int dec = 0;
2771 if (ret && slot > 0) {
2772 dec = 1;
2773 slot -= 1;
2774 }
2775 p->slots[level] = slot;
2776 unlock_up(p, level, lowest_unlock, 0, NULL);
2777
2778 if (level == lowest_level) {
2779 if (dec)
2780 p->slots[level]++;
2781 goto done;
2782 }
2783
2784 err = read_block_for_search(NULL, root, p, &b, level,
2785 slot, key, time_seq);
2786 if (err == -EAGAIN)
2787 goto again;
2788 if (err) {
2789 ret = err;
2790 goto done;
2791 }
2792
2793 level = btrfs_header_level(b);
2794 err = btrfs_try_tree_read_lock(b);
2795 if (!err) {
2796 btrfs_set_path_blocking(p);
2797 btrfs_tree_read_lock(b);
2798 btrfs_clear_path_blocking(p, b,
2799 BTRFS_READ_LOCK);
2800 }
2801 p->locks[level] = BTRFS_READ_LOCK;
2802 p->nodes[level] = b;
2803 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2804 if (b != p->nodes[level]) {
2805 btrfs_tree_unlock_rw(p->nodes[level],
2806 p->locks[level]);
2807 p->locks[level] = 0;
2808 p->nodes[level] = b;
2809 }
2810 } else {
2811 p->slots[level] = slot;
2812 unlock_up(p, level, lowest_unlock, 0, NULL);
2813 goto done;
2814 }
2815 }
2816 ret = 1;
2817done:
2818 if (!p->leave_spinning)
2819 btrfs_set_path_blocking(p);
2820 if (ret < 0)
2821 btrfs_release_path(p);
2822
2823 return ret;
2824}
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838int btrfs_search_slot_for_read(struct btrfs_root *root,
2839 struct btrfs_key *key, struct btrfs_path *p,
2840 int find_higher, int return_any)
2841{
2842 int ret;
2843 struct extent_buffer *leaf;
2844
2845again:
2846 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2847 if (ret <= 0)
2848 return ret;
2849
2850
2851
2852
2853
2854
2855
2856 leaf = p->nodes[0];
2857
2858 if (find_higher) {
2859 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2860 ret = btrfs_next_leaf(root, p);
2861 if (ret <= 0)
2862 return ret;
2863 if (!return_any)
2864 return 1;
2865
2866
2867
2868
2869 return_any = 0;
2870 find_higher = 0;
2871 btrfs_release_path(p);
2872 goto again;
2873 }
2874 } else {
2875 if (p->slots[0] == 0) {
2876 ret = btrfs_prev_leaf(root, p);
2877 if (ret < 0)
2878 return ret;
2879 if (!ret) {
2880 p->slots[0] = btrfs_header_nritems(leaf) - 1;
2881 return 0;
2882 }
2883 if (!return_any)
2884 return 1;
2885
2886
2887
2888
2889 return_any = 0;
2890 find_higher = 1;
2891 btrfs_release_path(p);
2892 goto again;
2893 } else {
2894 --p->slots[0];
2895 }
2896 }
2897 return 0;
2898}
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908static void fixup_low_keys(struct btrfs_trans_handle *trans,
2909 struct btrfs_root *root, struct btrfs_path *path,
2910 struct btrfs_disk_key *key, int level)
2911{
2912 int i;
2913 struct extent_buffer *t;
2914
2915 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2916 int tslot = path->slots[i];
2917 if (!path->nodes[i])
2918 break;
2919 t = path->nodes[i];
2920 tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1);
2921 btrfs_set_node_key(t, key, tslot);
2922 btrfs_mark_buffer_dirty(path->nodes[i]);
2923 if (tslot != 0)
2924 break;
2925 }
2926}
2927
2928
2929
2930
2931
2932
2933
2934void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
2935 struct btrfs_root *root, struct btrfs_path *path,
2936 struct btrfs_key *new_key)
2937{
2938 struct btrfs_disk_key disk_key;
2939 struct extent_buffer *eb;
2940 int slot;
2941
2942 eb = path->nodes[0];
2943 slot = path->slots[0];
2944 if (slot > 0) {
2945 btrfs_item_key(eb, &disk_key, slot - 1);
2946 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
2947 }
2948 if (slot < btrfs_header_nritems(eb) - 1) {
2949 btrfs_item_key(eb, &disk_key, slot + 1);
2950 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
2951 }
2952
2953 btrfs_cpu_key_to_disk(&disk_key, new_key);
2954 btrfs_set_item_key(eb, &disk_key, slot);
2955 btrfs_mark_buffer_dirty(eb);
2956 if (slot == 0)
2957 fixup_low_keys(trans, root, path, &disk_key, 1);
2958}
2959
2960
2961
2962
2963
2964
2965
2966
2967static int push_node_left(struct btrfs_trans_handle *trans,
2968 struct btrfs_root *root, struct extent_buffer *dst,
2969 struct extent_buffer *src, int empty)
2970{
2971 int push_items = 0;
2972 int src_nritems;
2973 int dst_nritems;
2974 int ret = 0;
2975
2976 src_nritems = btrfs_header_nritems(src);
2977 dst_nritems = btrfs_header_nritems(dst);
2978 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
2979 WARN_ON(btrfs_header_generation(src) != trans->transid);
2980 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2981
2982 if (!empty && src_nritems <= 8)
2983 return 1;
2984
2985 if (push_items <= 0)
2986 return 1;
2987
2988 if (empty) {
2989 push_items = min(src_nritems, push_items);
2990 if (push_items < src_nritems) {
2991
2992
2993
2994 if (src_nritems - push_items < 8) {
2995 if (push_items <= 8)
2996 return 1;
2997 push_items -= 8;
2998 }
2999 }
3000 } else
3001 push_items = min(src_nritems - 8, push_items);
3002
3003 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
3004 push_items);
3005 copy_extent_buffer(dst, src,
3006 btrfs_node_key_ptr_offset(dst_nritems),
3007 btrfs_node_key_ptr_offset(0),
3008 push_items * sizeof(struct btrfs_key_ptr));
3009
3010 if (push_items < src_nritems) {
3011
3012
3013
3014
3015 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3016 btrfs_node_key_ptr_offset(push_items),
3017 (src_nritems - push_items) *
3018 sizeof(struct btrfs_key_ptr));
3019 }
3020 btrfs_set_header_nritems(src, src_nritems - push_items);
3021 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3022 btrfs_mark_buffer_dirty(src);
3023 btrfs_mark_buffer_dirty(dst);
3024
3025 return ret;
3026}
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037static int balance_node_right(struct btrfs_trans_handle *trans,
3038 struct btrfs_root *root,
3039 struct extent_buffer *dst,
3040 struct extent_buffer *src)
3041{
3042 int push_items = 0;
3043 int max_push;
3044 int src_nritems;
3045 int dst_nritems;
3046 int ret = 0;
3047
3048 WARN_ON(btrfs_header_generation(src) != trans->transid);
3049 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3050
3051 src_nritems = btrfs_header_nritems(src);
3052 dst_nritems = btrfs_header_nritems(dst);
3053 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
3054 if (push_items <= 0)
3055 return 1;
3056
3057 if (src_nritems < 4)
3058 return 1;
3059
3060 max_push = src_nritems / 2 + 1;
3061
3062 if (max_push >= src_nritems)
3063 return 1;
3064
3065 if (max_push < push_items)
3066 push_items = max_push;
3067
3068 tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems);
3069 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3070 btrfs_node_key_ptr_offset(0),
3071 (dst_nritems) *
3072 sizeof(struct btrfs_key_ptr));
3073
3074 tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3075 src_nritems - push_items, push_items);
3076 copy_extent_buffer(dst, src,
3077 btrfs_node_key_ptr_offset(0),
3078 btrfs_node_key_ptr_offset(src_nritems - push_items),
3079 push_items * sizeof(struct btrfs_key_ptr));
3080
3081 btrfs_set_header_nritems(src, src_nritems - push_items);
3082 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3083
3084 btrfs_mark_buffer_dirty(src);
3085 btrfs_mark_buffer_dirty(dst);
3086
3087 return ret;
3088}
3089
3090
3091
3092
3093
3094
3095
3096
3097static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root,
3099 struct btrfs_path *path, int level)
3100{
3101 u64 lower_gen;
3102 struct extent_buffer *lower;
3103 struct extent_buffer *c;
3104 struct extent_buffer *old;
3105 struct btrfs_disk_key lower_key;
3106
3107 BUG_ON(path->nodes[level]);
3108 BUG_ON(path->nodes[level-1] != root->node);
3109
3110 lower = path->nodes[level-1];
3111 if (level == 1)
3112 btrfs_item_key(lower, &lower_key, 0);
3113 else
3114 btrfs_node_key(lower, &lower_key, 0);
3115
3116 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
3117 root->root_key.objectid, &lower_key,
3118 level, root->node->start, 0);
3119 if (IS_ERR(c))
3120 return PTR_ERR(c);
3121
3122 root_add_used(root, root->nodesize);
3123
3124 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
3125 btrfs_set_header_nritems(c, 1);
3126 btrfs_set_header_level(c, level);
3127 btrfs_set_header_bytenr(c, c->start);
3128 btrfs_set_header_generation(c, trans->transid);
3129 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
3130 btrfs_set_header_owner(c, root->root_key.objectid);
3131
3132 write_extent_buffer(c, root->fs_info->fsid,
3133 (unsigned long)btrfs_header_fsid(c),
3134 BTRFS_FSID_SIZE);
3135
3136 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
3137 (unsigned long)btrfs_header_chunk_tree_uuid(c),
3138 BTRFS_UUID_SIZE);
3139
3140 btrfs_set_node_key(c, &lower_key, 0);
3141 btrfs_set_node_blockptr(c, 0, lower->start);
3142 lower_gen = btrfs_header_generation(lower);
3143 WARN_ON(lower_gen != trans->transid);
3144
3145 btrfs_set_node_ptr_generation(c, 0, lower_gen);
3146
3147 btrfs_mark_buffer_dirty(c);
3148
3149 old = root->node;
3150 tree_mod_log_set_root_pointer(root, c);
3151 rcu_assign_pointer(root->node, c);
3152
3153
3154 free_extent_buffer(old);
3155
3156 add_root_to_dirty_list(root);
3157 extent_buffer_get(c);
3158 path->nodes[level] = c;
3159 path->locks[level] = BTRFS_WRITE_LOCK;
3160 path->slots[level] = 0;
3161 return 0;
3162}
3163
3164
3165
3166
3167
3168
3169
3170
3171static void insert_ptr(struct btrfs_trans_handle *trans,
3172 struct btrfs_root *root, struct btrfs_path *path,
3173 struct btrfs_disk_key *key, u64 bytenr,
3174 int slot, int level)
3175{
3176 struct extent_buffer *lower;
3177 int nritems;
3178 int ret;
3179
3180 BUG_ON(!path->nodes[level]);
3181 btrfs_assert_tree_locked(path->nodes[level]);
3182 lower = path->nodes[level];
3183 nritems = btrfs_header_nritems(lower);
3184 BUG_ON(slot > nritems);
3185 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
3186 if (slot != nritems) {
3187 if (level)
3188 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
3189 slot, nritems - slot);
3190 memmove_extent_buffer(lower,
3191 btrfs_node_key_ptr_offset(slot + 1),
3192 btrfs_node_key_ptr_offset(slot),
3193 (nritems - slot) * sizeof(struct btrfs_key_ptr));
3194 }
3195 if (level) {
3196 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
3197 MOD_LOG_KEY_ADD);
3198 BUG_ON(ret < 0);
3199 }
3200 btrfs_set_node_key(lower, key, slot);
3201 btrfs_set_node_blockptr(lower, slot, bytenr);
3202 WARN_ON(trans->transid == 0);
3203 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
3204 btrfs_set_header_nritems(lower, nritems + 1);
3205 btrfs_mark_buffer_dirty(lower);
3206}
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217static noinline int split_node(struct btrfs_trans_handle *trans,
3218 struct btrfs_root *root,
3219 struct btrfs_path *path, int level)
3220{
3221 struct extent_buffer *c;
3222 struct extent_buffer *split;
3223 struct btrfs_disk_key disk_key;
3224 int mid;
3225 int ret;
3226 u32 c_nritems;
3227
3228 c = path->nodes[level];
3229 WARN_ON(btrfs_header_generation(c) != trans->transid);
3230 if (c == root->node) {
3231
3232 ret = insert_new_root(trans, root, path, level + 1);
3233 if (ret)
3234 return ret;
3235 } else {
3236 ret = push_nodes_for_insert(trans, root, path, level);
3237 c = path->nodes[level];
3238 if (!ret && btrfs_header_nritems(c) <
3239 BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
3240 return 0;
3241 if (ret < 0)
3242 return ret;
3243 }
3244
3245 c_nritems = btrfs_header_nritems(c);
3246 mid = (c_nritems + 1) / 2;
3247 btrfs_node_key(c, &disk_key, mid);
3248
3249 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
3250 root->root_key.objectid,
3251 &disk_key, level, c->start, 0);
3252 if (IS_ERR(split))
3253 return PTR_ERR(split);
3254
3255 root_add_used(root, root->nodesize);
3256
3257 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
3258 btrfs_set_header_level(split, btrfs_header_level(c));
3259 btrfs_set_header_bytenr(split, split->start);
3260 btrfs_set_header_generation(split, trans->transid);
3261 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
3262 btrfs_set_header_owner(split, root->root_key.objectid);
3263 write_extent_buffer(split, root->fs_info->fsid,
3264 (unsigned long)btrfs_header_fsid(split),
3265 BTRFS_FSID_SIZE);
3266 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
3267 (unsigned long)btrfs_header_chunk_tree_uuid(split),
3268 BTRFS_UUID_SIZE);
3269
3270 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
3271 copy_extent_buffer(split, c,
3272 btrfs_node_key_ptr_offset(0),
3273 btrfs_node_key_ptr_offset(mid),
3274 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
3275 btrfs_set_header_nritems(split, c_nritems - mid);
3276 btrfs_set_header_nritems(c, mid);
3277 ret = 0;
3278
3279 btrfs_mark_buffer_dirty(c);
3280 btrfs_mark_buffer_dirty(split);
3281
3282 insert_ptr(trans, root, path, &disk_key, split->start,
3283 path->slots[level + 1] + 1, level + 1);
3284
3285 if (path->slots[level] >= mid) {
3286 path->slots[level] -= mid;
3287 btrfs_tree_unlock(c);
3288 free_extent_buffer(c);
3289 path->nodes[level] = split;
3290 path->slots[level + 1] += 1;
3291 } else {
3292 btrfs_tree_unlock(split);
3293 free_extent_buffer(split);
3294 }
3295 return ret;
3296}
3297
3298
3299
3300
3301
3302
3303static int leaf_space_used(struct extent_buffer *l, int start, int nr)
3304{
3305 int data_len;
3306 int nritems = btrfs_header_nritems(l);
3307 int end = min(nritems, start + nr) - 1;
3308
3309 if (!nr)
3310 return 0;
3311 data_len = btrfs_item_end_nr(l, start);
3312 data_len = data_len - btrfs_item_offset_nr(l, end);
3313 data_len += sizeof(struct btrfs_item) * nr;
3314 WARN_ON(data_len < 0);
3315 return data_len;
3316}
3317
3318
3319
3320
3321
3322
3323noinline int btrfs_leaf_free_space(struct btrfs_root *root,
3324 struct extent_buffer *leaf)
3325{
3326 int nritems = btrfs_header_nritems(leaf);
3327 int ret;
3328 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
3329 if (ret < 0) {
3330 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
3331 "used %d nritems %d\n",
3332 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
3333 leaf_space_used(leaf, 0, nritems), nritems);
3334 }
3335 return ret;
3336}
3337
3338
3339
3340
3341
3342static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3343 struct btrfs_root *root,
3344 struct btrfs_path *path,
3345 int data_size, int empty,
3346 struct extent_buffer *right,
3347 int free_space, u32 left_nritems,
3348 u32 min_slot)
3349{
3350 struct extent_buffer *left = path->nodes[0];
3351 struct extent_buffer *upper = path->nodes[1];
3352 struct btrfs_map_token token;
3353 struct btrfs_disk_key disk_key;
3354 int slot;
3355 u32 i;
3356 int push_space = 0;
3357 int push_items = 0;
3358 struct btrfs_item *item;
3359 u32 nr;
3360 u32 right_nritems;
3361 u32 data_end;
3362 u32 this_item_size;
3363
3364 btrfs_init_map_token(&token);
3365
3366 if (empty)
3367 nr = 0;
3368 else
3369 nr = max_t(u32, 1, min_slot);
3370
3371 if (path->slots[0] >= left_nritems)
3372 push_space += data_size;
3373
3374 slot = path->slots[1];
3375 i = left_nritems - 1;
3376 while (i >= nr) {
3377 item = btrfs_item_nr(left, i);
3378
3379 if (!empty && push_items > 0) {
3380 if (path->slots[0] > i)
3381 break;
3382 if (path->slots[0] == i) {
3383 int space = btrfs_leaf_free_space(root, left);
3384 if (space + push_space * 2 > free_space)
3385 break;
3386 }
3387 }
3388
3389 if (path->slots[0] == i)
3390 push_space += data_size;
3391
3392 this_item_size = btrfs_item_size(left, item);
3393 if (this_item_size + sizeof(*item) + push_space > free_space)
3394 break;
3395
3396 push_items++;
3397 push_space += this_item_size + sizeof(*item);
3398 if (i == 0)
3399 break;
3400 i--;
3401 }
3402
3403 if (push_items == 0)
3404 goto out_unlock;
3405
3406 if (!empty && push_items == left_nritems)
3407 WARN_ON(1);
3408
3409
3410 right_nritems = btrfs_header_nritems(right);
3411
3412 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
3413 push_space -= leaf_data_end(root, left);
3414
3415
3416 data_end = leaf_data_end(root, right);
3417 memmove_extent_buffer(right,
3418 btrfs_leaf_data(right) + data_end - push_space,
3419 btrfs_leaf_data(right) + data_end,
3420 BTRFS_LEAF_DATA_SIZE(root) - data_end);
3421
3422
3423 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
3424 BTRFS_LEAF_DATA_SIZE(root) - push_space,
3425 btrfs_leaf_data(left) + leaf_data_end(root, left),
3426 push_space);
3427
3428 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
3429 btrfs_item_nr_offset(0),
3430 right_nritems * sizeof(struct btrfs_item));
3431
3432
3433 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
3434 btrfs_item_nr_offset(left_nritems - push_items),
3435 push_items * sizeof(struct btrfs_item));
3436
3437
3438 right_nritems += push_items;
3439 btrfs_set_header_nritems(right, right_nritems);
3440 push_space = BTRFS_LEAF_DATA_SIZE(root);
3441 for (i = 0; i < right_nritems; i++) {
3442 item = btrfs_item_nr(right, i);
3443 push_space -= btrfs_token_item_size(right, item, &token);
3444 btrfs_set_token_item_offset(right, item, push_space, &token);
3445 }
3446
3447 left_nritems -= push_items;
3448 btrfs_set_header_nritems(left, left_nritems);
3449
3450 if (left_nritems)
3451 btrfs_mark_buffer_dirty(left);
3452 else
3453 clean_tree_block(trans, root, left);
3454
3455 btrfs_mark_buffer_dirty(right);
3456
3457 btrfs_item_key(right, &disk_key, 0);
3458 btrfs_set_node_key(upper, &disk_key, slot + 1);
3459 btrfs_mark_buffer_dirty(upper);
3460
3461
3462 if (path->slots[0] >= left_nritems) {
3463 path->slots[0] -= left_nritems;
3464 if (btrfs_header_nritems(path->nodes[0]) == 0)
3465 clean_tree_block(trans, root, path->nodes[0]);
3466 btrfs_tree_unlock(path->nodes[0]);
3467 free_extent_buffer(path->nodes[0]);
3468 path->nodes[0] = right;
3469 path->slots[1] += 1;
3470 } else {
3471 btrfs_tree_unlock(right);
3472 free_extent_buffer(right);
3473 }
3474 return 0;
3475
3476out_unlock:
3477 btrfs_tree_unlock(right);
3478 free_extent_buffer(right);
3479 return 1;
3480}
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3493 *root, struct btrfs_path *path,
3494 int min_data_size, int data_size,
3495 int empty, u32 min_slot)
3496{
3497 struct extent_buffer *left = path->nodes[0];
3498 struct extent_buffer *right;
3499 struct extent_buffer *upper;
3500 int slot;
3501 int free_space;
3502 u32 left_nritems;
3503 int ret;
3504
3505 if (!path->nodes[1])
3506 return 1;
3507
3508 slot = path->slots[1];
3509 upper = path->nodes[1];
3510 if (slot >= btrfs_header_nritems(upper) - 1)
3511 return 1;
3512
3513 btrfs_assert_tree_locked(path->nodes[1]);
3514
3515 right = read_node_slot(root, upper, slot + 1);
3516 if (right == NULL)
3517 return 1;
3518
3519 btrfs_tree_lock(right);
3520 btrfs_set_lock_blocking(right);
3521
3522 free_space = btrfs_leaf_free_space(root, right);
3523 if (free_space < data_size)
3524 goto out_unlock;
3525
3526
3527 ret = btrfs_cow_block(trans, root, right, upper,
3528 slot + 1, &right);
3529 if (ret)
3530 goto out_unlock;
3531
3532 free_space = btrfs_leaf_free_space(root, right);
3533 if (free_space < data_size)
3534 goto out_unlock;
3535
3536 left_nritems = btrfs_header_nritems(left);
3537 if (left_nritems == 0)
3538 goto out_unlock;
3539
3540 return __push_leaf_right(trans, root, path, min_data_size, empty,
3541 right, free_space, left_nritems, min_slot);
3542out_unlock:
3543 btrfs_tree_unlock(right);
3544 free_extent_buffer(right);
3545 return 1;
3546}
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3557 struct btrfs_root *root,
3558 struct btrfs_path *path, int data_size,
3559 int empty, struct extent_buffer *left,
3560 int free_space, u32 right_nritems,
3561 u32 max_slot)
3562{
3563 struct btrfs_disk_key disk_key;
3564 struct extent_buffer *right = path->nodes[0];
3565 int i;
3566 int push_space = 0;
3567 int push_items = 0;
3568 struct btrfs_item *item;
3569 u32 old_left_nritems;
3570 u32 nr;
3571 int ret = 0;
3572 u32 this_item_size;
3573 u32 old_left_item_size;
3574 struct btrfs_map_token token;
3575
3576 btrfs_init_map_token(&token);
3577
3578 if (empty)
3579 nr = min(right_nritems, max_slot);
3580 else
3581 nr = min(right_nritems - 1, max_slot);
3582
3583 for (i = 0; i < nr; i++) {
3584 item = btrfs_item_nr(right, i);
3585
3586 if (!empty && push_items > 0) {
3587 if (path->slots[0] < i)
3588 break;
3589 if (path->slots[0] == i) {
3590 int space = btrfs_leaf_free_space(root, right);
3591 if (space + push_space * 2 > free_space)
3592 break;
3593 }
3594 }
3595
3596 if (path->slots[0] == i)
3597 push_space += data_size;
3598
3599 this_item_size = btrfs_item_size(right, item);
3600 if (this_item_size + sizeof(*item) + push_space > free_space)
3601 break;
3602
3603 push_items++;
3604 push_space += this_item_size + sizeof(*item);
3605 }
3606
3607 if (push_items == 0) {
3608 ret = 1;
3609 goto out;
3610 }
3611 if (!empty && push_items == btrfs_header_nritems(right))
3612 WARN_ON(1);
3613
3614
3615 copy_extent_buffer(left, right,
3616 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3617 btrfs_item_nr_offset(0),
3618 push_items * sizeof(struct btrfs_item));
3619
3620 push_space = BTRFS_LEAF_DATA_SIZE(root) -
3621 btrfs_item_offset_nr(right, push_items - 1);
3622
3623 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
3624 leaf_data_end(root, left) - push_space,
3625 btrfs_leaf_data(right) +
3626 btrfs_item_offset_nr(right, push_items - 1),
3627 push_space);
3628 old_left_nritems = btrfs_header_nritems(left);
3629 BUG_ON(old_left_nritems <= 0);
3630
3631 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
3632 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
3633 u32 ioff;
3634
3635 item = btrfs_item_nr(left, i);
3636
3637 ioff = btrfs_token_item_offset(left, item, &token);
3638 btrfs_set_token_item_offset(left, item,
3639 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
3640 &token);
3641 }
3642 btrfs_set_header_nritems(left, old_left_nritems + push_items);
3643
3644
3645 if (push_items > right_nritems) {
3646 printk(KERN_CRIT "push items %d nr %u\n", push_items,
3647 right_nritems);
3648 WARN_ON(1);
3649 }
3650
3651 if (push_items < right_nritems) {
3652 push_space = btrfs_item_offset_nr(right, push_items - 1) -
3653 leaf_data_end(root, right);
3654 memmove_extent_buffer(right, btrfs_leaf_data(right) +
3655 BTRFS_LEAF_DATA_SIZE(root) - push_space,
3656 btrfs_leaf_data(right) +
3657 leaf_data_end(root, right), push_space);
3658
3659 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
3660 btrfs_item_nr_offset(push_items),
3661 (btrfs_header_nritems(right) - push_items) *
3662 sizeof(struct btrfs_item));
3663 }
3664 right_nritems -= push_items;
3665 btrfs_set_header_nritems(right, right_nritems);
3666 push_space = BTRFS_LEAF_DATA_SIZE(root);
3667 for (i = 0; i < right_nritems; i++) {
3668 item = btrfs_item_nr(right, i);
3669
3670 push_space = push_space - btrfs_token_item_size(right,
3671 item, &token);
3672 btrfs_set_token_item_offset(right, item, push_space, &token);
3673 }
3674
3675 btrfs_mark_buffer_dirty(left);
3676 if (right_nritems)
3677 btrfs_mark_buffer_dirty(right);
3678 else
3679 clean_tree_block(trans, root, right);
3680
3681 btrfs_item_key(right, &disk_key, 0);
3682 fixup_low_keys(trans, root, path, &disk_key, 1);
3683
3684
3685 if (path->slots[0] < push_items) {
3686 path->slots[0] += old_left_nritems;
3687 btrfs_tree_unlock(path->nodes[0]);
3688 free_extent_buffer(path->nodes[0]);
3689 path->nodes[0] = left;
3690 path->slots[1] -= 1;
3691 } else {
3692 btrfs_tree_unlock(left);
3693 free_extent_buffer(left);
3694 path->slots[0] -= push_items;
3695 }
3696 BUG_ON(path->slots[0] < 0);
3697 return ret;
3698out:
3699 btrfs_tree_unlock(left);
3700 free_extent_buffer(left);
3701 return ret;
3702}
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3713 *root, struct btrfs_path *path, int min_data_size,
3714 int data_size, int empty, u32 max_slot)
3715{
3716 struct extent_buffer *right = path->nodes[0];
3717 struct extent_buffer *left;
3718 int slot;
3719 int free_space;
3720 u32 right_nritems;
3721 int ret = 0;
3722
3723 slot = path->slots[1];
3724 if (slot == 0)
3725 return 1;
3726 if (!path->nodes[1])
3727 return 1;
3728
3729 right_nritems = btrfs_header_nritems(right);
3730 if (right_nritems == 0)
3731 return 1;
3732
3733 btrfs_assert_tree_locked(path->nodes[1]);
3734
3735 left = read_node_slot(root, path->nodes[1], slot - 1);
3736 if (left == NULL)
3737 return 1;
3738
3739 btrfs_tree_lock(left);
3740 btrfs_set_lock_blocking(left);
3741
3742 free_space = btrfs_leaf_free_space(root, left);
3743 if (free_space < data_size) {
3744 ret = 1;
3745 goto out;
3746 }
3747
3748
3749 ret = btrfs_cow_block(trans, root, left,
3750 path->nodes[1], slot - 1, &left);
3751 if (ret) {
3752
3753 if (ret == -ENOSPC)
3754 ret = 1;
3755 goto out;
3756 }
3757
3758 free_space = btrfs_leaf_free_space(root, left);
3759 if (free_space < data_size) {
3760 ret = 1;
3761 goto out;
3762 }
3763
3764 return __push_leaf_left(trans, root, path, min_data_size,
3765 empty, left, free_space, right_nritems,
3766 max_slot);
3767out:
3768 btrfs_tree_unlock(left);
3769 free_extent_buffer(left);
3770 return ret;
3771}
3772
3773
3774
3775
3776
3777static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3778 struct btrfs_root *root,
3779 struct btrfs_path *path,
3780 struct extent_buffer *l,
3781 struct extent_buffer *right,
3782 int slot, int mid, int nritems)
3783{
3784 int data_copy_size;
3785 int rt_data_off;
3786 int i;
3787 struct btrfs_disk_key disk_key;
3788 struct btrfs_map_token token;
3789
3790 btrfs_init_map_token(&token);
3791
3792 nritems = nritems - mid;
3793 btrfs_set_header_nritems(right, nritems);
3794 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
3795
3796 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
3797 btrfs_item_nr_offset(mid),
3798 nritems * sizeof(struct btrfs_item));
3799
3800 copy_extent_buffer(right, l,
3801 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
3802 data_copy_size, btrfs_leaf_data(l) +
3803 leaf_data_end(root, l), data_copy_size);
3804
3805 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
3806 btrfs_item_end_nr(l, mid);
3807
3808 for (i = 0; i < nritems; i++) {
3809 struct btrfs_item *item = btrfs_item_nr(right, i);
3810 u32 ioff;
3811
3812 ioff = btrfs_token_item_offset(right, item, &token);
3813 btrfs_set_token_item_offset(right, item,
3814 ioff + rt_data_off, &token);
3815 }
3816
3817 btrfs_set_header_nritems(l, mid);
3818 btrfs_item_key(right, &disk_key, 0);
3819 insert_ptr(trans, root, path, &disk_key, right->start,
3820 path->slots[1] + 1, 1);
3821
3822 btrfs_mark_buffer_dirty(right);
3823 btrfs_mark_buffer_dirty(l);
3824 BUG_ON(path->slots[0] != slot);
3825
3826 if (mid <= slot) {
3827 btrfs_tree_unlock(path->nodes[0]);
3828 free_extent_buffer(path->nodes[0]);
3829 path->nodes[0] = right;
3830 path->slots[0] -= mid;
3831 path->slots[1] += 1;
3832 } else {
3833 btrfs_tree_unlock(right);
3834 free_extent_buffer(right);
3835 }
3836
3837 BUG_ON(path->slots[0] < 0);
3838}
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3851 struct btrfs_root *root,
3852 struct btrfs_path *path,
3853 int data_size)
3854{
3855 int ret;
3856 int progress = 0;
3857 int slot;
3858 u32 nritems;
3859
3860 slot = path->slots[0];
3861
3862
3863
3864
3865
3866 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
3867 if (ret < 0)
3868 return ret;
3869
3870 if (ret == 0)
3871 progress++;
3872
3873 nritems = btrfs_header_nritems(path->nodes[0]);
3874
3875
3876
3877
3878 if (path->slots[0] == 0 || path->slots[0] == nritems)
3879 return 0;
3880
3881 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
3882 return 0;
3883
3884
3885 slot = path->slots[0];
3886 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
3887 if (ret < 0)
3888 return ret;
3889
3890 if (ret == 0)
3891 progress++;
3892
3893 if (progress)
3894 return 0;
3895 return 1;
3896}
3897
3898
3899
3900
3901
3902
3903
3904static noinline int split_leaf(struct btrfs_trans_handle *trans,
3905 struct btrfs_root *root,
3906 struct btrfs_key *ins_key,
3907 struct btrfs_path *path, int data_size,
3908 int extend)
3909{
3910 struct btrfs_disk_key disk_key;
3911 struct extent_buffer *l;
3912 u32 nritems;
3913 int mid;
3914 int slot;
3915 struct extent_buffer *right;
3916 int ret = 0;
3917 int wret;
3918 int split;
3919 int num_doubles = 0;
3920 int tried_avoid_double = 0;
3921
3922 l = path->nodes[0];
3923 slot = path->slots[0];
3924 if (extend && data_size + btrfs_item_size_nr(l, slot) +
3925 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
3926 return -EOVERFLOW;
3927
3928
3929 if (data_size) {
3930 wret = push_leaf_right(trans, root, path, data_size,
3931 data_size, 0, 0);
3932 if (wret < 0)
3933 return wret;
3934 if (wret) {
3935 wret = push_leaf_left(trans, root, path, data_size,
3936 data_size, 0, (u32)-1);
3937 if (wret < 0)
3938 return wret;
3939 }
3940 l = path->nodes[0];
3941
3942
3943 if (btrfs_leaf_free_space(root, l) >= data_size)
3944 return 0;
3945 }
3946
3947 if (!path->nodes[1]) {
3948 ret = insert_new_root(trans, root, path, 1);
3949 if (ret)
3950 return ret;
3951 }
3952again:
3953 split = 1;
3954 l = path->nodes[0];
3955 slot = path->slots[0];
3956 nritems = btrfs_header_nritems(l);
3957 mid = (nritems + 1) / 2;
3958
3959 if (mid <= slot) {
3960 if (nritems == 1 ||
3961 leaf_space_used(l, mid, nritems - mid) + data_size >
3962 BTRFS_LEAF_DATA_SIZE(root)) {
3963 if (slot >= nritems) {
3964 split = 0;
3965 } else {
3966 mid = slot;
3967 if (mid != nritems &&
3968 leaf_space_used(l, mid, nritems - mid) +
3969 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3970 if (data_size && !tried_avoid_double)
3971 goto push_for_double;
3972 split = 2;
3973 }
3974 }
3975 }
3976 } else {
3977 if (leaf_space_used(l, 0, mid) + data_size >
3978 BTRFS_LEAF_DATA_SIZE(root)) {
3979 if (!extend && data_size && slot == 0) {
3980 split = 0;
3981 } else if ((extend || !data_size) && slot == 0) {
3982 mid = 1;
3983 } else {
3984 mid = slot;
3985 if (mid != nritems &&
3986 leaf_space_used(l, mid, nritems - mid) +
3987 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3988 if (data_size && !tried_avoid_double)
3989 goto push_for_double;
3990 split = 2 ;
3991 }
3992 }
3993 }
3994 }
3995
3996 if (split == 0)
3997 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3998 else
3999 btrfs_item_key(l, &disk_key, mid);
4000
4001 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
4002 root->root_key.objectid,
4003 &disk_key, 0, l->start, 0);
4004 if (IS_ERR(right))
4005 return PTR_ERR(right);
4006
4007 root_add_used(root, root->leafsize);
4008
4009 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
4010 btrfs_set_header_bytenr(right, right->start);
4011 btrfs_set_header_generation(right, trans->transid);
4012 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
4013 btrfs_set_header_owner(right, root->root_key.objectid);
4014 btrfs_set_header_level(right, 0);
4015 write_extent_buffer(right, root->fs_info->fsid,
4016 (unsigned long)btrfs_header_fsid(right),
4017 BTRFS_FSID_SIZE);
4018
4019 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
4020 (unsigned long)btrfs_header_chunk_tree_uuid(right),
4021 BTRFS_UUID_SIZE);
4022
4023 if (split == 0) {
4024 if (mid <= slot) {
4025 btrfs_set_header_nritems(right, 0);
4026 insert_ptr(trans, root, path, &disk_key, right->start,
4027 path->slots[1] + 1, 1);
4028 btrfs_tree_unlock(path->nodes[0]);
4029 free_extent_buffer(path->nodes[0]);
4030 path->nodes[0] = right;
4031 path->slots[0] = 0;
4032 path->slots[1] += 1;
4033 } else {
4034 btrfs_set_header_nritems(right, 0);
4035 insert_ptr(trans, root, path, &disk_key, right->start,
4036 path->slots[1], 1);
4037 btrfs_tree_unlock(path->nodes[0]);
4038 free_extent_buffer(path->nodes[0]);
4039 path->nodes[0] = right;
4040 path->slots[0] = 0;
4041 if (path->slots[1] == 0)
4042 fixup_low_keys(trans, root, path,
4043 &disk_key, 1);
4044 }
4045 btrfs_mark_buffer_dirty(right);
4046 return ret;
4047 }
4048
4049 copy_for_split(trans, root, path, l, right, slot, mid, nritems);
4050
4051 if (split == 2) {
4052 BUG_ON(num_doubles != 0);
4053 num_doubles++;
4054 goto again;
4055 }
4056
4057 return 0;
4058
4059push_for_double:
4060 push_for_double_split(trans, root, path, data_size);
4061 tried_avoid_double = 1;
4062 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
4063 return 0;
4064 goto again;
4065}
4066
4067static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4068 struct btrfs_root *root,
4069 struct btrfs_path *path, int ins_len)
4070{
4071 struct btrfs_key key;
4072 struct extent_buffer *leaf;
4073 struct btrfs_file_extent_item *fi;
4074 u64 extent_len = 0;
4075 u32 item_size;
4076 int ret;
4077
4078 leaf = path->nodes[0];
4079 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4080
4081 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4082 key.type != BTRFS_EXTENT_CSUM_KEY);
4083
4084 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
4085 return 0;
4086
4087 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4088 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4089 fi = btrfs_item_ptr(leaf, path->slots[0],
4090 struct btrfs_file_extent_item);
4091 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
4092 }
4093 btrfs_release_path(path);
4094
4095 path->keep_locks = 1;
4096 path->search_for_split = 1;
4097 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4098 path->search_for_split = 0;
4099 if (ret < 0)
4100 goto err;
4101
4102 ret = -EAGAIN;
4103 leaf = path->nodes[0];
4104
4105 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
4106 goto err;
4107
4108
4109 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
4110 goto err;
4111
4112 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4113 fi = btrfs_item_ptr(leaf, path->slots[0],
4114 struct btrfs_file_extent_item);
4115 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
4116 goto err;
4117 }
4118
4119 btrfs_set_path_blocking(path);
4120 ret = split_leaf(trans, root, &key, path, ins_len, 1);
4121 if (ret)
4122 goto err;
4123
4124 path->keep_locks = 0;
4125 btrfs_unlock_up_safe(path, 1);
4126 return 0;
4127err:
4128 path->keep_locks = 0;
4129 return ret;
4130}
4131
4132static noinline int split_item(struct btrfs_trans_handle *trans,
4133 struct btrfs_root *root,
4134 struct btrfs_path *path,
4135 struct btrfs_key *new_key,
4136 unsigned long split_offset)
4137{
4138 struct extent_buffer *leaf;
4139 struct btrfs_item *item;
4140 struct btrfs_item *new_item;
4141 int slot;
4142 char *buf;
4143 u32 nritems;
4144 u32 item_size;
4145 u32 orig_offset;
4146 struct btrfs_disk_key disk_key;
4147
4148 leaf = path->nodes[0];
4149 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
4150
4151 btrfs_set_path_blocking(path);
4152
4153 item = btrfs_item_nr(leaf, path->slots[0]);
4154 orig_offset = btrfs_item_offset(leaf, item);
4155 item_size = btrfs_item_size(leaf, item);
4156
4157 buf = kmalloc(item_size, GFP_NOFS);
4158 if (!buf)
4159 return -ENOMEM;
4160
4161 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
4162 path->slots[0]), item_size);
4163
4164 slot = path->slots[0] + 1;
4165 nritems = btrfs_header_nritems(leaf);
4166 if (slot != nritems) {
4167
4168 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
4169 btrfs_item_nr_offset(slot),
4170 (nritems - slot) * sizeof(struct btrfs_item));
4171 }
4172
4173 btrfs_cpu_key_to_disk(&disk_key, new_key);
4174 btrfs_set_item_key(leaf, &disk_key, slot);
4175
4176 new_item = btrfs_item_nr(leaf, slot);
4177
4178 btrfs_set_item_offset(leaf, new_item, orig_offset);
4179 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
4180
4181 btrfs_set_item_offset(leaf, item,
4182 orig_offset + item_size - split_offset);
4183 btrfs_set_item_size(leaf, item, split_offset);
4184
4185 btrfs_set_header_nritems(leaf, nritems + 1);
4186
4187
4188 write_extent_buffer(leaf, buf,
4189 btrfs_item_ptr_offset(leaf, path->slots[0]),
4190 split_offset);
4191
4192
4193 write_extent_buffer(leaf, buf + split_offset,
4194 btrfs_item_ptr_offset(leaf, slot),
4195 item_size - split_offset);
4196 btrfs_mark_buffer_dirty(leaf);
4197
4198 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
4199 kfree(buf);
4200 return 0;
4201}
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218int btrfs_split_item(struct btrfs_trans_handle *trans,
4219 struct btrfs_root *root,
4220 struct btrfs_path *path,
4221 struct btrfs_key *new_key,
4222 unsigned long split_offset)
4223{
4224 int ret;
4225 ret = setup_leaf_for_split(trans, root, path,
4226 sizeof(struct btrfs_item));
4227 if (ret)
4228 return ret;
4229
4230 ret = split_item(trans, root, path, new_key, split_offset);
4231 return ret;
4232}
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4243 struct btrfs_root *root,
4244 struct btrfs_path *path,
4245 struct btrfs_key *new_key)
4246{
4247 struct extent_buffer *leaf;
4248 int ret;
4249 u32 item_size;
4250
4251 leaf = path->nodes[0];
4252 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4253 ret = setup_leaf_for_split(trans, root, path,
4254 item_size + sizeof(struct btrfs_item));
4255 if (ret)
4256 return ret;
4257
4258 path->slots[0]++;
4259 setup_items_for_insert(trans, root, path, new_key, &item_size,
4260 item_size, item_size +
4261 sizeof(struct btrfs_item), 1);
4262 leaf = path->nodes[0];
4263 memcpy_extent_buffer(leaf,
4264 btrfs_item_ptr_offset(leaf, path->slots[0]),
4265 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4266 item_size);
4267 return 0;
4268}
4269
4270
4271
4272
4273
4274
4275
4276void btrfs_truncate_item(struct btrfs_trans_handle *trans,
4277 struct btrfs_root *root,
4278 struct btrfs_path *path,
4279 u32 new_size, int from_end)
4280{
4281 int slot;
4282 struct extent_buffer *leaf;
4283 struct btrfs_item *item;
4284 u32 nritems;
4285 unsigned int data_end;
4286 unsigned int old_data_start;
4287 unsigned int old_size;
4288 unsigned int size_diff;
4289 int i;
4290 struct btrfs_map_token token;
4291
4292 btrfs_init_map_token(&token);
4293
4294 leaf = path->nodes[0];
4295 slot = path->slots[0];
4296
4297 old_size = btrfs_item_size_nr(leaf, slot);
4298 if (old_size == new_size)
4299 return;
4300
4301 nritems = btrfs_header_nritems(leaf);
4302 data_end = leaf_data_end(root, leaf);
4303
4304 old_data_start = btrfs_item_offset_nr(leaf, slot);
4305
4306 size_diff = old_size - new_size;
4307
4308 BUG_ON(slot < 0);
4309 BUG_ON(slot >= nritems);
4310
4311
4312
4313
4314
4315 for (i = slot; i < nritems; i++) {
4316 u32 ioff;
4317 item = btrfs_item_nr(leaf, i);
4318
4319 ioff = btrfs_token_item_offset(leaf, item, &token);
4320 btrfs_set_token_item_offset(leaf, item,
4321 ioff + size_diff, &token);
4322 }
4323
4324
4325 if (from_end) {
4326 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4327 data_end + size_diff, btrfs_leaf_data(leaf) +
4328 data_end, old_data_start + new_size - data_end);
4329 } else {
4330 struct btrfs_disk_key disk_key;
4331 u64 offset;
4332
4333 btrfs_item_key(leaf, &disk_key, slot);
4334
4335 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
4336 unsigned long ptr;
4337 struct btrfs_file_extent_item *fi;
4338
4339 fi = btrfs_item_ptr(leaf, slot,
4340 struct btrfs_file_extent_item);
4341 fi = (struct btrfs_file_extent_item *)(
4342 (unsigned long)fi - size_diff);
4343
4344 if (btrfs_file_extent_type(leaf, fi) ==
4345 BTRFS_FILE_EXTENT_INLINE) {
4346 ptr = btrfs_item_ptr_offset(leaf, slot);
4347 memmove_extent_buffer(leaf, ptr,
4348 (unsigned long)fi,
4349 offsetof(struct btrfs_file_extent_item,
4350 disk_bytenr));
4351 }
4352 }
4353
4354 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4355 data_end + size_diff, btrfs_leaf_data(leaf) +
4356 data_end, old_data_start - data_end);
4357
4358 offset = btrfs_disk_key_offset(&disk_key);
4359 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4360 btrfs_set_item_key(leaf, &disk_key, slot);
4361 if (slot == 0)
4362 fixup_low_keys(trans, root, path, &disk_key, 1);
4363 }
4364
4365 item = btrfs_item_nr(leaf, slot);
4366 btrfs_set_item_size(leaf, item, new_size);
4367 btrfs_mark_buffer_dirty(leaf);
4368
4369 if (btrfs_leaf_free_space(root, leaf) < 0) {
4370 btrfs_print_leaf(root, leaf);
4371 BUG();
4372 }
4373}
4374
4375
4376
4377
4378void btrfs_extend_item(struct btrfs_trans_handle *trans,
4379 struct btrfs_root *root, struct btrfs_path *path,
4380 u32 data_size)
4381{
4382 int slot;
4383 struct extent_buffer *leaf;
4384 struct btrfs_item *item;
4385 u32 nritems;
4386 unsigned int data_end;
4387 unsigned int old_data;
4388 unsigned int old_size;
4389 int i;
4390 struct btrfs_map_token token;
4391
4392 btrfs_init_map_token(&token);
4393
4394 leaf = path->nodes[0];
4395
4396 nritems = btrfs_header_nritems(leaf);
4397 data_end = leaf_data_end(root, leaf);
4398
4399 if (btrfs_leaf_free_space(root, leaf) < data_size) {
4400 btrfs_print_leaf(root, leaf);
4401 BUG();
4402 }
4403 slot = path->slots[0];
4404 old_data = btrfs_item_end_nr(leaf, slot);
4405
4406 BUG_ON(slot < 0);
4407 if (slot >= nritems) {
4408 btrfs_print_leaf(root, leaf);
4409 printk(KERN_CRIT "slot %d too large, nritems %d\n",
4410 slot, nritems);
4411 BUG_ON(1);
4412 }
4413
4414
4415
4416
4417
4418 for (i = slot; i < nritems; i++) {
4419 u32 ioff;
4420 item = btrfs_item_nr(leaf, i);
4421
4422 ioff = btrfs_token_item_offset(leaf, item, &token);
4423 btrfs_set_token_item_offset(leaf, item,
4424 ioff - data_size, &token);
4425 }
4426
4427
4428 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4429 data_end - data_size, btrfs_leaf_data(leaf) +
4430 data_end, old_data - data_end);
4431
4432 data_end = old_data;
4433 old_size = btrfs_item_size_nr(leaf, slot);
4434 item = btrfs_item_nr(leaf, slot);
4435 btrfs_set_item_size(leaf, item, old_size + data_size);
4436 btrfs_mark_buffer_dirty(leaf);
4437
4438 if (btrfs_leaf_free_space(root, leaf) < 0) {
4439 btrfs_print_leaf(root, leaf);
4440 BUG();
4441 }
4442}
4443
4444
4445
4446
4447
4448
4449void setup_items_for_insert(struct btrfs_trans_handle *trans,
4450 struct btrfs_root *root, struct btrfs_path *path,
4451 struct btrfs_key *cpu_key, u32 *data_size,
4452 u32 total_data, u32 total_size, int nr)
4453{
4454 struct btrfs_item *item;
4455 int i;
4456 u32 nritems;
4457 unsigned int data_end;
4458 struct btrfs_disk_key disk_key;
4459 struct extent_buffer *leaf;
4460 int slot;
4461 struct btrfs_map_token token;
4462
4463 btrfs_init_map_token(&token);
4464
4465 leaf = path->nodes[0];
4466 slot = path->slots[0];
4467
4468 nritems = btrfs_header_nritems(leaf);
4469 data_end = leaf_data_end(root, leaf);
4470
4471 if (btrfs_leaf_free_space(root, leaf) < total_size) {
4472 btrfs_print_leaf(root, leaf);
4473 printk(KERN_CRIT "not enough freespace need %u have %d\n",
4474 total_size, btrfs_leaf_free_space(root, leaf));
4475 BUG();
4476 }
4477
4478 if (slot != nritems) {
4479 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
4480
4481 if (old_data < data_end) {
4482 btrfs_print_leaf(root, leaf);
4483 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
4484 slot, old_data, data_end);
4485 BUG_ON(1);
4486 }
4487
4488
4489
4490
4491 for (i = slot; i < nritems; i++) {
4492 u32 ioff;
4493
4494 item = btrfs_item_nr(leaf, i);
4495 ioff = btrfs_token_item_offset(leaf, item, &token);
4496 btrfs_set_token_item_offset(leaf, item,
4497 ioff - total_data, &token);
4498 }
4499
4500 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
4501 btrfs_item_nr_offset(slot),
4502 (nritems - slot) * sizeof(struct btrfs_item));
4503
4504
4505 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4506 data_end - total_data, btrfs_leaf_data(leaf) +
4507 data_end, old_data - data_end);
4508 data_end = old_data;
4509 }
4510
4511
4512 for (i = 0; i < nr; i++) {
4513 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4514 btrfs_set_item_key(leaf, &disk_key, slot + i);
4515 item = btrfs_item_nr(leaf, slot + i);
4516 btrfs_set_token_item_offset(leaf, item,
4517 data_end - data_size[i], &token);
4518 data_end -= data_size[i];
4519 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
4520 }
4521
4522 btrfs_set_header_nritems(leaf, nritems + nr);
4523
4524 if (slot == 0) {
4525 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4526 fixup_low_keys(trans, root, path, &disk_key, 1);
4527 }
4528 btrfs_unlock_up_safe(path, 1);
4529 btrfs_mark_buffer_dirty(leaf);
4530
4531 if (btrfs_leaf_free_space(root, leaf) < 0) {
4532 btrfs_print_leaf(root, leaf);
4533 BUG();
4534 }
4535}
4536
4537
4538
4539
4540
4541int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4542 struct btrfs_root *root,
4543 struct btrfs_path *path,
4544 struct btrfs_key *cpu_key, u32 *data_size,
4545 int nr)
4546{
4547 int ret = 0;
4548 int slot;
4549 int i;
4550 u32 total_size = 0;
4551 u32 total_data = 0;
4552
4553 for (i = 0; i < nr; i++)
4554 total_data += data_size[i];
4555
4556 total_size = total_data + (nr * sizeof(struct btrfs_item));
4557 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4558 if (ret == 0)
4559 return -EEXIST;
4560 if (ret < 0)
4561 return ret;
4562
4563 slot = path->slots[0];
4564 BUG_ON(slot < 0);
4565
4566 setup_items_for_insert(trans, root, path, cpu_key, data_size,
4567 total_data, total_size, nr);
4568 return 0;
4569}
4570
4571
4572
4573
4574
4575int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
4576 *root, struct btrfs_key *cpu_key, void *data, u32
4577 data_size)
4578{
4579 int ret = 0;
4580 struct btrfs_path *path;
4581 struct extent_buffer *leaf;
4582 unsigned long ptr;
4583
4584 path = btrfs_alloc_path();
4585 if (!path)
4586 return -ENOMEM;
4587 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
4588 if (!ret) {
4589 leaf = path->nodes[0];
4590 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4591 write_extent_buffer(leaf, data, ptr, data_size);
4592 btrfs_mark_buffer_dirty(leaf);
4593 }
4594 btrfs_free_path(path);
4595 return ret;
4596}
4597
4598
4599
4600
4601
4602
4603
4604static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4605 struct btrfs_path *path, int level, int slot,
4606 int tree_mod_log)
4607{
4608 struct extent_buffer *parent = path->nodes[level];
4609 u32 nritems;
4610 int ret;
4611
4612 nritems = btrfs_header_nritems(parent);
4613 if (slot != nritems - 1) {
4614 if (tree_mod_log && level)
4615 tree_mod_log_eb_move(root->fs_info, parent, slot,
4616 slot + 1, nritems - slot - 1);
4617 memmove_extent_buffer(parent,
4618 btrfs_node_key_ptr_offset(slot),
4619 btrfs_node_key_ptr_offset(slot + 1),
4620 sizeof(struct btrfs_key_ptr) *
4621 (nritems - slot - 1));
4622 } else if (tree_mod_log && level) {
4623 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
4624 MOD_LOG_KEY_REMOVE);
4625 BUG_ON(ret < 0);
4626 }
4627
4628 nritems--;
4629 btrfs_set_header_nritems(parent, nritems);
4630 if (nritems == 0 && parent == root->node) {
4631 BUG_ON(btrfs_header_level(root->node) != 1);
4632
4633 btrfs_set_header_level(root->node, 0);
4634 } else if (slot == 0) {
4635 struct btrfs_disk_key disk_key;
4636
4637 btrfs_node_key(parent, &disk_key, 0);
4638 fixup_low_keys(trans, root, path, &disk_key, level + 1);
4639 }
4640 btrfs_mark_buffer_dirty(parent);
4641}
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4654 struct btrfs_root *root,
4655 struct btrfs_path *path,
4656 struct extent_buffer *leaf)
4657{
4658 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4659 del_ptr(trans, root, path, 1, path->slots[1], 1);
4660
4661
4662
4663
4664
4665 btrfs_unlock_up_safe(path, 0);
4666
4667 root_sub_used(root, leaf->len);
4668
4669 extent_buffer_get(leaf);
4670 btrfs_free_tree_block(trans, root, leaf, 0, 1);
4671 free_extent_buffer_stale(leaf);
4672}
4673
4674
4675
4676
4677int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4678 struct btrfs_path *path, int slot, int nr)
4679{
4680 struct extent_buffer *leaf;
4681 struct btrfs_item *item;
4682 int last_off;
4683 int dsize = 0;
4684 int ret = 0;
4685 int wret;
4686 int i;
4687 u32 nritems;
4688 struct btrfs_map_token token;
4689
4690 btrfs_init_map_token(&token);
4691
4692 leaf = path->nodes[0];
4693 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
4694
4695 for (i = 0; i < nr; i++)
4696 dsize += btrfs_item_size_nr(leaf, slot + i);
4697
4698 nritems = btrfs_header_nritems(leaf);
4699
4700 if (slot + nr != nritems) {
4701 int data_end = leaf_data_end(root, leaf);
4702
4703 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4704 data_end + dsize,
4705 btrfs_leaf_data(leaf) + data_end,
4706 last_off - data_end);
4707
4708 for (i = slot + nr; i < nritems; i++) {
4709 u32 ioff;
4710
4711 item = btrfs_item_nr(leaf, i);
4712 ioff = btrfs_token_item_offset(leaf, item, &token);
4713 btrfs_set_token_item_offset(leaf, item,
4714 ioff + dsize, &token);
4715 }
4716
4717 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
4718 btrfs_item_nr_offset(slot + nr),
4719 sizeof(struct btrfs_item) *
4720 (nritems - slot - nr));
4721 }
4722 btrfs_set_header_nritems(leaf, nritems - nr);
4723 nritems -= nr;
4724
4725
4726 if (nritems == 0) {
4727 if (leaf == root->node) {
4728 btrfs_set_header_level(leaf, 0);
4729 } else {
4730 btrfs_set_path_blocking(path);
4731 clean_tree_block(trans, root, leaf);
4732 btrfs_del_leaf(trans, root, path, leaf);
4733 }
4734 } else {
4735 int used = leaf_space_used(leaf, 0, nritems);
4736 if (slot == 0) {
4737 struct btrfs_disk_key disk_key;
4738
4739 btrfs_item_key(leaf, &disk_key, 0);
4740 fixup_low_keys(trans, root, path, &disk_key, 1);
4741 }
4742
4743
4744 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
4745
4746
4747
4748
4749 slot = path->slots[1];
4750 extent_buffer_get(leaf);
4751
4752 btrfs_set_path_blocking(path);
4753 wret = push_leaf_left(trans, root, path, 1, 1,
4754 1, (u32)-1);
4755 if (wret < 0 && wret != -ENOSPC)
4756 ret = wret;
4757
4758 if (path->nodes[0] == leaf &&
4759 btrfs_header_nritems(leaf)) {
4760 wret = push_leaf_right(trans, root, path, 1,
4761 1, 1, 0);
4762 if (wret < 0 && wret != -ENOSPC)
4763 ret = wret;
4764 }
4765
4766 if (btrfs_header_nritems(leaf) == 0) {
4767 path->slots[1] = slot;
4768 btrfs_del_leaf(trans, root, path, leaf);
4769 free_extent_buffer(leaf);
4770 ret = 0;
4771 } else {
4772
4773
4774
4775
4776
4777 if (path->nodes[0] == leaf)
4778 btrfs_mark_buffer_dirty(leaf);
4779 free_extent_buffer(leaf);
4780 }
4781 } else {
4782 btrfs_mark_buffer_dirty(leaf);
4783 }
4784 }
4785 return ret;
4786}
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4797{
4798 struct btrfs_key key;
4799 struct btrfs_disk_key found_key;
4800 int ret;
4801
4802 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4803
4804 if (key.offset > 0)
4805 key.offset--;
4806 else if (key.type > 0)
4807 key.type--;
4808 else if (key.objectid > 0)
4809 key.objectid--;
4810 else
4811 return 1;
4812
4813 btrfs_release_path(path);
4814 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4815 if (ret < 0)
4816 return ret;
4817 btrfs_item_key(path->nodes[0], &found_key, 0);
4818 ret = comp_keys(&found_key, &key);
4819 if (ret < 0)
4820 return 0;
4821 return 1;
4822}
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4847 struct btrfs_key *max_key,
4848 struct btrfs_path *path, int cache_only,
4849 u64 min_trans)
4850{
4851 struct extent_buffer *cur;
4852 struct btrfs_key found_key;
4853 int slot;
4854 int sret;
4855 u32 nritems;
4856 int level;
4857 int ret = 1;
4858
4859 WARN_ON(!path->keep_locks);
4860again:
4861 cur = btrfs_read_lock_root_node(root);
4862 level = btrfs_header_level(cur);
4863 WARN_ON(path->nodes[level]);
4864 path->nodes[level] = cur;
4865 path->locks[level] = BTRFS_READ_LOCK;
4866
4867 if (btrfs_header_generation(cur) < min_trans) {
4868 ret = 1;
4869 goto out;
4870 }
4871 while (1) {
4872 nritems = btrfs_header_nritems(cur);
4873 level = btrfs_header_level(cur);
4874 sret = bin_search(cur, min_key, level, &slot);
4875
4876
4877 if (level == path->lowest_level) {
4878 if (slot >= nritems)
4879 goto find_next_key;
4880 ret = 0;
4881 path->slots[level] = slot;
4882 btrfs_item_key_to_cpu(cur, &found_key, slot);
4883 goto out;
4884 }
4885 if (sret && slot > 0)
4886 slot--;
4887
4888
4889
4890
4891
4892 while (slot < nritems) {
4893 u64 blockptr;
4894 u64 gen;
4895 struct extent_buffer *tmp;
4896 struct btrfs_disk_key disk_key;
4897
4898 blockptr = btrfs_node_blockptr(cur, slot);
4899 gen = btrfs_node_ptr_generation(cur, slot);
4900 if (gen < min_trans) {
4901 slot++;
4902 continue;
4903 }
4904 if (!cache_only)
4905 break;
4906
4907 if (max_key) {
4908 btrfs_node_key(cur, &disk_key, slot);
4909 if (comp_keys(&disk_key, max_key) >= 0) {
4910 ret = 1;
4911 goto out;
4912 }
4913 }
4914
4915 tmp = btrfs_find_tree_block(root, blockptr,
4916 btrfs_level_size(root, level - 1));
4917
4918 if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
4919 free_extent_buffer(tmp);
4920 break;
4921 }
4922 if (tmp)
4923 free_extent_buffer(tmp);
4924 slot++;
4925 }
4926find_next_key:
4927
4928
4929
4930
4931 if (slot >= nritems) {
4932 path->slots[level] = slot;
4933 btrfs_set_path_blocking(path);
4934 sret = btrfs_find_next_key(root, path, min_key, level,
4935 cache_only, min_trans);
4936 if (sret == 0) {
4937 btrfs_release_path(path);
4938 goto again;
4939 } else {
4940 goto out;
4941 }
4942 }
4943
4944 btrfs_node_key_to_cpu(cur, &found_key, slot);
4945 path->slots[level] = slot;
4946 if (level == path->lowest_level) {
4947 ret = 0;
4948 unlock_up(path, level, 1, 0, NULL);
4949 goto out;
4950 }
4951 btrfs_set_path_blocking(path);
4952 cur = read_node_slot(root, cur, slot);
4953 BUG_ON(!cur);
4954
4955 btrfs_tree_read_lock(cur);
4956
4957 path->locks[level - 1] = BTRFS_READ_LOCK;
4958 path->nodes[level - 1] = cur;
4959 unlock_up(path, level, 1, 0, NULL);
4960 btrfs_clear_path_blocking(path, NULL, 0);
4961 }
4962out:
4963 if (ret == 0)
4964 memcpy(min_key, &found_key, sizeof(found_key));
4965 btrfs_set_path_blocking(path);
4966 return ret;
4967}
4968
4969static void tree_move_down(struct btrfs_root *root,
4970 struct btrfs_path *path,
4971 int *level, int root_level)
4972{
4973 BUG_ON(*level == 0);
4974 path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level],
4975 path->slots[*level]);
4976 path->slots[*level - 1] = 0;
4977 (*level)--;
4978}
4979
4980static int tree_move_next_or_upnext(struct btrfs_root *root,
4981 struct btrfs_path *path,
4982 int *level, int root_level)
4983{
4984 int ret = 0;
4985 int nritems;
4986 nritems = btrfs_header_nritems(path->nodes[*level]);
4987
4988 path->slots[*level]++;
4989
4990 while (path->slots[*level] >= nritems) {
4991 if (*level == root_level)
4992 return -1;
4993
4994
4995 path->slots[*level] = 0;
4996 free_extent_buffer(path->nodes[*level]);
4997 path->nodes[*level] = NULL;
4998 (*level)++;
4999 path->slots[*level]++;
5000
5001 nritems = btrfs_header_nritems(path->nodes[*level]);
5002 ret = 1;
5003 }
5004 return ret;
5005}
5006
5007
5008
5009
5010
5011static int tree_advance(struct btrfs_root *root,
5012 struct btrfs_path *path,
5013 int *level, int root_level,
5014 int allow_down,
5015 struct btrfs_key *key)
5016{
5017 int ret;
5018
5019 if (*level == 0 || !allow_down) {
5020 ret = tree_move_next_or_upnext(root, path, level, root_level);
5021 } else {
5022 tree_move_down(root, path, level, root_level);
5023 ret = 0;
5024 }
5025 if (ret >= 0) {
5026 if (*level == 0)
5027 btrfs_item_key_to_cpu(path->nodes[*level], key,
5028 path->slots[*level]);
5029 else
5030 btrfs_node_key_to_cpu(path->nodes[*level], key,
5031 path->slots[*level]);
5032 }
5033 return ret;
5034}
5035
5036static int tree_compare_item(struct btrfs_root *left_root,
5037 struct btrfs_path *left_path,
5038 struct btrfs_path *right_path,
5039 char *tmp_buf)
5040{
5041 int cmp;
5042 int len1, len2;
5043 unsigned long off1, off2;
5044
5045 len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
5046 len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
5047 if (len1 != len2)
5048 return 1;
5049
5050 off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
5051 off2 = btrfs_item_ptr_offset(right_path->nodes[0],
5052 right_path->slots[0]);
5053
5054 read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
5055
5056 cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
5057 if (cmp)
5058 return 1;
5059 return 0;
5060}
5061
5062#define ADVANCE 1
5063#define ADVANCE_ONLY_NEXT -1
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078int btrfs_compare_trees(struct btrfs_root *left_root,
5079 struct btrfs_root *right_root,
5080 btrfs_changed_cb_t changed_cb, void *ctx)
5081{
5082 int ret;
5083 int cmp;
5084 struct btrfs_trans_handle *trans = NULL;
5085 struct btrfs_path *left_path = NULL;
5086 struct btrfs_path *right_path = NULL;
5087 struct btrfs_key left_key;
5088 struct btrfs_key right_key;
5089 char *tmp_buf = NULL;
5090 int left_root_level;
5091 int right_root_level;
5092 int left_level;
5093 int right_level;
5094 int left_end_reached;
5095 int right_end_reached;
5096 int advance_left;
5097 int advance_right;
5098 u64 left_blockptr;
5099 u64 right_blockptr;
5100 u64 left_start_ctransid;
5101 u64 right_start_ctransid;
5102 u64 ctransid;
5103
5104 left_path = btrfs_alloc_path();
5105 if (!left_path) {
5106 ret = -ENOMEM;
5107 goto out;
5108 }
5109 right_path = btrfs_alloc_path();
5110 if (!right_path) {
5111 ret = -ENOMEM;
5112 goto out;
5113 }
5114
5115 tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS);
5116 if (!tmp_buf) {
5117 ret = -ENOMEM;
5118 goto out;
5119 }
5120
5121 left_path->search_commit_root = 1;
5122 left_path->skip_locking = 1;
5123 right_path->search_commit_root = 1;
5124 right_path->skip_locking = 1;
5125
5126 spin_lock(&left_root->root_times_lock);
5127 left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
5128 spin_unlock(&left_root->root_times_lock);
5129
5130 spin_lock(&right_root->root_times_lock);
5131 right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
5132 spin_unlock(&right_root->root_times_lock);
5133
5134 trans = btrfs_join_transaction(left_root);
5135 if (IS_ERR(trans)) {
5136 ret = PTR_ERR(trans);
5137 trans = NULL;
5138 goto out;
5139 }
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177 left_level = btrfs_header_level(left_root->commit_root);
5178 left_root_level = left_level;
5179 left_path->nodes[left_level] = left_root->commit_root;
5180 extent_buffer_get(left_path->nodes[left_level]);
5181
5182 right_level = btrfs_header_level(right_root->commit_root);
5183 right_root_level = right_level;
5184 right_path->nodes[right_level] = right_root->commit_root;
5185 extent_buffer_get(right_path->nodes[right_level]);
5186
5187 if (left_level == 0)
5188 btrfs_item_key_to_cpu(left_path->nodes[left_level],
5189 &left_key, left_path->slots[left_level]);
5190 else
5191 btrfs_node_key_to_cpu(left_path->nodes[left_level],
5192 &left_key, left_path->slots[left_level]);
5193 if (right_level == 0)
5194 btrfs_item_key_to_cpu(right_path->nodes[right_level],
5195 &right_key, right_path->slots[right_level]);
5196 else
5197 btrfs_node_key_to_cpu(right_path->nodes[right_level],
5198 &right_key, right_path->slots[right_level]);
5199
5200 left_end_reached = right_end_reached = 0;
5201 advance_left = advance_right = 0;
5202
5203 while (1) {
5204
5205
5206
5207
5208
5209 if (trans && btrfs_should_end_transaction(trans, left_root)) {
5210 btrfs_release_path(left_path);
5211 btrfs_release_path(right_path);
5212
5213 ret = btrfs_end_transaction(trans, left_root);
5214 trans = NULL;
5215 if (ret < 0)
5216 goto out;
5217 }
5218
5219 if (!trans) {
5220 trans = btrfs_join_transaction(left_root);
5221 if (IS_ERR(trans)) {
5222 ret = PTR_ERR(trans);
5223 trans = NULL;
5224 goto out;
5225 }
5226
5227 spin_lock(&left_root->root_times_lock);
5228 ctransid = btrfs_root_ctransid(&left_root->root_item);
5229 spin_unlock(&left_root->root_times_lock);
5230 if (ctransid != left_start_ctransid)
5231 left_start_ctransid = 0;
5232
5233 spin_lock(&right_root->root_times_lock);
5234 ctransid = btrfs_root_ctransid(&right_root->root_item);
5235 spin_unlock(&right_root->root_times_lock);
5236 if (ctransid != right_start_ctransid)
5237 right_start_ctransid = 0;
5238
5239 if (!left_start_ctransid || !right_start_ctransid) {
5240 WARN(1, KERN_WARNING
5241 "btrfs: btrfs_compare_tree detected "
5242 "a change in one of the trees while "
5243 "iterating. This is probably a "
5244 "bug.\n");
5245 ret = -EIO;
5246 goto out;
5247 }
5248
5249
5250
5251
5252
5253 left_path->lowest_level = left_level;
5254 right_path->lowest_level = right_level;
5255 ret = btrfs_search_slot(NULL, left_root,
5256 &left_key, left_path, 0, 0);
5257 if (ret < 0)
5258 goto out;
5259 ret = btrfs_search_slot(NULL, right_root,
5260 &right_key, right_path, 0, 0);
5261 if (ret < 0)
5262 goto out;
5263 }
5264
5265 if (advance_left && !left_end_reached) {
5266 ret = tree_advance(left_root, left_path, &left_level,
5267 left_root_level,
5268 advance_left != ADVANCE_ONLY_NEXT,
5269 &left_key);
5270 if (ret < 0)
5271 left_end_reached = ADVANCE;
5272 advance_left = 0;
5273 }
5274 if (advance_right && !right_end_reached) {
5275 ret = tree_advance(right_root, right_path, &right_level,
5276 right_root_level,
5277 advance_right != ADVANCE_ONLY_NEXT,
5278 &right_key);
5279 if (ret < 0)
5280 right_end_reached = ADVANCE;
5281 advance_right = 0;
5282 }
5283
5284 if (left_end_reached && right_end_reached) {
5285 ret = 0;
5286 goto out;
5287 } else if (left_end_reached) {
5288 if (right_level == 0) {
5289 ret = changed_cb(left_root, right_root,
5290 left_path, right_path,
5291 &right_key,
5292 BTRFS_COMPARE_TREE_DELETED,
5293 ctx);
5294 if (ret < 0)
5295 goto out;
5296 }
5297 advance_right = ADVANCE;
5298 continue;
5299 } else if (right_end_reached) {
5300 if (left_level == 0) {
5301 ret = changed_cb(left_root, right_root,
5302 left_path, right_path,
5303 &left_key,
5304 BTRFS_COMPARE_TREE_NEW,
5305 ctx);
5306 if (ret < 0)
5307 goto out;
5308 }
5309 advance_left = ADVANCE;
5310 continue;
5311 }
5312
5313 if (left_level == 0 && right_level == 0) {
5314 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5315 if (cmp < 0) {
5316 ret = changed_cb(left_root, right_root,
5317 left_path, right_path,
5318 &left_key,
5319 BTRFS_COMPARE_TREE_NEW,
5320 ctx);
5321 if (ret < 0)
5322 goto out;
5323 advance_left = ADVANCE;
5324 } else if (cmp > 0) {
5325 ret = changed_cb(left_root, right_root,
5326 left_path, right_path,
5327 &right_key,
5328 BTRFS_COMPARE_TREE_DELETED,
5329 ctx);
5330 if (ret < 0)
5331 goto out;
5332 advance_right = ADVANCE;
5333 } else {
5334 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
5335 ret = tree_compare_item(left_root, left_path,
5336 right_path, tmp_buf);
5337 if (ret) {
5338 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
5339 ret = changed_cb(left_root, right_root,
5340 left_path, right_path,
5341 &left_key,
5342 BTRFS_COMPARE_TREE_CHANGED,
5343 ctx);
5344 if (ret < 0)
5345 goto out;
5346 }
5347 advance_left = ADVANCE;
5348 advance_right = ADVANCE;
5349 }
5350 } else if (left_level == right_level) {
5351 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5352 if (cmp < 0) {
5353 advance_left = ADVANCE;
5354 } else if (cmp > 0) {
5355 advance_right = ADVANCE;
5356 } else {
5357 left_blockptr = btrfs_node_blockptr(
5358 left_path->nodes[left_level],
5359 left_path->slots[left_level]);
5360 right_blockptr = btrfs_node_blockptr(
5361 right_path->nodes[right_level],
5362 right_path->slots[right_level]);
5363 if (left_blockptr == right_blockptr) {
5364
5365
5366
5367
5368 advance_left = ADVANCE_ONLY_NEXT;
5369 advance_right = ADVANCE_ONLY_NEXT;
5370 } else {
5371 advance_left = ADVANCE;
5372 advance_right = ADVANCE;
5373 }
5374 }
5375 } else if (left_level < right_level) {
5376 advance_right = ADVANCE;
5377 } else {
5378 advance_left = ADVANCE;
5379 }
5380 }
5381
5382out:
5383 btrfs_free_path(left_path);
5384 btrfs_free_path(right_path);
5385 kfree(tmp_buf);
5386
5387 if (trans) {