// SPDX-License-Identifier: GPL-2.0

#include "misc.h"
#include "ctree.h"
#include "block-group.h"
#include "space-info.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
#include "sysfs.h"
#include "tree-log.h"
#include "delalloc-space.h"
#include "discard.h"
#include "raid56.h"
#include "zoned.h"
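
/*
 * Return the target block group profile for a running balance convert of the
 * given chunk type, in extended format, or 0 if no convert is requested.
 * Must be called with fs_info->balance_lock held.
 */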
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
        u64 target = 0;

        if (!bctl)
                return 0;

        if (flags & BTRFS_BLOCK_GROUP_DATA &&
            bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
        } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
                   bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
        } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
                   bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
        }

        return target;
}
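
/*
 * Reduce an extended-format set of profiles to the single profile that
 * allocations should actually use, based on the number of writeable devices
 * and any balance convert target currently set.  Returns the result in
 * chunk format.
 */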
static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
{
        u64 num_devices = fs_info->fs_devices->rw_devices;
        u64 target;
        u64 raid_type;
        u64 allowed = 0;

        spin_lock(&fs_info->balance_lock);
        target = get_restripe_target(fs_info, flags);
        if (target) {
                spin_unlock(&fs_info->balance_lock);
                return extended_to_chunk(target);
        }
        spin_unlock(&fs_info->balance_lock);

        for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
                if (num_devices >= btrfs_raid_array[raid_type].devs_min)
                        allowed |= btrfs_raid_array[raid_type].bg_flag;
        }
        allowed &= flags;

        if (allowed & BTRFS_BLOCK_GROUP_RAID6)
                allowed = BTRFS_BLOCK_GROUP_RAID6;
        else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
                allowed = BTRFS_BLOCK_GROUP_RAID5;
        else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
                allowed = BTRFS_BLOCK_GROUP_RAID10;
        else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
                allowed = BTRFS_BLOCK_GROUP_RAID1;
        else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
                allowed = BTRFS_BLOCK_GROUP_RAID0;

        flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;

        return extended_to_chunk(flags | allowed);
}

u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
{
        unsigned seq;
        u64 flags;

        do {
                flags = orig_flags;
                seq = read_seqbegin(&fs_info->profiles_lock);

                if (flags & BTRFS_BLOCK_GROUP_DATA)
                        flags |= fs_info->avail_data_alloc_bits;
                else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
                        flags |= fs_info->avail_system_alloc_bits;
                else if (flags & BTRFS_BLOCK_GROUP_METADATA)
                        flags |= fs_info->avail_metadata_alloc_bits;
        } while (read_seqretry(&fs_info->profiles_lock, seq));

        return btrfs_reduce_alloc_profile(fs_info, flags);
}

void btrfs_get_block_group(struct btrfs_block_group *cache)
{
        refcount_inc(&cache->refs);
}

void btrfs_put_block_group(struct btrfs_block_group *cache)
{
        if (refcount_dec_and_test(&cache->refs)) {
                WARN_ON(cache->pinned > 0);
                WARN_ON(cache->reserved > 0);

                if (WARN_ON(!list_empty(&cache->discard_list)))
                        btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
                                                  cache);

                WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
                kfree(cache->free_space_ctl);
                kfree(cache);
        }
}
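
/*
 * Insert a block group into the fs_info block group cache rbtree, keyed by
 * its start offset.  Returns -EEXIST if a block group with the same start
 * already exists.
 */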
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                       struct btrfs_block_group *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group *cache;

        ASSERT(block_group->length != 0);

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group, cache_node);
                if (block_group->start < cache->start) {
                        p = &(*p)->rb_left;
                } else if (block_group->start > cache->start) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);

        if (info->first_logical_byte > block_group->start)
                info->first_logical_byte = block_group->start;

        spin_unlock(&info->block_group_cache_lock);

        return 0;
}

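/*
 * Search the block group cache rbtree.  If @contains is set, return the block
 * group that contains @bytenr; otherwise return the first block group that
 * starts at or after @bytenr.  A reference is taken on the returned group.
 */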
static struct btrfs_block_group *block_group_cache_tree_search(
                struct btrfs_fs_info *info, u64 bytenr, int contains)
{
        struct btrfs_block_group *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group, cache_node);
                end = cache->start + cache->length - 1;
                start = cache->start;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->start))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        if (ret) {
                btrfs_get_block_group(ret);
                if (bytenr == 0 && info->first_logical_byte > ret->start)
                        info->first_logical_byte = ret->start;
        }
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

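/*
 * Return the block group that starts at or after @bytenr.  The caller must
 * drop the reference with btrfs_put_block_group().
 */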
struct btrfs_block_group *btrfs_lookup_first_block_group(
                struct btrfs_fs_info *info, u64 bytenr)
{
        return block_group_cache_tree_search(info, bytenr, 0);
}

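/*
 * Return the block group that contains @bytenr, or NULL.  The caller must
 * drop the reference with btrfs_put_block_group().
 */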
struct btrfs_block_group *btrfs_lookup_block_group(
                struct btrfs_fs_info *info, u64 bytenr)
{
        return block_group_cache_tree_search(info, bytenr, 1);
}

struct btrfs_block_group *btrfs_next_block_group(
                struct btrfs_block_group *cache)
{
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct rb_node *node;

        spin_lock(&fs_info->block_group_cache_lock);

        if (RB_EMPTY_NODE(&cache->cache_node)) {
                const u64 next_bytenr = cache->start + cache->length;

                spin_unlock(&fs_info->block_group_cache_lock);
                btrfs_put_block_group(cache);
                cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
                return cache;
        }
        node = rb_next(&cache->cache_node);
        btrfs_put_block_group(cache);
        if (node) {
                cache = rb_entry(node, struct btrfs_block_group, cache_node);
                btrfs_get_block_group(cache);
        } else
                cache = NULL;
        spin_unlock(&fs_info->block_group_cache_lock);
        return cache;
}

bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
        struct btrfs_block_group *bg;
        bool ret = true;

        bg = btrfs_lookup_block_group(fs_info, bytenr);
        if (!bg)
                return false;

        spin_lock(&bg->lock);
        if (bg->ro)
                ret = false;
        else
                atomic_inc(&bg->nocow_writers);
        spin_unlock(&bg->lock);

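        /*
         * If the block group went read-only we drop our lookup reference
         * here; otherwise the reference is kept and dropped later by
         * btrfs_dec_nocow_writers().
         */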
        if (!ret)
                btrfs_put_block_group(bg);

        return ret;
}

void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
        struct btrfs_block_group *bg;

        bg = btrfs_lookup_block_group(fs_info, bytenr);
        ASSERT(bg);
        if (atomic_dec_and_test(&bg->nocow_writers))
                wake_up_var(&bg->nocow_writers);
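        /*
         * The two puts below are intentional: one drops the reference taken
         * by the lookup above, the other drops the reference that was kept
         * when btrfs_inc_nocow_writers() bumped the nocow writer count.
         */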
        btrfs_put_block_group(bg);
        btrfs_put_block_group(bg);
}

void btrfs_wait_nocow_writers(struct btrfs_block_group *bg)
{
        wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}

void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
                                        const u64 start)
{
        struct btrfs_block_group *bg;

        bg = btrfs_lookup_block_group(fs_info, start);
        ASSERT(bg);
        if (atomic_dec_and_test(&bg->reservations))
                wake_up_var(&bg->reservations);
        btrfs_put_block_group(bg);
}

void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
{
        struct btrfs_space_info *space_info = bg->space_info;

        ASSERT(bg->ro);

        if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
                return;

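        /*
         * The block group is already read-only, but a task may have allocated
         * an extent from it just before that happened and not yet incremented
         * the reservations counter.  Such tasks hold a read lock on the
         * space_info's groups_sem while doing the increment, so cycling a
         * write lock here ensures any in-flight incrementers are finished
         * before we wait for the counter to drop to zero.
         */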
        down_write(&space_info->groups_sem);
        up_write(&space_info->groups_sem);

        wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
}

361struct btrfs_caching_control *btrfs_get_caching_control(
362 struct btrfs_block_group *cache)
363{
364 struct btrfs_caching_control *ctl;
365
366 spin_lock(&cache->lock);
367 if (!cache->caching_ctl) {
368 spin_unlock(&cache->lock);
369 return NULL;
370 }
371
372 ctl = cache->caching_ctl;
373 refcount_inc(&ctl->count);
374 spin_unlock(&cache->lock);
375 return ctl;
376}
377
378void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
379{
380 if (refcount_dec_and_test(&ctl->count))
381 kfree(ctl);
382}
383
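/*
 * Wait for a block group's caching to make progress: return once caching has
 * finished or the free space cache holds at least @num_bytes of free space.
 */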
397void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
398 u64 num_bytes)
399{
400 struct btrfs_caching_control *caching_ctl;
401
402 caching_ctl = btrfs_get_caching_control(cache);
403 if (!caching_ctl)
404 return;
405
406 wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
407 (cache->free_space_ctl->free_space >= num_bytes));
408
409 btrfs_put_caching_control(caching_ctl);
410}
411
412int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
413{
414 struct btrfs_caching_control *caching_ctl;
415 int ret = 0;
416
417 caching_ctl = btrfs_get_caching_control(cache);
418 if (!caching_ctl)
419 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
420
421 wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
422 if (cache->cached == BTRFS_CACHE_ERROR)
423 ret = -EIO;
424 btrfs_put_caching_control(caching_ctl);
425 return ret;
426}
427
428static bool space_cache_v1_done(struct btrfs_block_group *cache)
429{
430 bool ret;
431
432 spin_lock(&cache->lock);
433 ret = cache->cached != BTRFS_CACHE_FAST;
434 spin_unlock(&cache->lock);
435
436 return ret;
437}
438
439void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
440 struct btrfs_caching_control *caching_ctl)
441{
442 wait_event(caching_ctl->wait, space_cache_v1_done(cache));
443}
444
445#ifdef CONFIG_BTRFS_DEBUG
446static void fragment_free_space(struct btrfs_block_group *block_group)
447{
448 struct btrfs_fs_info *fs_info = block_group->fs_info;
449 u64 start = block_group->start;
450 u64 len = block_group->length;
451 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
452 fs_info->nodesize : fs_info->sectorsize;
453 u64 step = chunk << 1;
454
455 while (len > chunk) {
456 btrfs_remove_free_space(block_group, start, chunk);
457 start += step;
458 if (len < step)
459 len = 0;
460 else
461 len -= step;
462 }
463}
464#endif
465
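/*
 * Add the free space in the range [start, end) to the block group's free
 * space cache, skipping any sub-ranges recorded in the excluded_extents tree
 * (for example super block mirrors).  Returns the number of bytes added.
 */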
472u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
473{
474 struct btrfs_fs_info *info = block_group->fs_info;
475 u64 extent_start, extent_end, size, total_added = 0;
476 int ret;
477
478 while (start < end) {
479 ret = find_first_extent_bit(&info->excluded_extents, start,
480 &extent_start, &extent_end,
481 EXTENT_DIRTY | EXTENT_UPTODATE,
482 NULL);
483 if (ret)
484 break;
485
486 if (extent_start <= start) {
487 start = extent_end + 1;
488 } else if (extent_start > start && extent_start < end) {
489 size = extent_start - start;
490 total_added += size;
491 ret = btrfs_add_free_space_async_trimmed(block_group,
492 start, size);
493 BUG_ON(ret);
494 start = extent_end + 1;
495 } else {
496 break;
497 }
498 }
499
500 if (start < end) {
501 size = end - start;
502 total_added += size;
503 ret = btrfs_add_free_space_async_trimmed(block_group, start,
504 size);
505 BUG_ON(ret);
506 }
507
508 return total_added;
509}
510
511static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
512{
513 struct btrfs_block_group *block_group = caching_ctl->block_group;
514 struct btrfs_fs_info *fs_info = block_group->fs_info;
515 struct btrfs_root *extent_root = fs_info->extent_root;
516 struct btrfs_path *path;
517 struct extent_buffer *leaf;
518 struct btrfs_key key;
519 u64 total_found = 0;
520 u64 last = 0;
521 u32 nritems;
522 int ret;
523 bool wakeup = true;
524
525 path = btrfs_alloc_path();
526 if (!path)
527 return -ENOMEM;
528
529 last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET);
530
531#ifdef CONFIG_BTRFS_DEBUG
532
533
534
535
536
537 if (btrfs_should_fragment_free_space(block_group))
538 wakeup = false;
539#endif
540
541
542
543
544
545
546 path->skip_locking = 1;
547 path->search_commit_root = 1;
548 path->reada = READA_FORWARD;
549
550 key.objectid = last;
551 key.offset = 0;
552 key.type = BTRFS_EXTENT_ITEM_KEY;
553
554next:
555 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
556 if (ret < 0)
557 goto out;
558
559 leaf = path->nodes[0];
560 nritems = btrfs_header_nritems(leaf);
561
562 while (1) {
563 if (btrfs_fs_closing(fs_info) > 1) {
564 last = (u64)-1;
565 break;
566 }
567
568 if (path->slots[0] < nritems) {
569 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
570 } else {
571 ret = btrfs_find_next_key(extent_root, path, &key, 0, 0);
572 if (ret)
573 break;
574
575 if (need_resched() ||
576 rwsem_is_contended(&fs_info->commit_root_sem)) {
577 if (wakeup)
578 caching_ctl->progress = last;
579 btrfs_release_path(path);
580 up_read(&fs_info->commit_root_sem);
581 mutex_unlock(&caching_ctl->mutex);
582 cond_resched();
583 mutex_lock(&caching_ctl->mutex);
584 down_read(&fs_info->commit_root_sem);
585 goto next;
586 }
587
588 ret = btrfs_next_leaf(extent_root, path);
589 if (ret < 0)
590 goto out;
591 if (ret)
592 break;
593 leaf = path->nodes[0];
594 nritems = btrfs_header_nritems(leaf);
595 continue;
596 }
597
598 if (key.objectid < last) {
599 key.objectid = last;
600 key.offset = 0;
601 key.type = BTRFS_EXTENT_ITEM_KEY;
602
603 if (wakeup)
604 caching_ctl->progress = last;
605 btrfs_release_path(path);
606 goto next;
607 }
608
609 if (key.objectid < block_group->start) {
610 path->slots[0]++;
611 continue;
612 }
613
614 if (key.objectid >= block_group->start + block_group->length)
615 break;
616
617 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
618 key.type == BTRFS_METADATA_ITEM_KEY) {
619 total_found += add_new_free_space(block_group, last,
620 key.objectid);
621 if (key.type == BTRFS_METADATA_ITEM_KEY)
622 last = key.objectid +
623 fs_info->nodesize;
624 else
625 last = key.objectid + key.offset;
626
627 if (total_found > CACHING_CTL_WAKE_UP) {
628 total_found = 0;
629 if (wakeup)
630 wake_up(&caching_ctl->wait);
631 }
632 }
633 path->slots[0]++;
634 }
635 ret = 0;
636
637 total_found += add_new_free_space(block_group, last,
638 block_group->start + block_group->length);
639 caching_ctl->progress = (u64)-1;
640
641out:
642 btrfs_free_path(path);
643 return ret;
644}
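
/*
 * Background work that populates a block group's in-memory free space
 * information, from the v1 space cache, the free space tree, or by scanning
 * the extent tree.
 */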
646static noinline void caching_thread(struct btrfs_work *work)
647{
648 struct btrfs_block_group *block_group;
649 struct btrfs_fs_info *fs_info;
650 struct btrfs_caching_control *caching_ctl;
651 int ret;
652
653 caching_ctl = container_of(work, struct btrfs_caching_control, work);
654 block_group = caching_ctl->block_group;
655 fs_info = block_group->fs_info;
656
657 mutex_lock(&caching_ctl->mutex);
658 down_read(&fs_info->commit_root_sem);
659
660 if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
661 ret = load_free_space_cache(block_group);
662 if (ret == 1) {
663 ret = 0;
664 goto done;
665 }
666
667
668
669
670
671 spin_lock(&block_group->lock);
672 block_group->cached = BTRFS_CACHE_STARTED;
673 spin_unlock(&block_group->lock);
674 wake_up(&caching_ctl->wait);
675 }
676
677
678
679
680
681
682
683
684 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
685 !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
686 ret = load_free_space_tree(caching_ctl);
687 else
688 ret = load_extent_tree_free(caching_ctl);
689done:
690 spin_lock(&block_group->lock);
691 block_group->caching_ctl = NULL;
692 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
693 spin_unlock(&block_group->lock);
694
695#ifdef CONFIG_BTRFS_DEBUG
696 if (btrfs_should_fragment_free_space(block_group)) {
697 u64 bytes_used;
698
699 spin_lock(&block_group->space_info->lock);
700 spin_lock(&block_group->lock);
701 bytes_used = block_group->length - block_group->used;
702 block_group->space_info->bytes_used += bytes_used >> 1;
703 spin_unlock(&block_group->lock);
704 spin_unlock(&block_group->space_info->lock);
705 fragment_free_space(block_group);
706 }
707#endif
708
709 caching_ctl->progress = (u64)-1;
710
711 up_read(&fs_info->commit_root_sem);
712 btrfs_free_excluded_extents(block_group);
713 mutex_unlock(&caching_ctl->mutex);
714
715 wake_up(&caching_ctl->wait);
716
717 btrfs_put_caching_control(caching_ctl);
718 btrfs_put_block_group(block_group);
719}
720
721int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
722{
723 DEFINE_WAIT(wait);
724 struct btrfs_fs_info *fs_info = cache->fs_info;
725 struct btrfs_caching_control *caching_ctl = NULL;
726 int ret = 0;
727
728
729 if (btrfs_is_zoned(fs_info))
730 return 0;
731
732 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
733 if (!caching_ctl)
734 return -ENOMEM;
735
736 INIT_LIST_HEAD(&caching_ctl->list);
737 mutex_init(&caching_ctl->mutex);
738 init_waitqueue_head(&caching_ctl->wait);
739 caching_ctl->block_group = cache;
740 caching_ctl->progress = cache->start;
741 refcount_set(&caching_ctl->count, 2);
742 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
743
744 spin_lock(&cache->lock);
745 if (cache->cached != BTRFS_CACHE_NO) {
746 kfree(caching_ctl);
747
748 caching_ctl = cache->caching_ctl;
749 if (caching_ctl)
750 refcount_inc(&caching_ctl->count);
751 spin_unlock(&cache->lock);
752 goto out;
753 }
754 WARN_ON(cache->caching_ctl);
755 cache->caching_ctl = caching_ctl;
756 if (btrfs_test_opt(fs_info, SPACE_CACHE))
757 cache->cached = BTRFS_CACHE_FAST;
758 else
759 cache->cached = BTRFS_CACHE_STARTED;
760 cache->has_caching_ctl = 1;
761 spin_unlock(&cache->lock);
762
763 spin_lock(&fs_info->block_group_cache_lock);
764 refcount_inc(&caching_ctl->count);
765 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
766 spin_unlock(&fs_info->block_group_cache_lock);
767
768 btrfs_get_block_group(cache);
769
770 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
771out:
772 if (load_cache_only && caching_ctl)
773 btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
774 if (caching_ctl)
775 btrfs_put_caching_control(caching_ctl);
776
777 return ret;
778}
779
780static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
781{
782 u64 extra_flags = chunk_to_extended(flags) &
783 BTRFS_EXTENDED_PROFILE_MASK;
784
785 write_seqlock(&fs_info->profiles_lock);
786 if (flags & BTRFS_BLOCK_GROUP_DATA)
787 fs_info->avail_data_alloc_bits &= ~extra_flags;
788 if (flags & BTRFS_BLOCK_GROUP_METADATA)
789 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
790 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
791 fs_info->avail_system_alloc_bits &= ~extra_flags;
792 write_sequnlock(&fs_info->profiles_lock);
793}
794
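/*
 * Clear the RAID56 and RAID1C34 incompat bits when, after removing a block
 * group with the given flags, no block group of the corresponding profile is
 * left in any space_info.
 */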
803static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
804{
805 bool found_raid56 = false;
806 bool found_raid1c34 = false;
807
808 if ((flags & BTRFS_BLOCK_GROUP_RAID56_MASK) ||
809 (flags & BTRFS_BLOCK_GROUP_RAID1C3) ||
810 (flags & BTRFS_BLOCK_GROUP_RAID1C4)) {
811 struct list_head *head = &fs_info->space_info;
812 struct btrfs_space_info *sinfo;
813
814 list_for_each_entry_rcu(sinfo, head, list) {
815 down_read(&sinfo->groups_sem);
816 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
817 found_raid56 = true;
818 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
819 found_raid56 = true;
820 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3]))
821 found_raid1c34 = true;
822 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
823 found_raid1c34 = true;
824 up_read(&sinfo->groups_sem);
825 }
826 if (!found_raid56)
827 btrfs_clear_fs_incompat(fs_info, RAID56);
828 if (!found_raid1c34)
829 btrfs_clear_fs_incompat(fs_info, RAID1C34);
830 }
831}
832
833static int remove_block_group_item(struct btrfs_trans_handle *trans,
834 struct btrfs_path *path,
835 struct btrfs_block_group *block_group)
836{
837 struct btrfs_fs_info *fs_info = trans->fs_info;
838 struct btrfs_root *root;
839 struct btrfs_key key;
840 int ret;
841
842 root = fs_info->extent_root;
843 key.objectid = block_group->start;
844 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
845 key.offset = block_group->length;
846
847 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
848 if (ret > 0)
849 ret = -ENOENT;
850 if (ret < 0)
851 return ret;
852
853 ret = btrfs_del_item(trans, root, path);
854 return ret;
855}
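
/*
 * Remove a block group: drop its free space cache and inode, unlink it from
 * the block group cache tree, sysfs and space_info accounting, delete its
 * item from the extent tree and, unless the group is frozen, remove its
 * chunk mapping as well.
 */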
857int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
858 u64 group_start, struct extent_map *em)
859{
860 struct btrfs_fs_info *fs_info = trans->fs_info;
861 struct btrfs_path *path;
862 struct btrfs_block_group *block_group;
863 struct btrfs_free_cluster *cluster;
864 struct inode *inode;
865 struct kobject *kobj = NULL;
866 int ret;
867 int index;
868 int factor;
869 struct btrfs_caching_control *caching_ctl = NULL;
870 bool remove_em;
871 bool remove_rsv = false;
872
873 block_group = btrfs_lookup_block_group(fs_info, group_start);
874 BUG_ON(!block_group);
875 BUG_ON(!block_group->ro);
876
877 trace_btrfs_remove_block_group(block_group);
878
879
880
881
882 btrfs_free_excluded_extents(block_group);
883 btrfs_free_ref_tree_range(fs_info, block_group->start,
884 block_group->length);
885
886 index = btrfs_bg_flags_to_raid_index(block_group->flags);
887 factor = btrfs_bg_type_to_factor(block_group->flags);
888
889
890 cluster = &fs_info->data_alloc_cluster;
891 spin_lock(&cluster->refill_lock);
892 btrfs_return_cluster_to_free_space(block_group, cluster);
893 spin_unlock(&cluster->refill_lock);
894
895
896
897
898
899 cluster = &fs_info->meta_alloc_cluster;
900 spin_lock(&cluster->refill_lock);
901 btrfs_return_cluster_to_free_space(block_group, cluster);
902 spin_unlock(&cluster->refill_lock);
903
904 btrfs_clear_treelog_bg(block_group);
905
906 path = btrfs_alloc_path();
907 if (!path) {
908 ret = -ENOMEM;
909 goto out;
910 }
911
912
913
914
915
916 inode = lookup_free_space_inode(block_group, path);
917
918 mutex_lock(&trans->transaction->cache_write_mutex);
919
920
921
922
923 spin_lock(&trans->transaction->dirty_bgs_lock);
924 if (!list_empty(&block_group->io_list)) {
925 list_del_init(&block_group->io_list);
926
927 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
928
929 spin_unlock(&trans->transaction->dirty_bgs_lock);
930 btrfs_wait_cache_io(trans, block_group, path);
931 btrfs_put_block_group(block_group);
932 spin_lock(&trans->transaction->dirty_bgs_lock);
933 }
934
935 if (!list_empty(&block_group->dirty_list)) {
936 list_del_init(&block_group->dirty_list);
937 remove_rsv = true;
938 btrfs_put_block_group(block_group);
939 }
940 spin_unlock(&trans->transaction->dirty_bgs_lock);
941 mutex_unlock(&trans->transaction->cache_write_mutex);
942
943 ret = btrfs_remove_free_space_inode(trans, inode, block_group);
944 if (ret)
945 goto out;
946
947 spin_lock(&fs_info->block_group_cache_lock);
948 rb_erase(&block_group->cache_node,
949 &fs_info->block_group_cache_tree);
950 RB_CLEAR_NODE(&block_group->cache_node);
951
952
953 btrfs_put_block_group(block_group);
954
955 if (fs_info->first_logical_byte == block_group->start)
956 fs_info->first_logical_byte = (u64)-1;
957 spin_unlock(&fs_info->block_group_cache_lock);
958
959 down_write(&block_group->space_info->groups_sem);
960
961
962
963
964 list_del_init(&block_group->list);
965 if (list_empty(&block_group->space_info->block_groups[index])) {
966 kobj = block_group->space_info->block_group_kobjs[index];
967 block_group->space_info->block_group_kobjs[index] = NULL;
968 clear_avail_alloc_bits(fs_info, block_group->flags);
969 }
970 up_write(&block_group->space_info->groups_sem);
971 clear_incompat_bg_bits(fs_info, block_group->flags);
972 if (kobj) {
973 kobject_del(kobj);
974 kobject_put(kobj);
975 }
976
977 if (block_group->has_caching_ctl)
978 caching_ctl = btrfs_get_caching_control(block_group);
979 if (block_group->cached == BTRFS_CACHE_STARTED)
980 btrfs_wait_block_group_cache_done(block_group);
981 if (block_group->has_caching_ctl) {
982 spin_lock(&fs_info->block_group_cache_lock);
983 if (!caching_ctl) {
984 struct btrfs_caching_control *ctl;
985
986 list_for_each_entry(ctl,
987 &fs_info->caching_block_groups, list)
988 if (ctl->block_group == block_group) {
989 caching_ctl = ctl;
990 refcount_inc(&caching_ctl->count);
991 break;
992 }
993 }
994 if (caching_ctl)
995 list_del_init(&caching_ctl->list);
996 spin_unlock(&fs_info->block_group_cache_lock);
997 if (caching_ctl) {
998
999 btrfs_put_caching_control(caching_ctl);
1000 btrfs_put_caching_control(caching_ctl);
1001 }
1002 }
1003
1004 spin_lock(&trans->transaction->dirty_bgs_lock);
1005 WARN_ON(!list_empty(&block_group->dirty_list));
1006 WARN_ON(!list_empty(&block_group->io_list));
1007 spin_unlock(&trans->transaction->dirty_bgs_lock);
1008
1009 btrfs_remove_free_space_cache(block_group);
1010
1011 spin_lock(&block_group->space_info->lock);
1012 list_del_init(&block_group->ro_list);
1013
1014 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
1015 WARN_ON(block_group->space_info->total_bytes
1016 < block_group->length);
1017 WARN_ON(block_group->space_info->bytes_readonly
1018 < block_group->length - block_group->zone_unusable);
1019 WARN_ON(block_group->space_info->bytes_zone_unusable
1020 < block_group->zone_unusable);
1021 WARN_ON(block_group->space_info->disk_total
1022 < block_group->length * factor);
1023 }
1024 block_group->space_info->total_bytes -= block_group->length;
1025 block_group->space_info->bytes_readonly -=
1026 (block_group->length - block_group->zone_unusable);
1027 block_group->space_info->bytes_zone_unusable -=
1028 block_group->zone_unusable;
1029 block_group->space_info->disk_total -= block_group->length * factor;
1030
1031 spin_unlock(&block_group->space_info->lock);
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044 ret = remove_block_group_free_space(trans, block_group);
1045 if (ret)
1046 goto out;
1047
1048 ret = remove_block_group_item(trans, path, block_group);
1049 if (ret < 0)
1050 goto out;
1051
1052 spin_lock(&block_group->lock);
1053 block_group->removed = 1;
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 remove_em = (atomic_read(&block_group->frozen) == 0);
1081 spin_unlock(&block_group->lock);
1082
1083 if (remove_em) {
1084 struct extent_map_tree *em_tree;
1085
1086 em_tree = &fs_info->mapping_tree;
1087 write_lock(&em_tree->lock);
1088 remove_extent_mapping(em_tree, em);
1089 write_unlock(&em_tree->lock);
1090
1091 free_extent_map(em);
1092 }
1093
1094out:
1095
1096 btrfs_put_block_group(block_group);
1097 if (remove_rsv)
1098 btrfs_delayed_refs_rsv_release(fs_info, 1);
1099 btrfs_free_path(path);
1100 return ret;
1101}
1102
1103struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
1104 struct btrfs_fs_info *fs_info, const u64 chunk_offset)
1105{
1106 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
1107 struct extent_map *em;
1108 struct map_lookup *map;
1109 unsigned int num_items;
1110
1111 read_lock(&em_tree->lock);
1112 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1113 read_unlock(&em_tree->lock);
1114 ASSERT(em && em->start == chunk_offset);
1115
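        /*
         * The number of items reserved below (3 + num_stripes) is meant to
         * cover the metadata updates needed to remove the block group: the
         * free space inode's orphan item, the block group item, the free
         * space item, and one device extent item per stripe.
         */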
1135 map = em->map_lookup;
1136 num_items = 3 + map->num_stripes;
1137 free_extent_map(em);
1138
1139 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
1140 num_items);
1141}
1142
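/*
 * Mark the block group read-only (cache->ro is a counter, so this may nest).
 * Unless @force is set, fail with -ENOSPC when the space_info cannot absorb
 * the group's unallocated bytes, and with -ETXTBSY when the block group is
 * backing an active swap file.
 */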
1156static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
1157{
1158 struct btrfs_space_info *sinfo = cache->space_info;
1159 u64 num_bytes;
1160 int ret = -ENOSPC;
1161
1162 spin_lock(&sinfo->lock);
1163 spin_lock(&cache->lock);
1164
1165 if (cache->swap_extents) {
1166 ret = -ETXTBSY;
1167 goto out;
1168 }
1169
1170 if (cache->ro) {
1171 cache->ro++;
1172 ret = 0;
1173 goto out;
1174 }
1175
1176 num_bytes = cache->length - cache->reserved - cache->pinned -
1177 cache->bytes_super - cache->zone_unusable - cache->used;
1178
1179
1180
1181
1182
1183 if (force) {
1184 ret = 0;
1185 } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
1186 u64 sinfo_used = btrfs_space_info_used(sinfo, true);
1187
1188
1189
1190
1191
1192 if (sinfo_used + num_bytes <= sinfo->total_bytes)
1193 ret = 0;
1194 } else {
1195
1196
1197
1198
1199
1200
1201 if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
1202 BTRFS_RESERVE_NO_FLUSH))
1203 ret = 0;
1204 }
1205
1206 if (!ret) {
1207 sinfo->bytes_readonly += num_bytes;
1208 if (btrfs_is_zoned(cache->fs_info)) {
1209
1210 sinfo->bytes_readonly += cache->zone_unusable;
1211 sinfo->bytes_zone_unusable -= cache->zone_unusable;
1212 cache->zone_unusable = 0;
1213 }
1214 cache->ro++;
1215 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
1216 }
1217out:
1218 spin_unlock(&cache->lock);
1219 spin_unlock(&sinfo->lock);
1220 if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
1221 btrfs_info(cache->fs_info,
1222 "unable to make block group %llu ro", cache->start);
1223 btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
1224 }
1225 return ret;
1226}
1227
1228static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
1229 struct btrfs_block_group *bg)
1230{
1231 struct btrfs_fs_info *fs_info = bg->fs_info;
1232 struct btrfs_transaction *prev_trans = NULL;
1233 const u64 start = bg->start;
1234 const u64 end = start + bg->length - 1;
1235 int ret;
1236
1237 spin_lock(&fs_info->trans_lock);
1238 if (trans->transaction->list.prev != &fs_info->trans_list) {
1239 prev_trans = list_last_entry(&trans->transaction->list,
1240 struct btrfs_transaction, list);
1241 refcount_inc(&prev_trans->use_count);
1242 }
1243 spin_unlock(&fs_info->trans_lock);
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255 mutex_lock(&fs_info->unused_bg_unpin_mutex);
1256 if (prev_trans) {
1257 ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
1258 EXTENT_DIRTY);
1259 if (ret)
1260 goto out;
1261 }
1262
1263 ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
1264 EXTENT_DIRTY);
1265out:
1266 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
1267 if (prev_trans)
1268 btrfs_put_transaction(prev_trans);
1269
1270 return ret == 0;
1271}
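
/*
 * Process the fs_info->unused_bgs list and remove any block group that no
 * longer has allocated space, deleting the underlying chunk as well.
 */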
1277void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
1278{
1279 struct btrfs_block_group *block_group;
1280 struct btrfs_space_info *space_info;
1281 struct btrfs_trans_handle *trans;
1282 const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
1283 int ret = 0;
1284
1285 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
1286 return;
1287
1288
1289
1290
1291
1292 if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
1293 return;
1294
1295 spin_lock(&fs_info->unused_bgs_lock);
1296 while (!list_empty(&fs_info->unused_bgs)) {
1297 int trimming;
1298
1299 block_group = list_first_entry(&fs_info->unused_bgs,
1300 struct btrfs_block_group,
1301 bg_list);
1302 list_del_init(&block_group->bg_list);
1303
1304 space_info = block_group->space_info;
1305
1306 if (ret || btrfs_mixed_space_info(space_info)) {
1307 btrfs_put_block_group(block_group);
1308 continue;
1309 }
1310 spin_unlock(&fs_info->unused_bgs_lock);
1311
1312 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
1313
1314
1315 down_write(&space_info->groups_sem);
1316
1317
1318
1319
1320
1321
1322 if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
1323 !btrfs_is_free_space_trimmed(block_group)) {
1324 trace_btrfs_skip_unused_block_group(block_group);
1325 up_write(&space_info->groups_sem);
1326
1327 btrfs_discard_queue_work(&fs_info->discard_ctl,
1328 block_group);
1329 goto next;
1330 }
1331
1332 spin_lock(&block_group->lock);
1333 if (block_group->reserved || block_group->pinned ||
1334 block_group->used || block_group->ro ||
1335 list_is_singular(&block_group->list)) {
1336
1337
1338
1339
1340
1341
1342 trace_btrfs_skip_unused_block_group(block_group);
1343 spin_unlock(&block_group->lock);
1344 up_write(&space_info->groups_sem);
1345 goto next;
1346 }
1347 spin_unlock(&block_group->lock);
1348
1349
1350 ret = inc_block_group_ro(block_group, 0);
1351 up_write(&space_info->groups_sem);
1352 if (ret < 0) {
1353 ret = 0;
1354 goto next;
1355 }
1356
1357
1358
1359
1360
1361 trans = btrfs_start_trans_remove_block_group(fs_info,
1362 block_group->start);
1363 if (IS_ERR(trans)) {
1364 btrfs_dec_block_group_ro(block_group);
1365 ret = PTR_ERR(trans);
1366 goto next;
1367 }
1368
1369
1370
1371
1372
1373 if (!clean_pinned_extents(trans, block_group)) {
1374 btrfs_dec_block_group_ro(block_group);
1375 goto end_trans;
1376 }
1377
1378
1379
1380
1381
1382
1383
1384
1385 spin_lock(&fs_info->discard_ctl.lock);
1386 if (!list_empty(&block_group->discard_list)) {
1387 spin_unlock(&fs_info->discard_ctl.lock);
1388 btrfs_dec_block_group_ro(block_group);
1389 btrfs_discard_queue_work(&fs_info->discard_ctl,
1390 block_group);
1391 goto end_trans;
1392 }
1393 spin_unlock(&fs_info->discard_ctl.lock);
1394
1395
1396 spin_lock(&space_info->lock);
1397 spin_lock(&block_group->lock);
1398
1399 btrfs_space_info_update_bytes_pinned(fs_info, space_info,
1400 -block_group->pinned);
1401 space_info->bytes_readonly += block_group->pinned;
1402 block_group->pinned = 0;
1403
1404 spin_unlock(&block_group->lock);
1405 spin_unlock(&space_info->lock);
1406
1407
1408
1409
1410
1411
1412
1413
1414 if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
1415 goto flip_async;
1416
1417
1418
1419
1420
1421 trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
1422 btrfs_is_zoned(fs_info);
1423
1424
1425 if (trimming)
1426 btrfs_freeze_block_group(block_group);
1427
1428
1429
1430
1431
1432 ret = btrfs_remove_chunk(trans, block_group->start);
1433
1434 if (ret) {
1435 if (trimming)
1436 btrfs_unfreeze_block_group(block_group);
1437 goto end_trans;
1438 }
1439
1440
1441
1442
1443
1444
1445 if (trimming) {
1446 spin_lock(&fs_info->unused_bgs_lock);
1447
1448
1449
1450
1451
1452 list_move(&block_group->bg_list,
1453 &trans->transaction->deleted_bgs);
1454 spin_unlock(&fs_info->unused_bgs_lock);
1455 btrfs_get_block_group(block_group);
1456 }
1457end_trans:
1458 btrfs_end_transaction(trans);
1459next:
1460 btrfs_put_block_group(block_group);
1461 spin_lock(&fs_info->unused_bgs_lock);
1462 }
1463 spin_unlock(&fs_info->unused_bgs_lock);
1464 mutex_unlock(&fs_info->reclaim_bgs_lock);
1465 return;
1466
1467flip_async:
1468 btrfs_end_transaction(trans);
1469 mutex_unlock(&fs_info->reclaim_bgs_lock);
1470 btrfs_put_block_group(block_group);
1471 btrfs_discard_punt_unused_bgs_list(fs_info);
1472}
1473
1474void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
1475{
1476 struct btrfs_fs_info *fs_info = bg->fs_info;
1477
1478 spin_lock(&fs_info->unused_bgs_lock);
1479 if (list_empty(&bg->bg_list)) {
1480 btrfs_get_block_group(bg);
1481 trace_btrfs_add_unused_block_group(bg);
1482 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
1483 }
1484 spin_unlock(&fs_info->unused_bgs_lock);
1485}
1486
1487void btrfs_reclaim_bgs_work(struct work_struct *work)
1488{
1489 struct btrfs_fs_info *fs_info =
1490 container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
1491 struct btrfs_block_group *bg;
1492 struct btrfs_space_info *space_info;
1493 LIST_HEAD(again_list);
1494
1495 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
1496 return;
1497
1498 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
1499 return;
1500
1501
1502
1503
1504
1505 if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
1506 btrfs_exclop_finish(fs_info);
1507 return;
1508 }
1509
1510 spin_lock(&fs_info->unused_bgs_lock);
1511 while (!list_empty(&fs_info->reclaim_bgs)) {
1512 u64 zone_unusable;
1513 int ret = 0;
1514
1515 bg = list_first_entry(&fs_info->reclaim_bgs,
1516 struct btrfs_block_group,
1517 bg_list);
1518 list_del_init(&bg->bg_list);
1519
1520 space_info = bg->space_info;
1521 spin_unlock(&fs_info->unused_bgs_lock);
1522
1523
1524 down_write(&space_info->groups_sem);
1525
1526 spin_lock(&bg->lock);
1527 if (bg->reserved || bg->pinned || bg->ro) {
1528
1529
1530
1531
1532
1533
1534 spin_unlock(&bg->lock);
1535 up_write(&space_info->groups_sem);
1536 goto next;
1537 }
1538 spin_unlock(&bg->lock);
1539
1540
1541 if (btrfs_fs_closing(fs_info)) {
1542 up_write(&space_info->groups_sem);
1543 goto next;
1544 }
1545
1546
1547
1548
1549
1550
1551
1552 zone_unusable = bg->zone_unusable;
1553 ret = inc_block_group_ro(bg, 0);
1554 up_write(&space_info->groups_sem);
1555 if (ret < 0)
1556 goto next;
1557
1558 btrfs_info(fs_info,
1559 "reclaiming chunk %llu with %llu%% used %llu%% unusable",
1560 bg->start, div_u64(bg->used * 100, bg->length),
1561 div64_u64(zone_unusable * 100, bg->length));
1562 trace_btrfs_reclaim_block_group(bg);
1563 ret = btrfs_relocate_chunk(fs_info, bg->start);
1564 if (ret)
1565 btrfs_err(fs_info, "error relocating chunk %llu",
1566 bg->start);
1567
1568next:
1569 spin_lock(&fs_info->unused_bgs_lock);
1570 if (ret == -EAGAIN && list_empty(&bg->bg_list))
1571 list_add_tail(&bg->bg_list, &again_list);
1572 else
1573 btrfs_put_block_group(bg);
1574 }
1575 list_splice_tail(&again_list, &fs_info->reclaim_bgs);
1576 spin_unlock(&fs_info->unused_bgs_lock);
1577 mutex_unlock(&fs_info->reclaim_bgs_lock);
1578 btrfs_exclop_finish(fs_info);
1579}
1580
1581void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
1582{
1583 spin_lock(&fs_info->unused_bgs_lock);
1584 if (!list_empty(&fs_info->reclaim_bgs))
1585 queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
1586 spin_unlock(&fs_info->unused_bgs_lock);
1587}
1588
1589void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
1590{
1591 struct btrfs_fs_info *fs_info = bg->fs_info;
1592
1593 spin_lock(&fs_info->unused_bgs_lock);
1594 if (list_empty(&bg->bg_list)) {
1595 btrfs_get_block_group(bg);
1596 trace_btrfs_add_reclaim_block_group(bg);
1597 list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
1598 }
1599 spin_unlock(&fs_info->unused_bgs_lock);
1600}
1601
1602static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
1603 struct btrfs_path *path)
1604{
1605 struct extent_map_tree *em_tree;
1606 struct extent_map *em;
1607 struct btrfs_block_group_item bg;
1608 struct extent_buffer *leaf;
1609 int slot;
1610 u64 flags;
1611 int ret = 0;
1612
1613 slot = path->slots[0];
1614 leaf = path->nodes[0];
1615
1616 em_tree = &fs_info->mapping_tree;
1617 read_lock(&em_tree->lock);
1618 em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
1619 read_unlock(&em_tree->lock);
1620 if (!em) {
1621 btrfs_err(fs_info,
1622 "logical %llu len %llu found bg but no related chunk",
1623 key->objectid, key->offset);
1624 return -ENOENT;
1625 }
1626
1627 if (em->start != key->objectid || em->len != key->offset) {
1628 btrfs_err(fs_info,
1629 "block group %llu len %llu mismatch with chunk %llu len %llu",
1630 key->objectid, key->offset, em->start, em->len);
1631 ret = -EUCLEAN;
1632 goto out_free_em;
1633 }
1634
1635 read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
1636 sizeof(bg));
1637 flags = btrfs_stack_block_group_flags(&bg) &
1638 BTRFS_BLOCK_GROUP_TYPE_MASK;
1639
1640 if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
1641 btrfs_err(fs_info,
1642"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
1643 key->objectid, key->offset, flags,
1644 (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
1645 ret = -EUCLEAN;
1646 }
1647
1648out_free_em:
1649 free_extent_map(em);
1650 return ret;
1651}
1652
1653static int find_first_block_group(struct btrfs_fs_info *fs_info,
1654 struct btrfs_path *path,
1655 struct btrfs_key *key)
1656{
1657 struct btrfs_root *root = fs_info->extent_root;
1658 int ret;
1659 struct btrfs_key found_key;
1660 struct extent_buffer *leaf;
1661 int slot;
1662
1663 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
1664 if (ret < 0)
1665 return ret;
1666
1667 while (1) {
1668 slot = path->slots[0];
1669 leaf = path->nodes[0];
1670 if (slot >= btrfs_header_nritems(leaf)) {
1671 ret = btrfs_next_leaf(root, path);
1672 if (ret == 0)
1673 continue;
1674 if (ret < 0)
1675 goto out;
1676 break;
1677 }
1678 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1679
1680 if (found_key.objectid >= key->objectid &&
1681 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1682 ret = read_bg_from_eb(fs_info, &found_key, path);
1683 break;
1684 }
1685
1686 path->slots[0]++;
1687 }
1688out:
1689 return ret;
1690}
1691
1692static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1693{
1694 u64 extra_flags = chunk_to_extended(flags) &
1695 BTRFS_EXTENDED_PROFILE_MASK;
1696
1697 write_seqlock(&fs_info->profiles_lock);
1698 if (flags & BTRFS_BLOCK_GROUP_DATA)
1699 fs_info->avail_data_alloc_bits |= extra_flags;
1700 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1701 fs_info->avail_metadata_alloc_bits |= extra_flags;
1702 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1703 fs_info->avail_system_alloc_bits |= extra_flags;
1704 write_sequnlock(&fs_info->profiles_lock);
1705}
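
/*
 * Map a physical disk address to the list of logical addresses that mirror it
 * inside the chunk starting at @chunk_start.  If @bdev is not NULL, only
 * stripes on that device are considered.  On success *logical points to an
 * allocated array of *naddrs addresses and *stripe_len is the IO stripe size;
 * the caller must kfree() the array.
 */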
1722int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
1723 struct block_device *bdev, u64 physical, u64 **logical,
1724 int *naddrs, int *stripe_len)
1725{
1726 struct extent_map *em;
1727 struct map_lookup *map;
1728 u64 *buf;
1729 u64 bytenr;
1730 u64 data_stripe_length;
1731 u64 io_stripe_size;
1732 int i, nr = 0;
1733 int ret = 0;
1734
1735 em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
1736 if (IS_ERR(em))
1737 return -EIO;
1738
1739 map = em->map_lookup;
1740 data_stripe_length = em->orig_block_len;
1741 io_stripe_size = map->stripe_len;
1742 chunk_start = em->start;
1743
1744
1745 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
1746 io_stripe_size = map->stripe_len * nr_data_stripes(map);
1747
1748 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
1749 if (!buf) {
1750 ret = -ENOMEM;
1751 goto out;
1752 }
1753
1754 for (i = 0; i < map->num_stripes; i++) {
1755 bool already_inserted = false;
1756 u64 stripe_nr;
1757 u64 offset;
1758 int j;
1759
1760 if (!in_range(physical, map->stripes[i].physical,
1761 data_stripe_length))
1762 continue;
1763
1764 if (bdev && map->stripes[i].dev->bdev != bdev)
1765 continue;
1766
1767 stripe_nr = physical - map->stripes[i].physical;
1768 stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
1769
1770 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1771 stripe_nr = stripe_nr * map->num_stripes + i;
1772 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
1773 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
1774 stripe_nr = stripe_nr * map->num_stripes + i;
1775 }
1776
1777
1778
1779
1780
1781
1782 bytenr = chunk_start + stripe_nr * io_stripe_size + offset;
1783
1784
1785 for (j = 0; j < nr; j++) {
1786 if (buf[j] == bytenr) {
1787 already_inserted = true;
1788 break;
1789 }
1790 }
1791
1792 if (!already_inserted)
1793 buf[nr++] = bytenr;
1794 }
1795
1796 *logical = buf;
1797 *naddrs = nr;
1798 *stripe_len = io_stripe_size;
1799out:
1800 free_extent_map(em);
1801 return ret;
1802}
1803
1804static int exclude_super_stripes(struct btrfs_block_group *cache)
1805{
1806 struct btrfs_fs_info *fs_info = cache->fs_info;
1807 const bool zoned = btrfs_is_zoned(fs_info);
1808 u64 bytenr;
1809 u64 *logical;
1810 int stripe_len;
1811 int i, nr, ret;
1812
1813 if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
1814 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
1815 cache->bytes_super += stripe_len;
1816 ret = btrfs_add_excluded_extent(fs_info, cache->start,
1817 stripe_len);
1818 if (ret)
1819 return ret;
1820 }
1821
1822 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1823 bytenr = btrfs_sb_offset(i);
1824 ret = btrfs_rmap_block(fs_info, cache->start, NULL,
1825 bytenr, &logical, &nr, &stripe_len);
1826 if (ret)
1827 return ret;
1828
1829
1830 if (zoned && nr) {
1831 btrfs_err(fs_info,
1832 "zoned: block group %llu must not contain super block",
1833 cache->start);
1834 return -EUCLEAN;
1835 }
1836
1837 while (nr--) {
1838 u64 len = min_t(u64, stripe_len,
1839 cache->start + cache->length - logical[nr]);
1840
1841 cache->bytes_super += len;
1842 ret = btrfs_add_excluded_extent(fs_info, logical[nr],
1843 len);
1844 if (ret) {
1845 kfree(logical);
1846 return ret;
1847 }
1848 }
1849
1850 kfree(logical);
1851 }
1852 return 0;
1853}
1854
1855static void link_block_group(struct btrfs_block_group *cache)
1856{
1857 struct btrfs_space_info *space_info = cache->space_info;
1858 int index = btrfs_bg_flags_to_raid_index(cache->flags);
1859
1860 down_write(&space_info->groups_sem);
1861 list_add_tail(&cache->list, &space_info->block_groups[index]);
1862 up_write(&space_info->groups_sem);
1863}
1864
1865static struct btrfs_block_group *btrfs_create_block_group_cache(
1866 struct btrfs_fs_info *fs_info, u64 start)
1867{
1868 struct btrfs_block_group *cache;
1869
1870 cache = kzalloc(sizeof(*cache), GFP_NOFS);
1871 if (!cache)
1872 return NULL;
1873
1874 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
1875 GFP_NOFS);
1876 if (!cache->free_space_ctl) {
1877 kfree(cache);
1878 return NULL;
1879 }
1880
1881 cache->start = start;
1882
1883 cache->fs_info = fs_info;
1884 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
1885
1886 cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
1887
1888 refcount_set(&cache->refs, 1);
1889 spin_lock_init(&cache->lock);
1890 init_rwsem(&cache->data_rwsem);
1891 INIT_LIST_HEAD(&cache->list);
1892 INIT_LIST_HEAD(&cache->cluster_list);
1893 INIT_LIST_HEAD(&cache->bg_list);
1894 INIT_LIST_HEAD(&cache->ro_list);
1895 INIT_LIST_HEAD(&cache->discard_list);
1896 INIT_LIST_HEAD(&cache->dirty_list);
1897 INIT_LIST_HEAD(&cache->io_list);
1898 btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
1899 atomic_set(&cache->frozen, 0);
1900 mutex_init(&cache->free_space_lock);
1901 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
1902
1903 return cache;
1904}
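
/*
 * Iterate over all chunk mappings and verify that each one has a matching
 * block group with the same start, length and type flags.
 */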
1910static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
1911{
1912 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
1913 struct extent_map *em;
1914 struct btrfs_block_group *bg;
1915 u64 start = 0;
1916 int ret = 0;
1917
1918 while (1) {
1919 read_lock(&map_tree->lock);
1920
1921
1922
1923
1924
1925 em = lookup_extent_mapping(map_tree, start, 1);
1926 read_unlock(&map_tree->lock);
1927 if (!em)
1928 break;
1929
1930 bg = btrfs_lookup_block_group(fs_info, em->start);
1931 if (!bg) {
1932 btrfs_err(fs_info,
1933 "chunk start=%llu len=%llu doesn't have corresponding block group",
1934 em->start, em->len);
1935 ret = -EUCLEAN;
1936 free_extent_map(em);
1937 break;
1938 }
1939 if (bg->start != em->start || bg->length != em->len ||
1940 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
1941 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
1942 btrfs_err(fs_info,
1943"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
1944 em->start, em->len,
1945 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
1946 bg->start, bg->length,
1947 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
1948 ret = -EUCLEAN;
1949 free_extent_map(em);
1950 btrfs_put_block_group(bg);
1951 break;
1952 }
1953 start = em->start + em->len;
1954 free_extent_map(em);
1955 btrfs_put_block_group(bg);
1956 }
1957 return ret;
1958}
1959
1960static int read_one_block_group(struct btrfs_fs_info *info,
1961 struct btrfs_block_group_item *bgi,
1962 const struct btrfs_key *key,
1963 int need_clear)
1964{
1965 struct btrfs_block_group *cache;
1966 struct btrfs_space_info *space_info;
1967 const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
1968 int ret;
1969
1970 ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
1971
1972 cache = btrfs_create_block_group_cache(info, key->objectid);
1973 if (!cache)
1974 return -ENOMEM;
1975
1976 cache->length = key->offset;
1977 cache->used = btrfs_stack_block_group_used(bgi);
1978 cache->flags = btrfs_stack_block_group_flags(bgi);
1979
1980 set_free_space_tree_thresholds(cache);
1981
1982 if (need_clear) {
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993 if (btrfs_test_opt(info, SPACE_CACHE))
1994 cache->disk_cache_state = BTRFS_DC_CLEAR;
1995 }
1996 if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
1997 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
1998 btrfs_err(info,
1999"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
2000 cache->start);
2001 ret = -EINVAL;
2002 goto error;
2003 }
2004
2005 ret = btrfs_load_block_group_zone_info(cache, false);
2006 if (ret) {
2007 btrfs_err(info, "zoned: failed to load zone info of bg %llu",
2008 cache->start);
2009 goto error;
2010 }
2011
2012
2013
2014
2015
2016
2017 ret = exclude_super_stripes(cache);
2018 if (ret) {
2019
2020 btrfs_free_excluded_extents(cache);
2021 goto error;
2022 }
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036 if (btrfs_is_zoned(info)) {
2037 btrfs_calc_zone_unusable(cache);
2038 } else if (cache->length == cache->used) {
2039 cache->last_byte_to_unpin = (u64)-1;
2040 cache->cached = BTRFS_CACHE_FINISHED;
2041 btrfs_free_excluded_extents(cache);
2042 } else if (cache->used == 0) {
2043 cache->last_byte_to_unpin = (u64)-1;
2044 cache->cached = BTRFS_CACHE_FINISHED;
2045 add_new_free_space(cache, cache->start,
2046 cache->start + cache->length);
2047 btrfs_free_excluded_extents(cache);
2048 }
2049
2050 ret = btrfs_add_block_group_cache(info, cache);
2051 if (ret) {
2052 btrfs_remove_free_space_cache(cache);
2053 goto error;
2054 }
2055 trace_btrfs_add_block_group(info, cache, 0);
2056 btrfs_update_space_info(info, cache->flags, cache->length,
2057 cache->used, cache->bytes_super,
2058 cache->zone_unusable, &space_info);
2059
2060 cache->space_info = space_info;
2061
2062 link_block_group(cache);
2063
2064 set_avail_alloc_bits(info, cache->flags);
2065 if (btrfs_chunk_readonly(info, cache->start)) {
2066 inc_block_group_ro(cache, 1);
2067 } else if (cache->used == 0) {
2068 ASSERT(list_empty(&cache->bg_list));
2069 if (btrfs_test_opt(info, DISCARD_ASYNC))
2070 btrfs_discard_queue_work(&info->discard_ctl, cache);
2071 else
2072 btrfs_mark_bg_unused(cache);
2073 }
2074 return 0;
2075error:
2076 btrfs_put_block_group(cache);
2077 return ret;
2078}
2079
2080static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
2081{
2082 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
2083 struct btrfs_space_info *space_info;
2084 struct rb_node *node;
2085 int ret = 0;
2086
2087 for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
2088 struct extent_map *em;
2089 struct map_lookup *map;
2090 struct btrfs_block_group *bg;
2091
2092 em = rb_entry(node, struct extent_map, rb_node);
2093 map = em->map_lookup;
2094 bg = btrfs_create_block_group_cache(fs_info, em->start);
2095 if (!bg) {
2096 ret = -ENOMEM;
2097 break;
2098 }
2099
2100
2101 bg->length = em->len;
2102 bg->flags = map->type;
2103 bg->last_byte_to_unpin = (u64)-1;
2104 bg->cached = BTRFS_CACHE_FINISHED;
2105 bg->used = em->len;
2106 bg->flags = map->type;
2107 ret = btrfs_add_block_group_cache(fs_info, bg);
2108 if (ret) {
2109 btrfs_remove_free_space_cache(bg);
2110 btrfs_put_block_group(bg);
2111 break;
2112 }
2113 btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
2114 0, 0, &space_info);
2115 bg->space_info = space_info;
2116 link_block_group(bg);
2117
2118 set_avail_alloc_bits(fs_info, bg->flags);
2119 }
2120 if (!ret)
2121 btrfs_init_global_block_rsv(fs_info);
2122 return ret;
2123}
2124
2125int btrfs_read_block_groups(struct btrfs_fs_info *info)
2126{
2127 struct btrfs_path *path;
2128 int ret;
2129 struct btrfs_block_group *cache;
2130 struct btrfs_space_info *space_info;
2131 struct btrfs_key key;
2132 int need_clear = 0;
2133 u64 cache_gen;
2134
2135 if (!info->extent_root)
2136 return fill_dummy_bgs(info);
2137
2138 key.objectid = 0;
2139 key.offset = 0;
2140 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2141 path = btrfs_alloc_path();
2142 if (!path)
2143 return -ENOMEM;
2144
2145 cache_gen = btrfs_super_cache_generation(info->super_copy);
2146 if (btrfs_test_opt(info, SPACE_CACHE) &&
2147 btrfs_super_generation(info->super_copy) != cache_gen)
2148 need_clear = 1;
2149 if (btrfs_test_opt(info, CLEAR_CACHE))
2150 need_clear = 1;
2151
2152 while (1) {
2153 struct btrfs_block_group_item bgi;
2154 struct extent_buffer *leaf;
2155 int slot;
2156
2157 ret = find_first_block_group(info, path, &key);
2158 if (ret > 0)
2159 break;
2160 if (ret != 0)
2161 goto error;
2162
2163 leaf = path->nodes[0];
2164 slot = path->slots[0];
2165
2166 read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
2167 sizeof(bgi));
2168
2169 btrfs_item_key_to_cpu(leaf, &key, slot);
2170 btrfs_release_path(path);
2171 ret = read_one_block_group(info, &bgi, &key, need_clear);
2172 if (ret < 0)
2173 goto error;
2174 key.objectid += key.offset;
2175 key.offset = 0;
2176 }
2177 btrfs_release_path(path);
2178
2179 list_for_each_entry(space_info, &info->space_info, list) {
2180 int i;
2181
2182 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2183 if (list_empty(&space_info->block_groups[i]))
2184 continue;
2185 cache = list_first_entry(&space_info->block_groups[i],
2186 struct btrfs_block_group,
2187 list);
2188 btrfs_sysfs_add_block_group_type(cache);
2189 }
2190
2191 if (!(btrfs_get_alloc_profile(info, space_info->flags) &
2192 (BTRFS_BLOCK_GROUP_RAID10 |
2193 BTRFS_BLOCK_GROUP_RAID1_MASK |
2194 BTRFS_BLOCK_GROUP_RAID56_MASK |
2195 BTRFS_BLOCK_GROUP_DUP)))
2196 continue;
2197
2198
2199
2200
2201 list_for_each_entry(cache,
2202 &space_info->block_groups[BTRFS_RAID_RAID0],
2203 list)
2204 inc_block_group_ro(cache, 1);
2205 list_for_each_entry(cache,
2206 &space_info->block_groups[BTRFS_RAID_SINGLE],
2207 list)
2208 inc_block_group_ro(cache, 1);
2209 }
2210
2211 btrfs_init_global_block_rsv(info);
2212 ret = check_chunk_block_group_mappings(info);
2213error:
2214 btrfs_free_path(path);
2215 return ret;
2216}
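
/*
 * Write the block group item for @block_group into the extent tree.  Called
 * while persisting the block groups created during the current transaction
 * (see btrfs_create_pending_block_groups() below).
 */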
2225static int insert_block_group_item(struct btrfs_trans_handle *trans,
2226 struct btrfs_block_group *block_group)
2227{
2228 struct btrfs_fs_info *fs_info = trans->fs_info;
2229 struct btrfs_block_group_item bgi;
2230 struct btrfs_root *root;
2231 struct btrfs_key key;
2232
2233 spin_lock(&block_group->lock);
2234 btrfs_set_stack_block_group_used(&bgi, block_group->used);
2235 btrfs_set_stack_block_group_chunk_objectid(&bgi,
2236 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2237 btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
2238 key.objectid = block_group->start;
2239 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2240 key.offset = block_group->length;
2241 spin_unlock(&block_group->lock);
2242
2243 root = fs_info->extent_root;
2244 return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
2245}
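
/*
 * Insert the chunk items, block group items and free space tree entries for
 * the block groups created during this transaction (trans->new_bgs), and
 * release the delayed refs reservation held for each of them.
 */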
2254void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
2255{
2256 struct btrfs_fs_info *fs_info = trans->fs_info;
2257 struct btrfs_block_group *block_group;
2258 int ret = 0;
2259
2260 while (!list_empty(&trans->new_bgs)) {
2261 int index;
2262
2263 block_group = list_first_entry(&trans->new_bgs,
2264 struct btrfs_block_group,
2265 bg_list);
2266 if (ret)
2267 goto next;
2268
2269 index = btrfs_bg_flags_to_raid_index(block_group->flags);
2270
2271 ret = insert_block_group_item(trans, block_group);
2272 if (ret)
2273 btrfs_abort_transaction(trans, ret);
2274 if (!block_group->chunk_item_inserted) {
2275 mutex_lock(&fs_info->chunk_mutex);
2276 ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group);
2277 mutex_unlock(&fs_info->chunk_mutex);
2278 if (ret)
2279 btrfs_abort_transaction(trans, ret);
2280 }
2281 ret = btrfs_finish_chunk_alloc(trans, block_group->start,
2282 block_group->length);
2283 if (ret)
2284 btrfs_abort_transaction(trans, ret);
2285 add_block_group_free_space(trans, block_group);
2286
2287
2288
2289
2290
2291
2292
2293 if (block_group->space_info->block_group_kobjs[index] == NULL)
2294 btrfs_sysfs_add_block_group_type(block_group);
2295
2296
2297next:
2298 btrfs_delayed_refs_rsv_release(fs_info, 1);
2299 list_del_init(&block_group->bg_list);
2300 }
2301 btrfs_trans_release_chunk_metadata(trans);
2302}
2303
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
						 u64 bytes_used, u64 type,
						 u64 chunk_offset, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *cache;
	int ret;

	btrfs_set_log_full_commit(trans);

	cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->length = size;
	set_free_space_tree_thresholds(cache);
	cache->used = bytes_used;
	cache->flags = type;
	cache->last_byte_to_unpin = (u64)-1;
	cache->cached = BTRFS_CACHE_FINISHED;
	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		cache->needs_free_space = 1;

	ret = btrfs_load_block_group_zone_info(cache, true);
	if (ret) {
		btrfs_put_block_group(cache);
		return ERR_PTR(ret);
	}

	ret = exclude_super_stripes(cache);
	if (ret) {
		/* We may have excluded something, so call this just in case */
		btrfs_free_excluded_extents(cache);
		btrfs_put_block_group(cache);
		return ERR_PTR(ret);
	}

	add_new_free_space(cache, chunk_offset, chunk_offset + size);

	btrfs_free_excluded_extents(cache);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(cache)) {
		u64 new_bytes_used = size - bytes_used;

		bytes_used += new_bytes_used >> 1;
		fragment_free_space(cache);
	}
#endif
	/*
	 * Ensure the corresponding space_info object is created and assigned
	 * to our block group.  We want the group added to the rbtree with its
	 * ->space_info pointer already set.
	 */
	cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
	ASSERT(cache->space_info);

	ret = btrfs_add_block_group_cache(fs_info, cache);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		btrfs_put_block_group(cache);
		return ERR_PTR(ret);
	}

	/*
	 * Now that the block group is linked to its space_info and inserted in
	 * the rbtree, update the space_info counters.
	 */
	trace_btrfs_add_block_group(fs_info, cache, 1);
	btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
				cache->bytes_super, 0, &cache->space_info);
	btrfs_update_global_block_rsv(fs_info);

	link_block_group(cache);

	list_add_tail(&cache->bg_list, &trans->new_bgs);
	trans->delayed_ref_updates++;
	btrfs_update_delayed_refs_rsv(trans);

	set_avail_alloc_bits(fs_info, type);
	return cache;
}
2386
/*
 * Mark one block group read-only; can be called several times for the same
 * block group.
 *
 * @cache:          the destination block group
 * @do_chunk_alloc: whether to pre-allocate a chunk so there is still free
 *                  space to allocate from after this group is marked RO
 */
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
			     bool do_chunk_alloc)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_trans_handle *trans;
	u64 alloc_flags;
	int ret;
	bool dirty_bg_running;

	do {
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		dirty_bg_running = false;

		/*
		 * We're not allowed to set block groups read-only after the
		 * dirty block group cache writeback has started.  If it has,
		 * wait for this transaction to commit and try again with a
		 * new one.
		 */
		mutex_lock(&fs_info->ro_block_group_mutex);
		if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
			u64 transid = trans->transid;

			mutex_unlock(&fs_info->ro_block_group_mutex);
			btrfs_end_transaction(trans);

			ret = btrfs_wait_for_commit(fs_info, transid);
			if (ret)
				return ret;
			dirty_bg_running = true;
		}
	} while (dirty_bg_running);

	if (do_chunk_alloc) {
		/*
		 * If we are changing raid levels, try to allocate a
		 * corresponding block group with the new raid level.
		 */
		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
		if (alloc_flags != cache->flags) {
			ret = btrfs_chunk_alloc(trans, alloc_flags,
						CHUNK_ALLOC_FORCE);
			/*
			 * ENOSPC is allowed here, we may have enough space
			 * already allocated at the new raid level to carry on.
			 */
			if (ret == -ENOSPC)
				ret = 0;
			if (ret < 0)
				goto out;
		}
	}

	ret = inc_block_group_ro(cache, 0);
	if (!do_chunk_alloc || ret == -ETXTBSY)
		goto unlock_out;
	if (!ret)
		goto out;
	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
	if (ret < 0)
		goto out;
	ret = inc_block_group_ro(cache, 0);
	if (ret == -ETXTBSY)
		goto unlock_out;
out:
	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
		mutex_lock(&fs_info->chunk_mutex);
		check_system_chunk(trans, alloc_flags);
		mutex_unlock(&fs_info->chunk_mutex);
	}
unlock_out:
	mutex_unlock(&fs_info->ro_block_group_mutex);

	btrfs_end_transaction(trans);
	return ret;
}
2476
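/*
 * Drop one read-only reference on a block group.  When the last reference is
 * dropped, return the group's unused bytes from the read-only accounting of
 * its space_info and, on zoned filesystems, migrate the unusable zone bytes
 * back.
 */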
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;

	BUG_ON(!cache->ro);

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	if (!--cache->ro) {
		if (btrfs_is_zoned(cache->fs_info)) {
			/* Migrate the unusable zone bytes back */
			cache->zone_unusable = cache->alloc_offset - cache->used;
			sinfo->bytes_zone_unusable += cache->zone_unusable;
			sinfo->bytes_readonly -= cache->zone_unusable;
		}
		num_bytes = cache->length - cache->reserved -
			    cache->pinned - cache->bytes_super -
			    cache->zone_unusable - cache->used;
		sinfo->bytes_readonly -= num_bytes;
		list_del_init(&cache->ro_list);
	}
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
}
2502
static int update_block_group_item(struct btrfs_trans_handle *trans,
				   struct btrfs_path *path,
				   struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;
	struct btrfs_root *root = fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;
	struct btrfs_block_group_item bgi;
	struct btrfs_key key;

	key.objectid = cache->start;
	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	key.offset = cache->length;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		goto fail;
	}

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	btrfs_set_stack_block_group_used(&bgi, cache->used);
	btrfs_set_stack_block_group_chunk_objectid(&bgi,
						   BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_stack_block_group_flags(&bgi, cache->flags);
	write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_release_path(path);
	return ret;
}
2539
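/*
 * Set up the v1 free space cache (SPACE_CACHE mount option) for a dirty block
 * group: look up or create its cache inode, truncate any stale contents and
 * preallocate room for the new cache, recording the outcome in
 * block_group->disk_cache_state.
 */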
2540static int cache_save_setup(struct btrfs_block_group *block_group,
2541 struct btrfs_trans_handle *trans,
2542 struct btrfs_path *path)
2543{
2544 struct btrfs_fs_info *fs_info = block_group->fs_info;
2545 struct btrfs_root *root = fs_info->tree_root;
2546 struct inode *inode = NULL;
2547 struct extent_changeset *data_reserved = NULL;
2548 u64 alloc_hint = 0;
2549 int dcs = BTRFS_DC_ERROR;
2550 u64 cache_size = 0;
2551 int retries = 0;
2552 int ret = 0;
2553
2554 if (!btrfs_test_opt(fs_info, SPACE_CACHE))
2555 return 0;
2556
2557
2558
2559
2560
2561 if (block_group->length < (100 * SZ_1M)) {
2562 spin_lock(&block_group->lock);
2563 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2564 spin_unlock(&block_group->lock);
2565 return 0;
2566 }
2567
2568 if (TRANS_ABORTED(trans))
2569 return 0;
2570again:
2571 inode = lookup_free_space_inode(block_group, path);
2572 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2573 ret = PTR_ERR(inode);
2574 btrfs_release_path(path);
2575 goto out;
2576 }
2577
2578 if (IS_ERR(inode)) {
2579 BUG_ON(retries);
2580 retries++;
2581
2582 if (block_group->ro)
2583 goto out_free;
2584
2585 ret = create_free_space_inode(trans, block_group, path);
2586 if (ret)
2587 goto out_free;
2588 goto again;
2589 }
2590
2591
2592
2593
2594
2595
2596 BTRFS_I(inode)->generation = 0;
2597 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
2598 if (ret) {
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609 btrfs_abort_transaction(trans, ret);
2610 goto out_put;
2611 }
2612 WARN_ON(ret);
2613
2614
2615 if (block_group->cache_generation == trans->transid &&
2616 i_size_read(inode)) {
2617 dcs = BTRFS_DC_SETUP;
2618 goto out_put;
2619 }
2620
2621 if (i_size_read(inode) > 0) {
2622 ret = btrfs_check_trunc_cache_free_space(fs_info,
2623 &fs_info->global_block_rsv);
2624 if (ret)
2625 goto out_put;
2626
2627 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
2628 if (ret)
2629 goto out_put;
2630 }
2631
2632 spin_lock(&block_group->lock);
2633 if (block_group->cached != BTRFS_CACHE_FINISHED ||
2634 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
2635
2636
2637
2638
2639
2640
2641 dcs = BTRFS_DC_WRITTEN;
2642 spin_unlock(&block_group->lock);
2643 goto out_put;
2644 }
2645 spin_unlock(&block_group->lock);
2646
2647
2648
2649
2650
2651 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
2652 ret = -ENOSPC;
2653 goto out_put;
2654 }
2655
2656
2657
2658
2659
2660
2661
2662 cache_size = div_u64(block_group->length, SZ_256M);
2663 if (!cache_size)
2664 cache_size = 1;
2665
2666 cache_size *= 16;
2667 cache_size *= fs_info->sectorsize;
2668
2669 ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
2670 cache_size);
2671 if (ret)
2672 goto out_put;
2673
2674 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, cache_size,
2675 cache_size, cache_size,
2676 &alloc_hint);
2677
2678
2679
2680
2681
2682
2683
2684
2685 if (!ret)
2686 dcs = BTRFS_DC_SETUP;
2687 else if (ret == -ENOSPC)
2688 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
2689
2690out_put:
2691 iput(inode);
2692out_free:
2693 btrfs_release_path(path);
2694out:
2695 spin_lock(&block_group->lock);
2696 if (!ret && dcs == BTRFS_DC_SETUP)
2697 block_group->cache_generation = trans->transid;
2698 block_group->disk_cache_state = dcs;
2699 spin_unlock(&block_group->lock);
2700
2701 extent_changeset_free(data_reserved);
2702 return ret;
2703}
2704
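/*
 * Walk the transaction's dirty block groups and run cache_save_setup() for
 * those whose v1 space cache still needs to be (re)created, so the cache
 * inodes exist before the actual cache writeback starts.
 */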
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *cache, *tmp;
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_path *path;

	if (list_empty(&cur_trans->dirty_bgs) ||
	    !btrfs_test_opt(fs_info, SPACE_CACHE))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Could add new block groups, use _safe just in case */
	list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
				 dirty_list) {
		if (cache->disk_cache_state == BTRFS_DC_CLEAR)
			cache_save_setup(cache, trans, path);
	}

	btrfs_free_path(path);
	return 0;
}
2730
/*
 * Transaction commit does the final block group cache writeback during a
 * critical section where nothing is allowed to change the filesystem.  That is
 * required for the cache to actually match the block group, but it can add a
 * lot of latency to the commit.
 *
 * btrfs_start_dirty_block_groups() is therefore called earlier to kick off the
 * block group cache IO.  Some of it may have to be redone if a block group
 * changes again before the commit, but it greatly reduces commit latency.
 */
2743int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
2744{
2745 struct btrfs_fs_info *fs_info = trans->fs_info;
2746 struct btrfs_block_group *cache;
2747 struct btrfs_transaction *cur_trans = trans->transaction;
2748 int ret = 0;
2749 int should_put;
2750 struct btrfs_path *path = NULL;
2751 LIST_HEAD(dirty);
2752 struct list_head *io = &cur_trans->io_bgs;
2753 int num_started = 0;
2754 int loops = 0;
2755
2756 spin_lock(&cur_trans->dirty_bgs_lock);
2757 if (list_empty(&cur_trans->dirty_bgs)) {
2758 spin_unlock(&cur_trans->dirty_bgs_lock);
2759 return 0;
2760 }
2761 list_splice_init(&cur_trans->dirty_bgs, &dirty);
2762 spin_unlock(&cur_trans->dirty_bgs_lock);
2763
2764again:
2765
2766 btrfs_create_pending_block_groups(trans);
2767
2768 if (!path) {
2769 path = btrfs_alloc_path();
2770 if (!path) {
2771 ret = -ENOMEM;
2772 goto out;
2773 }
2774 }
2775
2776
2777
2778
2779
2780
2781 mutex_lock(&trans->transaction->cache_write_mutex);
2782 while (!list_empty(&dirty)) {
2783 bool drop_reserve = true;
2784
2785 cache = list_first_entry(&dirty, struct btrfs_block_group,
2786 dirty_list);
2787
2788
2789
2790
2791
2792 if (!list_empty(&cache->io_list)) {
2793 list_del_init(&cache->io_list);
2794 btrfs_wait_cache_io(trans, cache, path);
2795 btrfs_put_block_group(cache);
2796 }
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807 spin_lock(&cur_trans->dirty_bgs_lock);
2808 list_del_init(&cache->dirty_list);
2809 spin_unlock(&cur_trans->dirty_bgs_lock);
2810
2811 should_put = 1;
2812
2813 cache_save_setup(cache, trans, path);
2814
2815 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
2816 cache->io_ctl.inode = NULL;
2817 ret = btrfs_write_out_cache(trans, cache, path);
2818 if (ret == 0 && cache->io_ctl.inode) {
2819 num_started++;
2820 should_put = 0;
2821
2822
2823
2824
2825
2826
2827 list_add_tail(&cache->io_list, io);
2828 } else {
2829
2830
2831
2832
2833 ret = 0;
2834 }
2835 }
2836 if (!ret) {
2837 ret = update_block_group_item(trans, path, cache);
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847 if (ret == -ENOENT) {
2848 ret = 0;
2849 spin_lock(&cur_trans->dirty_bgs_lock);
2850 if (list_empty(&cache->dirty_list)) {
2851 list_add_tail(&cache->dirty_list,
2852 &cur_trans->dirty_bgs);
2853 btrfs_get_block_group(cache);
2854 drop_reserve = false;
2855 }
2856 spin_unlock(&cur_trans->dirty_bgs_lock);
2857 } else if (ret) {
2858 btrfs_abort_transaction(trans, ret);
2859 }
2860 }
2861
2862
2863 if (should_put)
2864 btrfs_put_block_group(cache);
2865 if (drop_reserve)
2866 btrfs_delayed_refs_rsv_release(fs_info, 1);
2867
2868
2869
2870
2871
2872 mutex_unlock(&trans->transaction->cache_write_mutex);
2873 if (ret)
2874 goto out;
2875 mutex_lock(&trans->transaction->cache_write_mutex);
2876 }
2877 mutex_unlock(&trans->transaction->cache_write_mutex);
2878
2879
2880
2881
2882
2883 if (!ret)
2884 ret = btrfs_run_delayed_refs(trans, 0);
2885 if (!ret && loops == 0) {
2886 loops++;
2887 spin_lock(&cur_trans->dirty_bgs_lock);
2888 list_splice_init(&cur_trans->dirty_bgs, &dirty);
2889
2890
2891
2892
2893 if (!list_empty(&dirty)) {
2894 spin_unlock(&cur_trans->dirty_bgs_lock);
2895 goto again;
2896 }
2897 spin_unlock(&cur_trans->dirty_bgs_lock);
2898 }
2899out:
2900 if (ret < 0) {
2901 spin_lock(&cur_trans->dirty_bgs_lock);
2902 list_splice_init(&dirty, &cur_trans->dirty_bgs);
2903 spin_unlock(&cur_trans->dirty_bgs_lock);
2904 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
2905 }
2906
2907 btrfs_free_path(path);
2908 return ret;
2909}
2910
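/*
 * Write out all remaining dirty block group items and their space caches
 * during the critical section of the transaction commit.  Concurrent tasks
 * (for example free space cache endio workers) can still dirty block groups at
 * this point, so the list is drained under dirty_bgs_lock until it is empty.
 * If a block group item is not found (-ENOENT), its creation may still be
 * pending, so wait until we are the only writer left and retry once.
 */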
2911int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
2912{
2913 struct btrfs_fs_info *fs_info = trans->fs_info;
2914 struct btrfs_block_group *cache;
2915 struct btrfs_transaction *cur_trans = trans->transaction;
2916 int ret = 0;
2917 int should_put;
2918 struct btrfs_path *path;
2919 struct list_head *io = &cur_trans->io_bgs;
2920 int num_started = 0;
2921
2922 path = btrfs_alloc_path();
2923 if (!path)
2924 return -ENOMEM;
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941 spin_lock(&cur_trans->dirty_bgs_lock);
2942 while (!list_empty(&cur_trans->dirty_bgs)) {
2943 cache = list_first_entry(&cur_trans->dirty_bgs,
2944 struct btrfs_block_group,
2945 dirty_list);
2946
2947
2948
2949
2950
2951
2952 if (!list_empty(&cache->io_list)) {
2953 spin_unlock(&cur_trans->dirty_bgs_lock);
2954 list_del_init(&cache->io_list);
2955 btrfs_wait_cache_io(trans, cache, path);
2956 btrfs_put_block_group(cache);
2957 spin_lock(&cur_trans->dirty_bgs_lock);
2958 }
2959
2960
2961
2962
2963
2964 list_del_init(&cache->dirty_list);
2965 spin_unlock(&cur_trans->dirty_bgs_lock);
2966 should_put = 1;
2967
2968 cache_save_setup(cache, trans, path);
2969
2970 if (!ret)
2971 ret = btrfs_run_delayed_refs(trans,
2972 (unsigned long) -1);
2973
2974 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
2975 cache->io_ctl.inode = NULL;
2976 ret = btrfs_write_out_cache(trans, cache, path);
2977 if (ret == 0 && cache->io_ctl.inode) {
2978 num_started++;
2979 should_put = 0;
2980 list_add_tail(&cache->io_list, io);
2981 } else {
2982
2983
2984
2985
2986 ret = 0;
2987 }
2988 }
2989 if (!ret) {
2990 ret = update_block_group_item(trans, path, cache);
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004 if (ret == -ENOENT) {
3005 wait_event(cur_trans->writer_wait,
3006 atomic_read(&cur_trans->num_writers) == 1);
3007 ret = update_block_group_item(trans, path, cache);
3008 }
3009 if (ret)
3010 btrfs_abort_transaction(trans, ret);
3011 }
3012
3013
3014 if (should_put)
3015 btrfs_put_block_group(cache);
3016 btrfs_delayed_refs_rsv_release(fs_info, 1);
3017 spin_lock(&cur_trans->dirty_bgs_lock);
3018 }
3019 spin_unlock(&cur_trans->dirty_bgs_lock);
3020
3021
3022
3023
3024
3025 while (!list_empty(io)) {
3026 cache = list_first_entry(io, struct btrfs_block_group,
3027 io_list);
3028 list_del_init(&cache->io_list);
3029 btrfs_wait_cache_io(trans, cache, path);
3030 btrfs_put_block_group(cache);
3031 }
3032
3033 btrfs_free_path(path);
3034 return ret;
3035}
3036
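/*
 * Account an extent allocation or free: adjust the superblock's bytes_used,
 * the owning block group's used/reserved/pinned counters and its space_info,
 * mark the block group dirty in the current transaction and, when a group
 * becomes empty, possibly hand it over to the unused block group cleaner.
 */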
3037int btrfs_update_block_group(struct btrfs_trans_handle *trans,
3038 u64 bytenr, u64 num_bytes, int alloc)
3039{
3040 struct btrfs_fs_info *info = trans->fs_info;
3041 struct btrfs_block_group *cache = NULL;
3042 u64 total = num_bytes;
3043 u64 old_val;
3044 u64 byte_in_group;
3045 int factor;
3046 int ret = 0;
3047
3048
3049 spin_lock(&info->delalloc_root_lock);
3050 old_val = btrfs_super_bytes_used(info->super_copy);
3051 if (alloc)
3052 old_val += num_bytes;
3053 else
3054 old_val -= num_bytes;
3055 btrfs_set_super_bytes_used(info->super_copy, old_val);
3056 spin_unlock(&info->delalloc_root_lock);
3057
3058 while (total) {
3059 cache = btrfs_lookup_block_group(info, bytenr);
3060 if (!cache) {
3061 ret = -ENOENT;
3062 break;
3063 }
3064 factor = btrfs_bg_type_to_factor(cache->flags);
3065
3066
3067
3068
3069
3070
3071
3072 if (!alloc && !btrfs_block_group_done(cache))
3073 btrfs_cache_block_group(cache, 1);
3074
3075 byte_in_group = bytenr - cache->start;
3076 WARN_ON(byte_in_group > cache->length);
3077
3078 spin_lock(&cache->space_info->lock);
3079 spin_lock(&cache->lock);
3080
3081 if (btrfs_test_opt(info, SPACE_CACHE) &&
3082 cache->disk_cache_state < BTRFS_DC_CLEAR)
3083 cache->disk_cache_state = BTRFS_DC_CLEAR;
3084
3085 old_val = cache->used;
3086 num_bytes = min(total, cache->length - byte_in_group);
3087 if (alloc) {
3088 old_val += num_bytes;
3089 cache->used = old_val;
3090 cache->reserved -= num_bytes;
3091 cache->space_info->bytes_reserved -= num_bytes;
3092 cache->space_info->bytes_used += num_bytes;
3093 cache->space_info->disk_used += num_bytes * factor;
3094 spin_unlock(&cache->lock);
3095 spin_unlock(&cache->space_info->lock);
3096 } else {
3097 old_val -= num_bytes;
3098 cache->used = old_val;
3099 cache->pinned += num_bytes;
3100 btrfs_space_info_update_bytes_pinned(info,
3101 cache->space_info, num_bytes);
3102 cache->space_info->bytes_used -= num_bytes;
3103 cache->space_info->disk_used -= num_bytes * factor;
3104 spin_unlock(&cache->lock);
3105 spin_unlock(&cache->space_info->lock);
3106
3107 set_extent_dirty(&trans->transaction->pinned_extents,
3108 bytenr, bytenr + num_bytes - 1,
3109 GFP_NOFS | __GFP_NOFAIL);
3110 }
3111
3112 spin_lock(&trans->transaction->dirty_bgs_lock);
3113 if (list_empty(&cache->dirty_list)) {
3114 list_add_tail(&cache->dirty_list,
3115 &trans->transaction->dirty_bgs);
3116 trans->delayed_ref_updates++;
3117 btrfs_get_block_group(cache);
3118 }
3119 spin_unlock(&trans->transaction->dirty_bgs_lock);
3120
3121
3122
3123
3124
3125
3126
3127 if (!alloc && old_val == 0) {
3128 if (!btrfs_test_opt(info, DISCARD_ASYNC))
3129 btrfs_mark_bg_unused(cache);
3130 }
3131
3132 btrfs_put_block_group(cache);
3133 total -= num_bytes;
3134 bytenr += num_bytes;
3135 }
3136
3137
3138 btrfs_update_delayed_refs_rsv(trans);
3139 return ret;
3140}
3141
/*
 * Update the block group and space_info counters when the allocator reserves
 * space.
 *
 * @cache:     the block group we are allocating from
 * @ram_bytes: the number of bytes of file content; equal to @num_bytes except
 *             on the compressed write path
 * @num_bytes: the number of bytes being reserved
 * @delalloc:  the blocks are reserved for a delalloc write
 *
 * If the block group has become read-only the reservation cannot be made and
 * -EAGAIN is returned, otherwise this always succeeds.
 */
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
			     u64 ram_bytes, u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro) {
		ret = -EAGAIN;
	} else {
		cache->reserved += num_bytes;
		space_info->bytes_reserved += num_bytes;
		trace_btrfs_space_reservation(cache->fs_info, "space_info",
					      space_info->flags, num_bytes, 1);
		btrfs_space_info_update_bytes_may_use(cache->fs_info,
						      space_info, -ram_bytes);
		if (delalloc)
			cache->delalloc_bytes += num_bytes;

		/*
		 * Compression can use less space than was reserved, so wake
		 * any waiting tickets if that happened.
		 */
		if (num_bytes < ram_bytes)
			btrfs_try_granting_tickets(cache->fs_info, space_info);
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}
3185
/*
 * Update the block group and space_info counters when space that was reserved
 * but never used on disk is released again.
 *
 * @cache:     the block group the space was reserved from
 * @num_bytes: the number of bytes being released
 * @delalloc:  the blocks were reserved for a delalloc write
 */
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
			       u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro)
		space_info->bytes_readonly += num_bytes;
	cache->reserved -= num_bytes;
	space_info->bytes_reserved -= num_bytes;
	space_info->max_extent_size = 0;

	if (delalloc)
		cache->delalloc_bytes -= num_bytes;
	spin_unlock(&cache->lock);

	btrfs_try_granting_tickets(cache->fs_info, space_info);
	spin_unlock(&space_info->lock);
}
3217
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	list_for_each_entry(found, head, list) {
		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
			found->force_alloc = CHUNK_ALLOC_FORCE;
	}
}
3228
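/*
 * Decide whether a new chunk should be allocated for @sinfo.  CHUNK_ALLOC_FORCE
 * always allocates, CHUNK_ALLOC_LIMITED keeps a small amount of unallocated
 * space around, and otherwise a chunk is only allocated once the space_info is
 * mostly full.
 */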
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
			      struct btrfs_space_info *sinfo, int force)
{
	u64 bytes_used = btrfs_space_info_used(sinfo, false);
	u64 thresh;

	if (force == CHUNK_ALLOC_FORCE)
		return 1;

	/*
	 * In limited mode, we want to have some free space up to about 1% of
	 * the filesystem size.
	 */
	if (force == CHUNK_ALLOC_LIMITED) {
		thresh = btrfs_super_total_bytes(fs_info->super_copy);
		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));

		if (sinfo->total_bytes - bytes_used < thresh)
			return 1;
	}

	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
		return 0;
	return 1;
}
3254
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);

	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
3261
static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
{
	struct btrfs_block_group *bg;
	int ret;

	/*
	 * Check that we have enough space in the system space_info, because we
	 * will need to update device items in the chunk btree and insert a new
	 * chunk item there as well.  This allocates a new system block group
	 * if needed.
	 */
	check_system_chunk(trans, flags);

	bg = btrfs_alloc_chunk(trans, flags);
	if (IS_ERR(bg)) {
		ret = PTR_ERR(bg);
		goto out;
	}

	/*
	 * If this is a system chunk allocation, stop here and do not add the
	 * chunk item to the chunk btree.  A system chunk allocation can be
	 * triggered while COWing an extent buffer of the chunk btree, in which
	 * case inserting the chunk item now could deadlock on a locked parent
	 * node.  Defer the chunk btree updates to the second phase of chunk
	 * allocation and keep our reservation until that phase completes.
	 */
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		return 0;

	ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
	/*
	 * Normally we are not expected to fail with -ENOSPC here, since we
	 * have previously reserved space in the system space_info and
	 * allocated a new system chunk if necessary.  It can still happen,
	 * for example when the existing system block groups have a profile
	 * that cannot be used for the allocation, so allocate one more system
	 * chunk and retry.
	 */
	if (ret == -ENOSPC) {
		const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
		struct btrfs_block_group *sys_bg;

		sys_bg = btrfs_alloc_chunk(trans, sys_flags);
		if (IS_ERR(sys_bg)) {
			ret = PTR_ERR(sys_bg);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	} else if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
out:
	btrfs_trans_release_chunk_metadata(trans);

	return ret;
}
3363
/*
 * Chunk allocation is done in two phases:
 *
 * 1) Phase 1 - through btrfs_chunk_alloc() we reserve device extents, create
 *    the chunk mapping and the in-memory block group (btrfs_alloc_chunk())
 *    and, for anything but system chunks, insert the chunk item into the
 *    chunk btree (btrfs_chunk_alloc_add_chunk_item()).
 *
 * 2) Phase 2 - through btrfs_create_pending_block_groups(), at transaction
 *    commit time or when the transaction handle is ended, we insert the block
 *    group item into the extent tree, insert the chunk item if it was deferred
 *    in phase 1 (system chunks) and update the device items for the new device
 *    extents (btrfs_finish_chunk_alloc()).
 *
 * The chunk item insertion for system chunks is deferred to phase 2 because a
 * system chunk allocation can be triggered while COWing an extent buffer of
 * the chunk btree itself, and inserting the item at that point could deadlock
 * on a node the caller already holds locked.
 *
 * Before touching the chunk btree, check_system_chunk() makes sure there is
 * enough unallocated space in the system space_info, allocating a new system
 * chunk if necessary, and reserves space from the chunk block reserve so that
 * the chunk btree modifications cannot fail with -ENOSPC.  The reservation is
 * released with btrfs_trans_release_chunk_metadata() once the corresponding
 * phase has finished.
 */
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
		      enum btrfs_chunk_alloc_enum force)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_space_info *space_info;
	bool wait_for_alloc = false;
	bool should_alloc = false;
	int ret = 0;

	/* Don't re-enter if we're already allocating a chunk */
	if (trans->allocating_chunk)
		return -ENOSPC;

	/*
	 * Don't re-enter if we are already removing a chunk, we would
	 * deadlock.
	 */
	if (trans->removing_chunk)
		return -ENOSPC;

	space_info = btrfs_find_space_info(fs_info, flags);
	ASSERT(space_info);

	do {
		spin_lock(&space_info->lock);
		if (force < space_info->force_alloc)
			force = space_info->force_alloc;
		should_alloc = should_alloc_chunk(fs_info, space_info, force);
		if (space_info->full) {
			/* No more free physical space */
			if (should_alloc)
				ret = -ENOSPC;
			else
				ret = 0;
			spin_unlock(&space_info->lock);
			return ret;
		} else if (!should_alloc) {
			spin_unlock(&space_info->lock);
			return 0;
		} else if (space_info->chunk_alloc) {
			/*
			 * Someone else is already allocating, so block until
			 * they are finished and then loop to recheck whether
			 * we still need to allocate.
			 */
			wait_for_alloc = true;
			spin_unlock(&space_info->lock);
			mutex_lock(&fs_info->chunk_mutex);
			mutex_unlock(&fs_info->chunk_mutex);
		} else {
			/* Proceed with allocation */
			space_info->chunk_alloc = 1;
			wait_for_alloc = false;
			spin_unlock(&space_info->lock);
		}

		cond_resched();
	} while (wait_for_alloc);

	mutex_lock(&fs_info->chunk_mutex);
	trans->allocating_chunk = true;

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);

	/*
	 * If we're doing a data chunk, go ahead and make sure that we keep a
	 * reasonable number of metadata chunks allocated in the filesystem as
	 * well.
	 */
	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
		fs_info->data_chunk_allocations++;
		if (!(fs_info->data_chunk_allocations %
		      fs_info->metadata_ratio))
			force_metadata_allocation(fs_info);
	}

	ret = do_chunk_alloc(trans, flags);
	trans->allocating_chunk = false;

	spin_lock(&space_info->lock);
	if (ret < 0) {
		if (ret == -ENOSPC)
			space_info->full = 1;
		else
			goto out;
	} else {
		ret = 1;
		space_info->max_extent_size = 0;
	}

	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
	space_info->chunk_alloc = 0;
	spin_unlock(&space_info->lock);
	mutex_unlock(&fs_info->chunk_mutex);

	return ret;
}
3567
static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
{
	u64 num_dev;

	num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
	if (!num_dev)
		num_dev = fs_info->fs_devices->rw_devices;

	return num_dev;
}
3578
/*
 * Reserve space in the system space_info for allocating or removing a chunk.
 */
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_space_info *info;
	u64 left;
	u64 thresh;
	int ret = 0;
	u64 num_devs;

	/*
	 * Needed because we can end up allocating a system chunk and need an
	 * atomic and race free space reservation in the chunk block reserve.
	 */
	lockdep_assert_held(&fs_info->chunk_mutex);

	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	spin_lock(&info->lock);
	left = info->total_bytes - btrfs_space_info_used(info, true);
	spin_unlock(&info->lock);

	num_devs = get_profile_num_devs(fs_info, type);

	/* num_devs device items to update and 1 chunk item to add or remove. */
	thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
		 btrfs_calc_insert_metadata_size(fs_info, 1);

	if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
		btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
			   left, thresh, type);
		btrfs_dump_space_info(fs_info, info, 0, 0);
	}

	if (left < thresh) {
		u64 flags = btrfs_system_alloc_profile(fs_info);
		struct btrfs_block_group *bg;

		/*
		 * Ignore failure to create the system chunk, we might end up
		 * not needing it, as we might not need to COW all nodes/leaves
		 * from the paths we visit in the chunk btree (they may already
		 * have been COWed or created in the current transaction).
		 */
		bg = btrfs_alloc_chunk(trans, flags);
		if (IS_ERR(bg)) {
			ret = PTR_ERR(bg);
		} else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
			/*
			 * If our caller is not allocating a system chunk it is
			 * safe to insert the chunk item for the new system
			 * block group right away; otherwise leave it to phase 2
			 * of chunk allocation to avoid deadlocking on the chunk
			 * btree (see the comment above btrfs_chunk_alloc()).
			 */
			btrfs_chunk_alloc_add_chunk_item(trans, bg);
		}
	}

	if (!ret) {
		ret = btrfs_block_rsv_add(fs_info->chunk_root,
					  &fs_info->chunk_block_rsv,
					  thresh, BTRFS_RESERVE_NO_FLUSH);
		if (!ret)
			trans->chunk_bytes_reserved += thresh;
	}
}
3651
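/*
 * Release the free space cache inode references (iref) still held by block
 * groups, typically while tearing the filesystem down.
 */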
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
	struct btrfs_block_group *block_group;
	u64 last = 0;

	while (1) {
		struct inode *inode;

		block_group = btrfs_lookup_first_block_group(info, last);
		while (block_group) {
			btrfs_wait_block_group_cache_done(block_group);
			spin_lock(&block_group->lock);
			if (block_group->iref)
				break;
			spin_unlock(&block_group->lock);
			block_group = btrfs_next_block_group(block_group);
		}
		if (!block_group) {
			if (last == 0)
				break;
			last = 0;
			continue;
		}

		inode = block_group->inode;
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		ASSERT(block_group->io_ctl.inode == NULL);
		iput(inode);
		last = block_group->start + block_group->length;
		btrfs_put_block_group(block_group);
	}
}
3686
/*
 * Must be called only after stopping all workers, since block group caching
 * kthreads could otherwise still be running and race with us while we free
 * the block groups below.
 */
3692int btrfs_free_block_groups(struct btrfs_fs_info *info)
3693{
3694 struct btrfs_block_group *block_group;
3695 struct btrfs_space_info *space_info;
3696 struct btrfs_caching_control *caching_ctl;
3697 struct rb_node *n;
3698
3699 spin_lock(&info->block_group_cache_lock);
3700 while (!list_empty(&info->caching_block_groups)) {
3701 caching_ctl = list_entry(info->caching_block_groups.next,
3702 struct btrfs_caching_control, list);
3703 list_del(&caching_ctl->list);
3704 btrfs_put_caching_control(caching_ctl);
3705 }
3706 spin_unlock(&info->block_group_cache_lock);
3707
3708 spin_lock(&info->unused_bgs_lock);
3709 while (!list_empty(&info->unused_bgs)) {
3710 block_group = list_first_entry(&info->unused_bgs,
3711 struct btrfs_block_group,
3712 bg_list);
3713 list_del_init(&block_group->bg_list);
3714 btrfs_put_block_group(block_group);
3715 }
3716 spin_unlock(&info->unused_bgs_lock);
3717
3718 spin_lock(&info->unused_bgs_lock);
3719 while (!list_empty(&info->reclaim_bgs)) {
3720 block_group = list_first_entry(&info->reclaim_bgs,
3721 struct btrfs_block_group,
3722 bg_list);
3723 list_del_init(&block_group->bg_list);
3724 btrfs_put_block_group(block_group);
3725 }
3726 spin_unlock(&info->unused_bgs_lock);
3727
3728 spin_lock(&info->block_group_cache_lock);
3729 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3730 block_group = rb_entry(n, struct btrfs_block_group,
3731 cache_node);
3732 rb_erase(&block_group->cache_node,
3733 &info->block_group_cache_tree);
3734 RB_CLEAR_NODE(&block_group->cache_node);
3735 spin_unlock(&info->block_group_cache_lock);
3736
3737 down_write(&block_group->space_info->groups_sem);
3738 list_del(&block_group->list);
3739 up_write(&block_group->space_info->groups_sem);
3740
3741
3742
3743
3744
3745 if (block_group->cached == BTRFS_CACHE_NO ||
3746 block_group->cached == BTRFS_CACHE_ERROR)
3747 btrfs_free_excluded_extents(block_group);
3748
3749 btrfs_remove_free_space_cache(block_group);
3750 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
3751 ASSERT(list_empty(&block_group->dirty_list));
3752 ASSERT(list_empty(&block_group->io_list));
3753 ASSERT(list_empty(&block_group->bg_list));
3754 ASSERT(refcount_read(&block_group->refs) == 1);
3755 ASSERT(block_group->swap_extents == 0);
3756 btrfs_put_block_group(block_group);
3757
3758 spin_lock(&info->block_group_cache_lock);
3759 }
3760 spin_unlock(&info->block_group_cache_lock);
3761
3762 btrfs_release_global_block_rsv(info);
3763
3764 while (!list_empty(&info->space_info)) {
3765 space_info = list_entry(info->space_info.next,
3766 struct btrfs_space_info,
3767 list);
3768
3769
3770
3771
3772
3773 if (WARN_ON(space_info->bytes_pinned > 0 ||
3774 space_info->bytes_reserved > 0 ||
3775 space_info->bytes_may_use > 0))
3776 btrfs_dump_space_info(info, space_info, 0, 0);
3777 WARN_ON(space_info->reclaim_size > 0);
3778 list_del(&space_info->list);
3779 btrfs_sysfs_remove_space_info(space_info);
3780 }
3781 return 0;
3782}
3783
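/*
 * Freezing a block group prevents its underlying chunk mapping (extent map)
 * from being removed while it is still in use; btrfs_unfreeze_block_group()
 * performs the deferred removal once the last freezer is gone and the block
 * group was deleted in the meantime.
 */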
void btrfs_freeze_block_group(struct btrfs_block_group *cache)
{
	atomic_inc(&cache->frozen);
}

void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	bool cleanup;

	spin_lock(&block_group->lock);
	cleanup = (atomic_dec_and_test(&block_group->frozen) &&
		   block_group->removed);
	spin_unlock(&block_group->lock);

	if (cleanup) {
		em_tree = &fs_info->mapping_tree;
		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, block_group->start,
					   1);
		BUG_ON(!em);
		remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* Once for us and once for the tree */
		free_extent_map(em);
		free_extent_map(em);

		/*
		 * We may have left one free space entry and other tasks
		 * trimming this block group may have left one each, so free
		 * them if any.
		 */
		__btrfs_remove_free_space_cache(block_group->free_space_ctl);
	}
}
3822
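/*
 * Track active swap file extents in a block group.  Taking a reference fails
 * if the group is already read-only, and a group with active swap extents
 * cannot in turn be turned read-only (-ETXTBSY).
 */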
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
{
	bool ret = true;

	spin_lock(&bg->lock);
	if (bg->ro)
		ret = false;
	else
		bg->swap_extents++;
	spin_unlock(&bg->lock);

	return ret;
}

void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
{
	spin_lock(&bg->lock);
	ASSERT(!bg->ro);
	ASSERT(bg->swap_extents >= amount);
	bg->swap_extents -= amount;
	spin_unlock(&bg->lock);
}
3845