1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "ext4_jbd2.h"
25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/log2.h>
28#include <linux/slab.h>
29#include <trace/events/ext4.h>
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/*
 * Slab cache handles private to this file. The caches themselves are
 * created outside this excerpt.
 */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/*
 * Table of group-info slab caches and a parallel table of their names;
 * both have NR_GRPINFO_CACHES entries.
 * NOTE(review): the names suggest one cache per block size from 1k to
 * 128k -- confirm against the code that creates the caches.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

/* Forward declarations of static helpers defined elsewhere in this file. */
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_free_data_callback(struct super_block *sb,
				struct ext4_journal_cb_entry *jce, int rc);
364
365static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
366{
367#if BITS_PER_LONG == 64
368 *bit += ((unsigned long) addr & 7UL) << 3;
369 addr = (void *) ((unsigned long) addr & ~7UL);
370#elif BITS_PER_LONG == 32
371 *bit += ((unsigned long) addr & 3UL) << 3;
372 addr = (void *) ((unsigned long) addr & ~3UL);
373#else
374#error "how many bits you are?!"
375#endif
376 return addr;
377}
378
/*
 * Test bit @bit of the bitmap at @addr. The address is first aligned with
 * mb_correct_addr_and_bit() so ext4_test_bit() always sees a long-aligned
 * pointer. Returns the result of ext4_test_bit().
 */
static inline int mb_test_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, aligned);
}
388
/*
 * Set bit @bit of the bitmap at @addr, after aligning the address with
 * mb_correct_addr_and_bit().
 */
static inline void mb_set_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, aligned);
}
394
/*
 * Clear bit @bit of the bitmap at @addr, after aligning the address with
 * mb_correct_addr_and_bit().
 */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, aligned);
}
400
/*
 * Find the next zero bit at or after @start in the bitmap at @addr, looking
 * at no more than @max bits.
 *
 * The address is aligned first; the resulting bit shift is added to both
 * search bounds and subtracted from the answer again. Returns the bit index
 * relative to @addr, clamped to @max.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;
	void *base = mb_correct_addr_and_bit(&shift, addr);

	found = ext4_find_next_zero_bit(base, max + shift, start + shift) - shift;
	return found > max ? max : found;
}
413
/*
 * Find the next set bit at or after @start in the bitmap at @addr, looking
 * at no more than @max bits.
 *
 * The address is aligned first; the resulting bit shift is added to both
 * search bounds and subtracted from the answer again. Returns the bit index
 * relative to @addr, clamped to @max.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;
	void *base = mb_correct_addr_and_bit(&shift, addr);

	found = ext4_find_next_bit(base, max + shift, start + shift) - shift;
	return found > max ? max : found;
}
426
427static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
428{
429 char *bb;
430
431 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
432 BUG_ON(max == NULL);
433
434 if (order > e4b->bd_blkbits + 1) {
435 *max = 0;
436 return NULL;
437 }
438
439
440 if (order == 0) {
441 *max = 1 << (e4b->bd_blkbits + 3);
442 return e4b->bd_bitmap;
443 }
444
445 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
446 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
447
448 return bb;
449}
450
451#ifdef DOUBLE_CHECK
452static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
453 int first, int count)
454{
455 int i;
456 struct super_block *sb = e4b->bd_sb;
457
458 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
459 return;
460 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
461 for (i = 0; i < count; i++) {
462 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
463 ext4_fsblk_t blocknr;
464
465 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
466 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
467 ext4_grp_locked_error(sb, e4b->bd_group,
468 inode ? inode->i_ino : 0,
469 blocknr,
470 "freeing block already freed "
471 "(bit %u)",
472 first + i);
473 }
474 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
475 }
476}
477
478static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
479{
480 int i;
481
482 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
483 return;
484 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
485 for (i = 0; i < count; i++) {
486 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
487 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
488 }
489}
490
491static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
492{
493 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
494 unsigned char *b1, *b2;
495 int i;
496 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
497 b2 = (unsigned char *) bitmap;
498 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
499 if (b1[i] != b2[i]) {
500 ext4_msg(e4b->bd_sb, KERN_ERR,
501 "corruption in group %u "
502 "at byte %u(%u): %x in copy != %x "
503 "on disk/prealloc",
504 e4b->bd_group, i, i * 8, b1[i], b2[i]);
505 BUG();
506 }
507 }
508 }
509}
510
511#else
/* DOUBLE_CHECK disabled: shadow-bitmap bookkeeping on free is a no-op. */
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
}
/* DOUBLE_CHECK disabled: shadow-bitmap bookkeeping on allocation is a no-op. */
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
}
/* DOUBLE_CHECK disabled: bitmap comparison is a no-op. */
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
}
526#endif
527
528#ifdef AGGRESSIVE_CHECK
529
/*
 * Assertion used by __mb_check_buddy(): when @assert is false, print the
 * caller's location and the stringified expression, then BUG().
 *
 * The expansion refers to variables named 'function', 'file' and 'line',
 * so it can only be used where those are in scope.
 */
#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)
539
/*
 * AGGRESSIVE_CHECK consistency check of the buddy data loaded in @e4b.
 * Only every 100th call performs the check; the rest return immediately.
 *
 * @file, @function and @line identify the caller and are consumed by
 * MB_CHECK_ASSERT() when an assertion fails.
 *
 * Always returns 0; any inconsistency ends in BUG() via MB_CHECK_ASSERT().
 */
static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		/* rate limit: run the full check on one call in a hundred */
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	/* Walk from the highest order down, comparing each level with the one below. */
	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/*
				 * Bit set at this order: the two halves one
				 * order down must not both be clear.
				 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* Bit clear at this order: both halves one order down must be set. */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			/* ... and every covered bit of the block bitmap must be clear. */
			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		/* The per-order counter must equal the number of clear bits seen. */
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	/* Scan the order-0 bitmap, counting runs of clear bits as fragments. */
	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* A set bit at order 0 must be covered by a set bit at every order checked. */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	/* Every entry on the group's prealloc list must lie in this group with all its bits set. */
	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
/* AGGRESSIVE_CHECK enabled: pass the call site on to __mb_check_buddy(). */
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
/* AGGRESSIVE_CHECK disabled: the check expands to nothing. */
#define mb_check_buddy(e4b)
#endif
643
644
645
646
647
648
649
650static void ext4_mb_mark_free_simple(struct super_block *sb,
651 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
652 struct ext4_group_info *grp)
653{
654 struct ext4_sb_info *sbi = EXT4_SB(sb);
655 ext4_grpblk_t min;
656 ext4_grpblk_t max;
657 ext4_grpblk_t chunk;
658 unsigned short border;
659
660 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
661
662 border = 2 << sb->s_blocksize_bits;
663
664 while (len > 0) {
665
666 max = ffs(first | border) - 1;
667
668
669 min = fls(len) - 1;
670
671 if (max < min)
672 min = max;
673 chunk = 1 << min;
674
675
676 grp->bb_counters[min]++;
677 if (min > 0)
678 mb_clear_bit(first >> min,
679 buddy + sbi->s_mb_offsets[min]);
680
681 len -= chunk;
682 first += chunk;
683 }
684}
685
686
687
688
689
690static void
691mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
692{
693 int i;
694 int bits;
695
696 grp->bb_largest_free_order = -1;
697
698 bits = sb->s_blocksize_bits + 1;
699 for (i = bits; i >= 0; i--) {
700 if (grp->bb_counters[i] > 0) {
701 grp->bb_largest_free_order = i;
702 break;
703 }
704 }
705}
706
707static noinline_for_stack
708void ext4_mb_generate_buddy(struct super_block *sb,
709 void *buddy, void *bitmap, ext4_group_t group)
710{
711 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
712 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
713 ext4_grpblk_t i = 0;
714 ext4_grpblk_t first;
715 ext4_grpblk_t len;
716 unsigned free = 0;
717 unsigned fragments = 0;
718 unsigned long long period = get_cycles();
719
720
721
722 i = mb_find_next_zero_bit(bitmap, max, 0);
723 grp->bb_first_free = i;
724 while (i < max) {
725 fragments++;
726 first = i;
727 i = mb_find_next_bit(bitmap, max, i);
728 len = i - first;
729 free += len;
730 if (len > 1)
731 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
732 else
733 grp->bb_counters[0]++;
734 if (i < max)
735 i = mb_find_next_zero_bit(bitmap, max, i);
736 }
737 grp->bb_fragments = fragments;
738
739 if (free != grp->bb_free) {
740 ext4_grp_locked_error(sb, group, 0, 0,
741 "%u clusters in bitmap, %u in gd",
742 free, grp->bb_free);
743
744
745
746
747 grp->bb_free = free;
748 }
749 mb_set_largest_free_order(sb, grp);
750
751 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
752
753 period = get_cycles() - period;
754 spin_lock(&EXT4_SB(sb)->s_bal_lock);
755 EXT4_SB(sb)->s_mb_buddies_generated++;
756 EXT4_SB(sb)->s_mb_generation_time += period;
757 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
758}
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
/*
 * Fill one page of the buddy-cache inode.
 *
 * The page holds blocks_per_page filesystem-block-sized slots. Slot number
 * (first_block + i) belongs to group (first_block + i) >> 1; an even slot
 * receives a copy of that group's block bitmap, an odd slot receives the
 * buddy data generated from a bitmap.
 *
 * @page:   page of the buddy-cache inode to initialise
 * @incore: bitmap to generate buddy data from when the first slot handled
 *          is a buddy slot; must be NULL when the first slot handled is a
 *          bitmap slot (both enforced with BUG_ON)
 *
 * Returns 0 on success, -ENOMEM if the buffer_head array cannot be
 * allocated or a bitmap read cannot be started, -EIO if waiting for a
 * bitmap fails. The page is marked up to date only on success.
 */
static int ext4_mb_init_cache(struct page *page, char *incore)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_CACHE_SIZE / blocksize;

	/* two slots per group, but never fewer than one group per page */
	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* one buffer_head per group; use the on-stack slot for a single group */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, GFP_NOFS);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* start reading the on-disk bitmap of every group covered by this page */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * A group that no longer needs initialisation on an
		 * up-to-date page is skipped; its NULL buffer_head makes
		 * the loops below skip it as well.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
			err = -ENOMEM;
			goto out;
		}
		mb_debug(1, "read bitmap for group %u\n", group);
	}

	/* wait for the reads started above */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
			err = -EIO;
			goto out;
		}
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		/* NOTE(review): this 'group' shadows the outer ext4_group_t 'group' */
		int group;

		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skipped in the read loop above */
			continue;

		/* 'data' is the slot inside the page, 'bitmap' the data read from disk */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		if ((first_block + i) & 1) {
			/* odd slot: buddy data, generated from 'incore' */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			/* reset the statistics that generation recomputes */
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));

			ext4_lock_group(sb, group);
			/* start with every buddy bit set; generation clears bits for free chunks */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* even slot: copy of the block bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* post-process the copy via the two helpers declared at the top of the file */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* the buddy slot that follows in this page is built from this copy */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
932
933
934
935
936
937
938
939static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
940 ext4_group_t group, struct ext4_buddy *e4b)
941{
942 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
943 int block, pnum, poff;
944 int blocks_per_page;
945 struct page *page;
946
947 e4b->bd_buddy_page = NULL;
948 e4b->bd_bitmap_page = NULL;
949
950 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
951
952
953
954
955
956 block = group * 2;
957 pnum = block / blocks_per_page;
958 poff = block % blocks_per_page;
959 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
960 if (!page)
961 return -EIO;
962 BUG_ON(page->mapping != inode->i_mapping);
963 e4b->bd_bitmap_page = page;
964 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
965
966 if (blocks_per_page >= 2) {
967
968 return 0;
969 }
970
971 block++;
972 pnum = block / blocks_per_page;
973 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
974 if (!page)
975 return -EIO;
976 BUG_ON(page->mapping != inode->i_mapping);
977 e4b->bd_buddy_page = page;
978 return 0;
979}
980
981static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
982{
983 if (e4b->bd_bitmap_page) {
984 unlock_page(e4b->bd_bitmap_page);
985 page_cache_release(e4b->bd_bitmap_page);
986 }
987 if (e4b->bd_buddy_page) {
988 unlock_page(e4b->bd_buddy_page);
989 page_cache_release(e4b->bd_buddy_page);
990 }
991}
992
993
994
995
996
997
998static noinline_for_stack
999int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1000{
1001
1002 struct ext4_group_info *this_grp;
1003 struct ext4_buddy e4b;
1004 struct page *page;
1005 int ret = 0;
1006
1007 mb_debug(1, "init group %u\n", group);
1008 this_grp = ext4_get_group_info(sb, group);
1009
1010
1011
1012
1013
1014
1015
1016 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1017 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1018
1019
1020
1021
1022 goto err;
1023 }
1024
1025 page = e4b.bd_bitmap_page;
1026 ret = ext4_mb_init_cache(page, NULL);
1027 if (ret)
1028 goto err;
1029 if (!PageUptodate(page)) {
1030 ret = -EIO;
1031 goto err;
1032 }
1033 mark_page_accessed(page);
1034
1035 if (e4b.bd_buddy_page == NULL) {
1036
1037
1038
1039
1040
1041 ret = 0;
1042 goto err;
1043 }
1044
1045 page = e4b.bd_buddy_page;
1046 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1047 if (ret)
1048 goto err;
1049 if (!PageUptodate(page)) {
1050 ret = -EIO;
1051 goto err;
1052 }
1053 mark_page_accessed(page);
1054err:
1055 ext4_mb_put_buddy_page_lock(&e4b);
1056 return ret;
1057}
1058
1059
1060
1061
1062
1063
1064static noinline_for_stack int
1065ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1066 struct ext4_buddy *e4b)
1067{
1068 int blocks_per_page;
1069 int block;
1070 int pnum;
1071 int poff;
1072 struct page *page;
1073 int ret;
1074 struct ext4_group_info *grp;
1075 struct ext4_sb_info *sbi = EXT4_SB(sb);
1076 struct inode *inode = sbi->s_buddy_cache;
1077
1078 mb_debug(1, "load group %u\n", group);
1079
1080 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1081 grp = ext4_get_group_info(sb, group);
1082
1083 e4b->bd_blkbits = sb->s_blocksize_bits;
1084 e4b->bd_info = grp;
1085 e4b->bd_sb = sb;
1086 e4b->bd_group = group;
1087 e4b->bd_buddy_page = NULL;
1088 e4b->bd_bitmap_page = NULL;
1089
1090 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1091
1092
1093
1094
1095 ret = ext4_mb_init_group(sb, group);
1096 if (ret)
1097 return ret;
1098 }
1099
1100
1101
1102
1103
1104
1105 block = group * 2;
1106 pnum = block / blocks_per_page;
1107 poff = block % blocks_per_page;
1108
1109
1110
1111 page = find_get_page(inode->i_mapping, pnum);
1112 if (page == NULL || !PageUptodate(page)) {
1113 if (page)
1114
1115
1116
1117
1118
1119
1120
1121
1122 page_cache_release(page);
1123 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1124 if (page) {
1125 BUG_ON(page->mapping != inode->i_mapping);
1126 if (!PageUptodate(page)) {
1127 ret = ext4_mb_init_cache(page, NULL);
1128 if (ret) {
1129 unlock_page(page);
1130 goto err;
1131 }
1132 mb_cmp_bitmaps(e4b, page_address(page) +
1133 (poff * sb->s_blocksize));
1134 }
1135 unlock_page(page);
1136 }
1137 }
1138 if (page == NULL || !PageUptodate(page)) {
1139 ret = -EIO;
1140 goto err;
1141 }
1142 e4b->bd_bitmap_page = page;
1143 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1144 mark_page_accessed(page);
1145
1146 block++;
1147 pnum = block / blocks_per_page;
1148 poff = block % blocks_per_page;
1149
1150 page = find_get_page(inode->i_mapping, pnum);
1151 if (page == NULL || !PageUptodate(page)) {
1152 if (page)
1153 page_cache_release(page);
1154 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1155 if (page) {
1156 BUG_ON(page->mapping != inode->i_mapping);
1157 if (!PageUptodate(page)) {
1158 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
1159 if (ret) {
1160 unlock_page(page);
1161 goto err;
1162 }
1163 }
1164 unlock_page(page);
1165 }
1166 }
1167 if (page == NULL || !PageUptodate(page)) {
1168 ret = -EIO;
1169 goto err;
1170 }
1171 e4b->bd_buddy_page = page;
1172 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1173 mark_page_accessed(page);
1174
1175 BUG_ON(e4b->bd_bitmap_page == NULL);
1176 BUG_ON(e4b->bd_buddy_page == NULL);
1177
1178 return 0;
1179
1180err:
1181 if (page)
1182 page_cache_release(page);
1183 if (e4b->bd_bitmap_page)
1184 page_cache_release(e4b->bd_bitmap_page);
1185 if (e4b->bd_buddy_page)
1186 page_cache_release(e4b->bd_buddy_page);
1187 e4b->bd_buddy = NULL;
1188 e4b->bd_bitmap = NULL;
1189 return ret;
1190}
1191
1192static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1193{
1194 if (e4b->bd_bitmap_page)
1195 page_cache_release(e4b->bd_bitmap_page);
1196 if (e4b->bd_buddy_page)
1197 page_cache_release(e4b->bd_buddy_page);
1198}
1199
1200
1201static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1202{
1203 int order = 1;
1204 void *bb;
1205
1206 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1207 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1208
1209 bb = e4b->bd_buddy;
1210 while (order <= e4b->bd_blkbits + 1) {
1211 block = block >> 1;
1212 if (!mb_test_bit(block, bb)) {
1213
1214 return order;
1215 }
1216 bb += 1 << (e4b->bd_blkbits - order);
1217 order++;
1218 }
1219 return 0;
1220}
1221
1222static void mb_clear_bits(void *bm, int cur, int len)
1223{
1224 __u32 *addr;
1225
1226 len = cur + len;
1227 while (cur < len) {
1228 if ((cur & 31) == 0 && (len - cur) >= 32) {
1229
1230 addr = bm + (cur >> 3);
1231 *addr = 0;
1232 cur += 32;
1233 continue;
1234 }
1235 mb_clear_bit(cur, bm);
1236 cur++;
1237 }
1238}
1239
1240void ext4_set_bits(void *bm, int cur, int len)
1241{
1242 __u32 *addr;
1243
1244 len = cur + len;
1245 while (cur < len) {
1246 if ((cur & 31) == 0 && (len - cur) >= 32) {
1247
1248 addr = bm + (cur >> 3);
1249 *addr = 0xffffffff;
1250 cur += 32;
1251 continue;
1252 }
1253 mb_set_bit(cur, bm);
1254 cur++;
1255 }
1256}
1257
/*
 * Mark bits [first, first + count) free in the group loaded in @e4b and
 * update the buddy bitmaps and group statistics to match.
 *
 * @inode: used only for the inode number in error reports; may be NULL
 * @e4b:   loaded buddy of the group; the group lock must be held
 * @first: first bit to free
 * @count: number of bits to free
 *
 * Updates bb_free, bb_first_free, bb_fragments, bb_counters[] and the
 * cached largest free order. A bit that is already clear is reported
 * through ext4_grp_locked_error() and processing continues.
 */
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			  int first, int count)
{
	int block = 0;
	int max = 0;
	int order;
	void *buddy;
	void *buddy2;
	struct super_block *sb = e4b->bd_sb;

	BUG_ON(first + count > (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/*
	 * Fragment accounting. 'block' and 'max' are used here as flags:
	 * is the bit just before / just after the range already free?
	 * Both free: two fragments merge into one. Neither free: a new
	 * fragment appears.
	 */
	if (first != 0)
		block = !mb_test_bit(first - 1, e4b->bd_bitmap);
	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
		max = !mb_test_bit(first + count, e4b->bd_bitmap);
	if (block && max)
		e4b->bd_info->bb_fragments--;
	else if (!block && !max)
		e4b->bd_info->bb_fragments++;

	/* free the bits one at a time, merging buddies upward after each */
	while (count-- > 0) {
		block = first++;
		order = 0;

		if (!mb_test_bit(block, e4b->bd_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), block);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing already freed block "
					      "(bit %u)", block);
		}
		mb_clear_bit(block, e4b->bd_bitmap);
		e4b->bd_info->bb_counters[order]++;

		/* start of the buddy */
		buddy = mb_find_buddy(e4b, order, &max);

		do {
			/* round down to the first bit of the pair */
			block &= ~1UL;
			if (mb_test_bit(block, buddy) ||
					mb_test_bit(block + 1, buddy))
				/* one half of the pair is still in use: stop merging */
				break;

			/* both halves are free, so combine them one order up */
			buddy2 = mb_find_buddy(e4b, order + 1, &max);

			if (!buddy2)
				/* no higher order exists */
				break;

			if (order > 0) {
				/*
				 * Mark both halves as unavailable at this
				 * order; order 0 is the block bitmap itself
				 * and is left untouched.
				 */
				mb_set_bit(block, buddy);
				mb_set_bit(block + 1, buddy);
			}
			/* two chunks leave this order ... */
			e4b->bd_info->bb_counters[order]--;
			e4b->bd_info->bb_counters[order]--;

			block = block >> 1;
			order++;
			/* ... and one chunk joins the next order */
			e4b->bd_info->bb_counters[order]++;

			mb_clear_bit(block, buddy2);
			buddy = buddy2;
		} while (1);
	}
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}
1341
/*
 * Describe the free extent that starts at bit @block of the group loaded
 * in @e4b, trying to make it at least @needed bits long.
 *
 * @ex receives the result. If @block is in use, @ex is zeroed and 0 is
 * returned. Otherwise @ex starts exactly at @block and the function
 * returns ex->fe_len, which may be smaller or larger than @needed.
 *
 * The group lock must be held.
 */
static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		/* the requested start is in use: report an empty extent */
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find the order of the free chunk that contains the block */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* trim the part of the chunk that lies before the requested start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	/* extend with the chunks that follow, for as long as they are free */
	while (needed > ex->fe_len &&
	       (buddy = mb_find_buddy(e4b, order, &max))) {

		if (block + 1 >= max)
			/* no further chunk at this order */
			break;

		/* first bit of the following chunk */
		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}
1394
/*
 * Mark the extent @ex as in use in the group loaded in @e4b, updating the
 * buddy bitmaps and group statistics (bb_free, bb_first_free,
 * bb_fragments, bb_counters[], cached largest free order) to match.
 *
 * Returns 0 if no free chunk had to be split. Otherwise returns
 * len | (ord << 16), where len is the length still to be marked and ord
 * the order of the chunk at the moment of the first split.
 *
 * The group lock must be held.
 */
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/*
	 * Fragment accounting. 'mlen' and 'max' are used here as flags:
	 * is the bit just before / just after the extent free? Both free:
	 * one fragment is cut in two. Neither free: a whole fragment
	 * disappears.
	 */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* consume the extent chunk by chunk */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk is inside the extent: take it as is */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* remember the state at the first split for the return value */
		if (ret == 0)
			ret = len | (ord << 16);

		/* the chunk only partly overlaps: split it into two halves */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	/* finally mark the whole extent in the block bitmap */
	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
1469
1470
1471
1472
1473static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1474 struct ext4_buddy *e4b)
1475{
1476 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1477 int ret;
1478
1479 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1480 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1481
1482 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1483 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1484 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1485
1486
1487
1488 ac->ac_f_ex = ac->ac_b_ex;
1489
1490 ac->ac_status = AC_STATUS_FOUND;
1491 ac->ac_tail = ret & 0xffff;
1492 ac->ac_buddy = ret >> 16;
1493
1494
1495
1496
1497
1498
1499
1500
1501 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1502 get_page(ac->ac_bitmap_page);
1503 ac->ac_buddy_page = e4b->bd_buddy_page;
1504 get_page(ac->ac_buddy_page);
1505
1506 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1507 spin_lock(&sbi->s_md_lock);
1508 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1509 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1510 spin_unlock(&sbi->s_md_lock);
1511 }
1512}
1513
1514
1515
1516
1517
1518static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1519 struct ext4_buddy *e4b,
1520 int finish_group)
1521{
1522 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1523 struct ext4_free_extent *bex = &ac->ac_b_ex;
1524 struct ext4_free_extent *gex = &ac->ac_g_ex;
1525 struct ext4_free_extent ex;
1526 int max;
1527
1528 if (ac->ac_status == AC_STATUS_FOUND)
1529 return;
1530
1531
1532
1533 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1534 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1535 ac->ac_status = AC_STATUS_BREAK;
1536 return;
1537 }
1538
1539
1540
1541
1542 if (bex->fe_len < gex->fe_len)
1543 return;
1544
1545 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1546 && bex->fe_group == e4b->bd_group) {
1547
1548
1549
1550 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1551 if (max >= gex->fe_len) {
1552 ext4_mb_use_best_found(ac, e4b);
1553 return;
1554 }
1555 }
1556}
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1569 struct ext4_free_extent *ex,
1570 struct ext4_buddy *e4b)
1571{
1572 struct ext4_free_extent *bex = &ac->ac_b_ex;
1573 struct ext4_free_extent *gex = &ac->ac_g_ex;
1574
1575 BUG_ON(ex->fe_len <= 0);
1576 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1577 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1578 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1579
1580 ac->ac_found++;
1581
1582
1583
1584
1585 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1586 *bex = *ex;
1587 ext4_mb_use_best_found(ac, e4b);
1588 return;
1589 }
1590
1591
1592
1593
1594 if (ex->fe_len == gex->fe_len) {
1595 *bex = *ex;
1596 ext4_mb_use_best_found(ac, e4b);
1597 return;
1598 }
1599
1600
1601
1602
1603 if (bex->fe_len == 0) {
1604 *bex = *ex;
1605 return;
1606 }
1607
1608
1609
1610
1611 if (bex->fe_len < gex->fe_len) {
1612
1613
1614 if (ex->fe_len > bex->fe_len)
1615 *bex = *ex;
1616 } else if (ex->fe_len > gex->fe_len) {
1617
1618
1619
1620 if (ex->fe_len < bex->fe_len)
1621 *bex = *ex;
1622 }
1623
1624 ext4_mb_check_limits(ac, e4b, 0);
1625}
1626
1627static noinline_for_stack
1628int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1629 struct ext4_buddy *e4b)
1630{
1631 struct ext4_free_extent ex = ac->ac_b_ex;
1632 ext4_group_t group = ex.fe_group;
1633 int max;
1634 int err;
1635
1636 BUG_ON(ex.fe_len <= 0);
1637 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1638 if (err)
1639 return err;
1640
1641 ext4_lock_group(ac->ac_sb, group);
1642 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1643
1644 if (max > 0) {
1645 ac->ac_b_ex = ex;
1646 ext4_mb_use_best_found(ac, e4b);
1647 }
1648
1649 ext4_unlock_group(ac->ac_sb, group);
1650 ext4_mb_unload_buddy(e4b);
1651
1652 return 0;
1653}
1654
1655static noinline_for_stack
1656int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1657 struct ext4_buddy *e4b)
1658{
1659 ext4_group_t group = ac->ac_g_ex.fe_group;
1660 int max;
1661 int err;
1662 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1663 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1664 struct ext4_free_extent ex;
1665
1666 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1667 return 0;
1668 if (grp->bb_free == 0)
1669 return 0;
1670
1671 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1672 if (err)
1673 return err;
1674
1675 ext4_lock_group(ac->ac_sb, group);
1676 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1677 ac->ac_g_ex.fe_len, &ex);
1678
1679 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1680 ext4_fsblk_t start;
1681
1682 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1683 ex.fe_start;
1684
1685 if (do_div(start, sbi->s_stripe) == 0) {
1686 ac->ac_found++;
1687 ac->ac_b_ex = ex;
1688 ext4_mb_use_best_found(ac, e4b);
1689 }
1690 } else if (max >= ac->ac_g_ex.fe_len) {
1691 BUG_ON(ex.fe_len <= 0);
1692 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1693 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1694 ac->ac_found++;
1695 ac->ac_b_ex = ex;
1696 ext4_mb_use_best_found(ac, e4b);
1697 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1698
1699
1700 BUG_ON(ex.fe_len <= 0);
1701 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1702 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1703 ac->ac_found++;
1704 ac->ac_b_ex = ex;
1705 ext4_mb_use_best_found(ac, e4b);
1706 }
1707 ext4_unlock_group(ac->ac_sb, group);
1708 ext4_mb_unload_buddy(e4b);
1709
1710 return 0;
1711}
1712
1713
1714
1715
1716
1717static noinline_for_stack
1718void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1719 struct ext4_buddy *e4b)
1720{
1721 struct super_block *sb = ac->ac_sb;
1722 struct ext4_group_info *grp = e4b->bd_info;
1723 void *buddy;
1724 int i;
1725 int k;
1726 int max;
1727
1728 BUG_ON(ac->ac_2order <= 0);
1729 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1730 if (grp->bb_counters[i] == 0)
1731 continue;
1732
1733 buddy = mb_find_buddy(e4b, i, &max);
1734 BUG_ON(buddy == NULL);
1735
1736 k = mb_find_next_zero_bit(buddy, max, 0);
1737 BUG_ON(k >= max);
1738
1739 ac->ac_found++;
1740
1741 ac->ac_b_ex.fe_len = 1 << i;
1742 ac->ac_b_ex.fe_start = k << i;
1743 ac->ac_b_ex.fe_group = e4b->bd_group;
1744
1745 ext4_mb_use_best_found(ac, e4b);
1746
1747 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1748
1749 if (EXT4_SB(sb)->s_mb_stats)
1750 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1751
1752 break;
1753 }
1754}
1755
1756
1757
1758
1759
1760
1761static noinline_for_stack
1762void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1763 struct ext4_buddy *e4b)
1764{
1765 struct super_block *sb = ac->ac_sb;
1766 void *bitmap = e4b->bd_bitmap;
1767 struct ext4_free_extent ex;
1768 int i;
1769 int free;
1770
1771 free = e4b->bd_info->bb_free;
1772 BUG_ON(free <= 0);
1773
1774 i = e4b->bd_info->bb_first_free;
1775
1776 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1777 i = mb_find_next_zero_bit(bitmap,
1778 EXT4_CLUSTERS_PER_GROUP(sb), i);
1779 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1780
1781
1782
1783
1784
1785 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1786 "%d free clusters as per "
1787 "group info. But bitmap says 0",
1788 free);
1789 break;
1790 }
1791
1792 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1793 BUG_ON(ex.fe_len <= 0);
1794 if (free < ex.fe_len) {
1795 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1796 "%d free clusters as per "
1797 "group info. But got %d blocks",
1798 free, ex.fe_len);
1799
1800
1801
1802
1803
1804 break;
1805 }
1806
1807 ext4_mb_measure_extent(ac, &ex, e4b);
1808
1809 i += ex.fe_len;
1810 free -= ex.fe_len;
1811 }
1812
1813 ext4_mb_check_limits(ac, e4b, 1);
1814}
1815
1816
1817
1818
1819
1820static noinline_for_stack
1821void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1822 struct ext4_buddy *e4b)
1823{
1824 struct super_block *sb = ac->ac_sb;
1825 struct ext4_sb_info *sbi = EXT4_SB(sb);
1826 void *bitmap = e4b->bd_bitmap;
1827 struct ext4_free_extent ex;
1828 ext4_fsblk_t first_group_block;
1829 ext4_fsblk_t a;
1830 ext4_grpblk_t i;
1831 int max;
1832
1833 BUG_ON(sbi->s_stripe == 0);
1834
1835
1836 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1837
1838 a = first_group_block + sbi->s_stripe - 1;
1839 do_div(a, sbi->s_stripe);
1840 i = (a * sbi->s_stripe) - first_group_block;
1841
1842 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1843 if (!mb_test_bit(i, bitmap)) {
1844 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1845 if (max >= sbi->s_stripe) {
1846 ac->ac_found++;
1847 ac->ac_b_ex = ex;
1848 ext4_mb_use_best_found(ac, e4b);
1849 break;
1850 }
1851 }
1852 i += sbi->s_stripe;
1853 }
1854}
1855
1856
1857static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1858 ext4_group_t group, int cr)
1859{
1860 unsigned free, fragments;
1861 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1862 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1863
1864 BUG_ON(cr < 0 || cr >= 4);
1865
1866 free = grp->bb_free;
1867 if (free == 0)
1868 return 0;
1869 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1870 return 0;
1871
1872
1873 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1874 int ret = ext4_mb_init_group(ac->ac_sb, group);
1875 if (ret)
1876 return 0;
1877 }
1878
1879 fragments = grp->bb_fragments;
1880 if (fragments == 0)
1881 return 0;
1882
1883 switch (cr) {
1884 case 0:
1885 BUG_ON(ac->ac_2order == 0);
1886
1887 if (grp->bb_largest_free_order < ac->ac_2order)
1888 return 0;
1889
1890
1891 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1892 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1893 ((group % flex_size) == 0))
1894 return 0;
1895
1896 return 1;
1897 case 1:
1898 if ((free / fragments) >= ac->ac_g_ex.fe_len)
1899 return 1;
1900 break;
1901 case 2:
1902 if (free >= ac->ac_g_ex.fe_len)
1903 return 1;
1904 break;
1905 case 3:
1906 return 1;
1907 default:
1908 BUG();
1909 }
1910
1911 return 0;
1912}
1913
1914static noinline_for_stack int
1915ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1916{
1917 ext4_group_t ngroups, group, i;
1918 int cr;
1919 int err = 0;
1920 struct ext4_sb_info *sbi;
1921 struct super_block *sb;
1922 struct ext4_buddy e4b;
1923
1924 sb = ac->ac_sb;
1925 sbi = EXT4_SB(sb);
1926 ngroups = ext4_get_groups_count(sb);
1927
1928 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
1929 ngroups = sbi->s_blockfile_groups;
1930
1931 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1932
1933
1934 err = ext4_mb_find_by_goal(ac, &e4b);
1935 if (err || ac->ac_status == AC_STATUS_FOUND)
1936 goto out;
1937
1938 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
1939 goto out;
1940
1941
1942
1943
1944
1945
1946 i = fls(ac->ac_g_ex.fe_len);
1947 ac->ac_2order = 0;
1948
1949
1950
1951
1952
1953 if (i >= sbi->s_mb_order2_reqs) {
1954
1955
1956
1957 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
1958 ac->ac_2order = i - 1;
1959 }
1960
1961
1962 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1963
1964 spin_lock(&sbi->s_md_lock);
1965 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
1966 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
1967 spin_unlock(&sbi->s_md_lock);
1968 }
1969
1970
1971 cr = ac->ac_2order ? 0 : 1;
1972
1973
1974
1975
1976repeat:
1977 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
1978 ac->ac_criteria = cr;
1979
1980
1981
1982
1983 group = ac->ac_g_ex.fe_group;
1984
1985 for (i = 0; i < ngroups; group++, i++) {
1986 if (group == ngroups)
1987 group = 0;
1988
1989
1990 if (!ext4_mb_good_group(ac, group, cr))
1991 continue;
1992
1993 err = ext4_mb_load_buddy(sb, group, &e4b);
1994 if (err)
1995 goto out;
1996
1997 ext4_lock_group(sb, group);
1998
1999
2000
2001
2002
2003 if (!ext4_mb_good_group(ac, group, cr)) {
2004 ext4_unlock_group(sb, group);
2005 ext4_mb_unload_buddy(&e4b);
2006 continue;
2007 }
2008
2009 ac->ac_groups_scanned++;
2010 if (cr == 0)
2011 ext4_mb_simple_scan_group(ac, &e4b);
2012 else if (cr == 1 && sbi->s_stripe &&
2013 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2014 ext4_mb_scan_aligned(ac, &e4b);
2015 else
2016 ext4_mb_complex_scan_group(ac, &e4b);
2017
2018 ext4_unlock_group(sb, group);
2019 ext4_mb_unload_buddy(&e4b);
2020
2021 if (ac->ac_status != AC_STATUS_CONTINUE)
2022 break;
2023 }
2024 }
2025
2026 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2027 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2028
2029
2030
2031
2032
2033 ext4_mb_try_best_found(ac, &e4b);
2034 if (ac->ac_status != AC_STATUS_FOUND) {
2035
2036
2037
2038
2039
2040
2041 ac->ac_b_ex.fe_group = 0;
2042 ac->ac_b_ex.fe_start = 0;
2043 ac->ac_b_ex.fe_len = 0;
2044 ac->ac_status = AC_STATUS_CONTINUE;
2045 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2046 cr = 3;
2047 atomic_inc(&sbi->s_mb_lost_chunks);
2048 goto repeat;
2049 }
2050 }
2051out:
2052 return err;
2053}
2054
2055static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2056{
2057 struct super_block *sb = seq->private;
2058 ext4_group_t group;
2059
2060 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2061 return NULL;
2062 group = *pos + 1;
2063 return (void *) ((unsigned long) group);
2064}
2065
2066static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2067{
2068 struct super_block *sb = seq->private;
2069 ext4_group_t group;
2070
2071 ++*pos;
2072 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2073 return NULL;
2074 group = *pos + 1;
2075 return (void *) ((unsigned long) group);
2076}
2077
2078static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2079{
2080 struct super_block *sb = seq->private;
2081 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2082 int i;
2083 int err, buddy_loaded = 0;
2084 struct ext4_buddy e4b;
2085 struct ext4_group_info *grinfo;
2086 struct sg {
2087 struct ext4_group_info info;
2088 ext4_grpblk_t counters[16];
2089 } sg;
2090
2091 group--;
2092 if (group == 0)
2093 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2094 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2095 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2096 "group", "free", "frags", "first",
2097 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2098 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2099
2100 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2101 sizeof(struct ext4_group_info);
2102 grinfo = ext4_get_group_info(sb, group);
2103
2104 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2105 err = ext4_mb_load_buddy(sb, group, &e4b);
2106 if (err) {
2107 seq_printf(seq, "#%-5u: I/O error\n", group);
2108 return 0;
2109 }
2110 buddy_loaded = 1;
2111 }
2112
2113 memcpy(&sg, ext4_get_group_info(sb, group), i);
2114
2115 if (buddy_loaded)
2116 ext4_mb_unload_buddy(&e4b);
2117
2118 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2119 sg.info.bb_fragments, sg.info.bb_first_free);
2120 for (i = 0; i <= 13; i++)
2121 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2122 sg.info.bb_counters[i] : 0);
2123 seq_printf(seq, " ]\n");
2124
2125 return 0;
2126}
2127
/* seq_file ->stop: the iterator holds no resources, so nothing to undo. */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
	return;
}
2131
2132static const struct seq_operations ext4_mb_seq_groups_ops = {
2133 .start = ext4_mb_seq_groups_start,
2134 .next = ext4_mb_seq_groups_next,
2135 .stop = ext4_mb_seq_groups_stop,
2136 .show = ext4_mb_seq_groups_show,
2137};
2138
2139static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2140{
2141 struct super_block *sb = PDE(inode)->data;
2142 int rc;
2143
2144 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2145 if (rc == 0) {
2146 struct seq_file *m = file->private_data;
2147 m->private = sb;
2148 }
2149 return rc;
2150
2151}
2152
2153static const struct file_operations ext4_mb_seq_groups_fops = {
2154 .owner = THIS_MODULE,
2155 .open = ext4_mb_seq_groups_open,
2156 .read = seq_read,
2157 .llseek = seq_lseek,
2158 .release = seq_release,
2159};
2160
2161static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2162{
2163 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2164 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2165
2166 BUG_ON(!cachep);
2167 return cachep;
2168}
2169
2170
2171
2172
2173
2174int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2175{
2176 struct ext4_sb_info *sbi = EXT4_SB(sb);
2177 unsigned size;
2178 struct ext4_group_info ***new_groupinfo;
2179
2180 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2181 EXT4_DESC_PER_BLOCK_BITS(sb);
2182 if (size <= sbi->s_group_info_size)
2183 return 0;
2184
2185 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2186 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
2187 if (!new_groupinfo) {
2188 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2189 return -ENOMEM;
2190 }
2191 if (sbi->s_group_info) {
2192 memcpy(new_groupinfo, sbi->s_group_info,
2193 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2194 ext4_kvfree(sbi->s_group_info);
2195 }
2196 sbi->s_group_info = new_groupinfo;
2197 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2198 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2199 sbi->s_group_info_size);
2200 return 0;
2201}
2202
2203
2204int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2205 struct ext4_group_desc *desc)
2206{
2207 int i;
2208 int metalen = 0;
2209 struct ext4_sb_info *sbi = EXT4_SB(sb);
2210 struct ext4_group_info **meta_group_info;
2211 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2212
2213
2214
2215
2216
2217
2218 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2219 metalen = sizeof(*meta_group_info) <<
2220 EXT4_DESC_PER_BLOCK_BITS(sb);
2221 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2222 if (meta_group_info == NULL) {
2223 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2224 "for a buddy group");
2225 goto exit_meta_group_info;
2226 }
2227 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2228 meta_group_info;
2229 }
2230
2231 meta_group_info =
2232 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2233 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2234
2235 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
2236 if (meta_group_info[i] == NULL) {
2237 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2238 goto exit_group_info;
2239 }
2240 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2241 &(meta_group_info[i]->bb_state));
2242
2243
2244
2245
2246
2247 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2248 meta_group_info[i]->bb_free =
2249 ext4_free_clusters_after_init(sb, group, desc);
2250 } else {
2251 meta_group_info[i]->bb_free =
2252 ext4_free_group_clusters(sb, desc);
2253 }
2254
2255 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2256 init_rwsem(&meta_group_info[i]->alloc_sem);
2257 meta_group_info[i]->bb_free_root = RB_ROOT;
2258 meta_group_info[i]->bb_largest_free_order = -1;
2259
2260#ifdef DOUBLE_CHECK
2261 {
2262 struct buffer_head *bh;
2263 meta_group_info[i]->bb_bitmap =
2264 kmalloc(sb->s_blocksize, GFP_KERNEL);
2265 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2266 bh = ext4_read_block_bitmap(sb, group);
2267 BUG_ON(bh == NULL);
2268 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2269 sb->s_blocksize);
2270 put_bh(bh);
2271 }
2272#endif
2273
2274 return 0;
2275
2276exit_group_info:
2277
2278 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2279 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2280 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2281 }
2282exit_meta_group_info:
2283 return -ENOMEM;
2284}
2285
2286static int ext4_mb_init_backend(struct super_block *sb)
2287{
2288 ext4_group_t ngroups = ext4_get_groups_count(sb);
2289 ext4_group_t i;
2290 struct ext4_sb_info *sbi = EXT4_SB(sb);
2291 int err;
2292 struct ext4_group_desc *desc;
2293 struct kmem_cache *cachep;
2294
2295 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2296 if (err)
2297 return err;
2298
2299 sbi->s_buddy_cache = new_inode(sb);
2300 if (sbi->s_buddy_cache == NULL) {
2301 ext4_msg(sb, KERN_ERR, "can't get new inode");
2302 goto err_freesgi;
2303 }
2304
2305
2306
2307
2308 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2309 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2310 for (i = 0; i < ngroups; i++) {
2311 desc = ext4_get_group_desc(sb, i, NULL);
2312 if (desc == NULL) {
2313 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2314 goto err_freebuddy;
2315 }
2316 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2317 goto err_freebuddy;
2318 }
2319
2320 return 0;
2321
2322err_freebuddy:
2323 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2324 while (i-- > 0)
2325 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2326 i = sbi->s_group_info_size;
2327 while (i-- > 0)
2328 kfree(sbi->s_group_info[i]);
2329 iput(sbi->s_buddy_cache);
2330err_freesgi:
2331 ext4_kvfree(sbi->s_group_info);
2332 return -ENOMEM;
2333}
2334
2335static void ext4_groupinfo_destroy_slabs(void)
2336{
2337 int i;
2338
2339 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2340 if (ext4_groupinfo_caches[i])
2341 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2342 ext4_groupinfo_caches[i] = NULL;
2343 }
2344}
2345
2346static int ext4_groupinfo_create_slab(size_t size)
2347{
2348 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2349 int slab_size;
2350 int blocksize_bits = order_base_2(size);
2351 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2352 struct kmem_cache *cachep;
2353
2354 if (cache_index >= NR_GRPINFO_CACHES)
2355 return -EINVAL;
2356
2357 if (unlikely(cache_index < 0))
2358 cache_index = 0;
2359
2360 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2361 if (ext4_groupinfo_caches[cache_index]) {
2362 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2363 return 0;
2364 }
2365
2366 slab_size = offsetof(struct ext4_group_info,
2367 bb_counters[blocksize_bits + 2]);
2368
2369 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2370 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2371 NULL);
2372
2373 ext4_groupinfo_caches[cache_index] = cachep;
2374
2375 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2376 if (!cachep) {
2377 printk(KERN_EMERG
2378 "EXT4-fs: no memory for groupinfo slab cache\n");
2379 return -ENOMEM;
2380 }
2381
2382 return 0;
2383}
2384
2385int ext4_mb_init(struct super_block *sb)
2386{
2387 struct ext4_sb_info *sbi = EXT4_SB(sb);
2388 unsigned i, j;
2389 unsigned offset;
2390 unsigned max;
2391 int ret;
2392
2393 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2394
2395 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2396 if (sbi->s_mb_offsets == NULL) {
2397 ret = -ENOMEM;
2398 goto out;
2399 }
2400
2401 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2402 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2403 if (sbi->s_mb_maxs == NULL) {
2404 ret = -ENOMEM;
2405 goto out;
2406 }
2407
2408 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2409 if (ret < 0)
2410 goto out;
2411
2412
2413 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2414 sbi->s_mb_offsets[0] = 0;
2415
2416 i = 1;
2417 offset = 0;
2418 max = sb->s_blocksize << 2;
2419 do {
2420 sbi->s_mb_offsets[i] = offset;
2421 sbi->s_mb_maxs[i] = max;
2422 offset += 1 << (sb->s_blocksize_bits - i);
2423 max = max >> 1;
2424 i++;
2425 } while (i <= sb->s_blocksize_bits + 1);
2426
2427 spin_lock_init(&sbi->s_md_lock);
2428 spin_lock_init(&sbi->s_bal_lock);
2429
2430 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2431 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2432 sbi->s_mb_stats = MB_DEFAULT_STATS;
2433 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2434 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2448 sbi->s_cluster_bits, 32);
2449
2450
2451
2452
2453
2454
2455
2456
2457 if (sbi->s_stripe > 1) {
2458 sbi->s_mb_group_prealloc = roundup(
2459 sbi->s_mb_group_prealloc, sbi->s_stripe);
2460 }
2461
2462 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2463 if (sbi->s_locality_groups == NULL) {
2464 ret = -ENOMEM;
2465 goto out_free_groupinfo_slab;
2466 }
2467 for_each_possible_cpu(i) {
2468 struct ext4_locality_group *lg;
2469 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2470 mutex_init(&lg->lg_mutex);
2471 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2472 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2473 spin_lock_init(&lg->lg_prealloc_lock);
2474 }
2475
2476
2477 ret = ext4_mb_init_backend(sb);
2478 if (ret != 0)
2479 goto out_free_locality_groups;
2480
2481 if (sbi->s_proc)
2482 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2483 &ext4_mb_seq_groups_fops, sb);
2484
2485 return 0;
2486
2487out_free_locality_groups:
2488 free_percpu(sbi->s_locality_groups);
2489 sbi->s_locality_groups = NULL;
2490out_free_groupinfo_slab:
2491 ext4_groupinfo_destroy_slabs();
2492out:
2493 kfree(sbi->s_mb_offsets);
2494 sbi->s_mb_offsets = NULL;
2495 kfree(sbi->s_mb_maxs);
2496 sbi->s_mb_maxs = NULL;
2497 return ret;
2498}
2499
2500
2501static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2502{
2503 struct ext4_prealloc_space *pa;
2504 struct list_head *cur, *tmp;
2505 int count = 0;
2506
2507 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2508 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2509 list_del(&pa->pa_group_list);
2510 count++;
2511 kmem_cache_free(ext4_pspace_cachep, pa);
2512 }
2513 if (count)
2514 mb_debug(1, "mballoc: %u PAs left\n", count);
2515
2516}
2517
2518int ext4_mb_release(struct super_block *sb)
2519{
2520 ext4_group_t ngroups = ext4_get_groups_count(sb);
2521 ext4_group_t i;
2522 int num_meta_group_infos;
2523 struct ext4_group_info *grinfo;
2524 struct ext4_sb_info *sbi = EXT4_SB(sb);
2525 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2526
2527 if (sbi->s_proc)
2528 remove_proc_entry("mb_groups", sbi->s_proc);
2529
2530 if (sbi->s_group_info) {
2531 for (i = 0; i < ngroups; i++) {
2532 grinfo = ext4_get_group_info(sb, i);
2533#ifdef DOUBLE_CHECK
2534 kfree(grinfo->bb_bitmap);
2535#endif
2536 ext4_lock_group(sb, i);
2537 ext4_mb_cleanup_pa(grinfo);
2538 ext4_unlock_group(sb, i);
2539 kmem_cache_free(cachep, grinfo);
2540 }
2541 num_meta_group_infos = (ngroups +
2542 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2543 EXT4_DESC_PER_BLOCK_BITS(sb);
2544 for (i = 0; i < num_meta_group_infos; i++)
2545 kfree(sbi->s_group_info[i]);
2546 ext4_kvfree(sbi->s_group_info);
2547 }
2548 kfree(sbi->s_mb_offsets);
2549 kfree(sbi->s_mb_maxs);
2550 if (sbi->s_buddy_cache)
2551 iput(sbi->s_buddy_cache);
2552 if (sbi->s_mb_stats) {
2553 ext4_msg(sb, KERN_INFO,
2554 "mballoc: %u blocks %u reqs (%u success)",
2555 atomic_read(&sbi->s_bal_allocated),
2556 atomic_read(&sbi->s_bal_reqs),
2557 atomic_read(&sbi->s_bal_success));
2558 ext4_msg(sb, KERN_INFO,
2559 "mballoc: %u extents scanned, %u goal hits, "
2560 "%u 2^N hits, %u breaks, %u lost",
2561 atomic_read(&sbi->s_bal_ex_scanned),
2562 atomic_read(&sbi->s_bal_goals),
2563 atomic_read(&sbi->s_bal_2orders),
2564 atomic_read(&sbi->s_bal_breaks),
2565 atomic_read(&sbi->s_mb_lost_chunks));
2566 ext4_msg(sb, KERN_INFO,
2567 "mballoc: %lu generated and it took %Lu",
2568 sbi->s_mb_buddies_generated,
2569 sbi->s_mb_generation_time);
2570 ext4_msg(sb, KERN_INFO,
2571 "mballoc: %u preallocated, %u discarded",
2572 atomic_read(&sbi->s_mb_preallocated),
2573 atomic_read(&sbi->s_mb_discarded));
2574 }
2575
2576 free_percpu(sbi->s_locality_groups);
2577
2578 return 0;
2579}
2580
2581static inline int ext4_issue_discard(struct super_block *sb,
2582 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2583{
2584 ext4_fsblk_t discard_block;
2585
2586 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2587 ext4_group_first_block_no(sb, block_group));
2588 count = EXT4_C2B(EXT4_SB(sb), count);
2589 trace_ext4_discard_blocks(sb,
2590 (unsigned long long) discard_block, count);
2591 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2592}
2593
2594
2595
2596
2597
2598static void ext4_free_data_callback(struct super_block *sb,
2599 struct ext4_journal_cb_entry *jce,
2600 int rc)
2601{
2602 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2603 struct ext4_buddy e4b;
2604 struct ext4_group_info *db;
2605 int err, count = 0, count2 = 0;
2606
2607 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2608 entry->efd_count, entry->efd_group, entry);
2609
2610 if (test_opt(sb, DISCARD))
2611 ext4_issue_discard(sb, entry->efd_group,
2612 entry->efd_start_cluster, entry->efd_count);
2613
2614 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2615
2616 BUG_ON(err != 0);
2617
2618
2619 db = e4b.bd_info;
2620
2621 count += entry->efd_count;
2622 count2++;
2623 ext4_lock_group(sb, entry->efd_group);
2624
2625 rb_erase(&entry->efd_node, &(db->bb_free_root));
2626 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2627
2628
2629
2630
2631
2632
2633
2634 if (!test_opt(sb, DISCARD))
2635 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2636
2637 if (!db->bb_free_root.rb_node) {
2638
2639
2640
2641 page_cache_release(e4b.bd_buddy_page);
2642 page_cache_release(e4b.bd_bitmap_page);
2643 }
2644 ext4_unlock_group(sb, entry->efd_group);
2645 kmem_cache_free(ext4_free_data_cachep, entry);
2646 ext4_mb_unload_buddy(&e4b);
2647
2648 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2649}
2650
2651#ifdef CONFIG_EXT4_DEBUG
2652u8 mb_enable_debug __read_mostly;
2653
2654static struct dentry *debugfs_dir;
2655static struct dentry *debugfs_debug;
2656
2657static void __init ext4_create_debugfs_entry(void)
2658{
2659 debugfs_dir = debugfs_create_dir("ext4", NULL);
2660 if (debugfs_dir)
2661 debugfs_debug = debugfs_create_u8("mballoc-debug",
2662 S_IRUGO | S_IWUSR,
2663 debugfs_dir,
2664 &mb_enable_debug);
2665}
2666
2667static void ext4_remove_debugfs_entry(void)
2668{
2669 debugfs_remove(debugfs_debug);
2670 debugfs_remove(debugfs_dir);
2671}
2672
2673#else
2674
2675static void __init ext4_create_debugfs_entry(void)
2676{
2677}
2678
2679static void ext4_remove_debugfs_entry(void)
2680{
2681}
2682
2683#endif
2684
2685int __init ext4_init_mballoc(void)
2686{
2687 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2688 SLAB_RECLAIM_ACCOUNT);
2689 if (ext4_pspace_cachep == NULL)
2690 return -ENOMEM;
2691
2692 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2693 SLAB_RECLAIM_ACCOUNT);
2694 if (ext4_ac_cachep == NULL) {
2695 kmem_cache_destroy(ext4_pspace_cachep);
2696 return -ENOMEM;
2697 }
2698
2699 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2700 SLAB_RECLAIM_ACCOUNT);
2701 if (ext4_free_data_cachep == NULL) {
2702 kmem_cache_destroy(ext4_pspace_cachep);
2703 kmem_cache_destroy(ext4_ac_cachep);
2704 return -ENOMEM;
2705 }
2706 ext4_create_debugfs_entry();
2707 return 0;
2708}
2709
2710void ext4_exit_mballoc(void)
2711{
2712
2713
2714
2715
2716 rcu_barrier();
2717 kmem_cache_destroy(ext4_pspace_cachep);
2718 kmem_cache_destroy(ext4_ac_cachep);
2719 kmem_cache_destroy(ext4_free_data_cachep);
2720 ext4_groupinfo_destroy_slabs();
2721 ext4_remove_debugfs_entry();
2722}
2723
2724
2725
2726
2727
2728
2729static noinline_for_stack int
2730ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2731 handle_t *handle, unsigned int reserv_clstrs)
2732{
2733 struct buffer_head *bitmap_bh = NULL;
2734 struct ext4_group_desc *gdp;
2735 struct buffer_head *gdp_bh;
2736 struct ext4_sb_info *sbi;
2737 struct super_block *sb;
2738 ext4_fsblk_t block;
2739 int err, len;
2740
2741 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2742 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2743
2744 sb = ac->ac_sb;
2745 sbi = EXT4_SB(sb);
2746
2747 err = -EIO;
2748 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2749 if (!bitmap_bh)
2750 goto out_err;
2751
2752 err = ext4_journal_get_write_access(handle, bitmap_bh);
2753 if (err)
2754 goto out_err;
2755
2756 err = -EIO;
2757 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2758 if (!gdp)
2759 goto out_err;
2760
2761 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2762 ext4_free_group_clusters(sb, gdp));
2763
2764 err = ext4_journal_get_write_access(handle, gdp_bh);
2765 if (err)
2766 goto out_err;
2767
2768 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2769
2770 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2771 if (!ext4_data_block_valid(sbi, block, len)) {
2772 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2773 "fs metadata", block, block+len);
2774
2775
2776
2777
2778 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2779 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2780 ac->ac_b_ex.fe_len);
2781 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2782 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2783 if (!err)
2784 err = -EAGAIN;
2785 goto out_err;
2786 }
2787
2788 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2789#ifdef AGGRESSIVE_CHECK
2790 {
2791 int i;
2792 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2793 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2794 bitmap_bh->b_data));
2795 }
2796 }
2797#endif
2798 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2799 ac->ac_b_ex.fe_len);
2800 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2801 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2802 ext4_free_group_clusters_set(sb, gdp,
2803 ext4_free_clusters_after_init(sb,
2804 ac->ac_b_ex.fe_group, gdp));
2805 }
2806 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2807 ext4_free_group_clusters_set(sb, gdp, len);
2808 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
2809 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
2810
2811 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2812 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2813
2814
2815
2816 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2817
2818 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2819 reserv_clstrs);
2820
2821 if (sbi->s_log_groups_per_flex) {
2822 ext4_group_t flex_group = ext4_flex_group(sbi,
2823 ac->ac_b_ex.fe_group);
2824 atomic_sub(ac->ac_b_ex.fe_len,
2825 &sbi->s_flex_groups[flex_group].free_clusters);
2826 }
2827
2828 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2829 if (err)
2830 goto out_err;
2831 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2832
2833out_err:
2834 brelse(bitmap_bh);
2835 return err;
2836}
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2848{
2849 struct super_block *sb = ac->ac_sb;
2850 struct ext4_locality_group *lg = ac->ac_lg;
2851
2852 BUG_ON(lg == NULL);
2853 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2854 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2855 current->pid, ac->ac_g_ex.fe_len);
2856}
2857
2858
2859
2860
2861
2862static noinline_for_stack void
2863ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2864 struct ext4_allocation_request *ar)
2865{
2866 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2867 int bsbits, max;
2868 ext4_lblk_t end;
2869 loff_t size, start_off;
2870 loff_t orig_size __maybe_unused;
2871 ext4_lblk_t start;
2872 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2873 struct ext4_prealloc_space *pa;
2874
2875
2876
2877 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2878 return;
2879
2880
2881 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2882 return;
2883
2884
2885
2886 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2887 return;
2888
2889 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2890 ext4_mb_normalize_group_request(ac);
2891 return ;
2892 }
2893
2894 bsbits = ac->ac_sb->s_blocksize_bits;
2895
2896
2897
2898 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2899 size = size << bsbits;
2900 if (size < i_size_read(ac->ac_inode))
2901 size = i_size_read(ac->ac_inode);
2902 orig_size = size;
2903
2904
2905 max = 2 << bsbits;
2906
2907#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2908 (req <= (size) || max <= (chunk_size))
2909
2910
2911
2912 start_off = 0;
2913 if (size <= 16 * 1024) {
2914 size = 16 * 1024;
2915 } else if (size <= 32 * 1024) {
2916 size = 32 * 1024;
2917 } else if (size <= 64 * 1024) {
2918 size = 64 * 1024;
2919 } else if (size <= 128 * 1024) {
2920 size = 128 * 1024;
2921 } else if (size <= 256 * 1024) {
2922 size = 256 * 1024;
2923 } else if (size <= 512 * 1024) {
2924 size = 512 * 1024;
2925 } else if (size <= 1024 * 1024) {
2926 size = 1024 * 1024;
2927 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
2928 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2929 (21 - bsbits)) << 21;
2930 size = 2 * 1024 * 1024;
2931 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
2932 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2933 (22 - bsbits)) << 22;
2934 size = 4 * 1024 * 1024;
2935 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
2936 (8<<20)>>bsbits, max, 8 * 1024)) {
2937 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2938 (23 - bsbits)) << 23;
2939 size = 8 * 1024 * 1024;
2940 } else {
2941 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
2942 size = ac->ac_o_ex.fe_len << bsbits;
2943 }
2944 size = size >> bsbits;
2945 start = start_off >> bsbits;
2946
2947
2948 if (ar->pleft && start <= ar->lleft) {
2949 size -= ar->lleft + 1 - start;
2950 start = ar->lleft + 1;
2951 }
2952 if (ar->pright && start + size - 1 >= ar->lright)
2953 size -= start + size - ar->lright;
2954
2955 end = start + size;
2956
2957
2958 rcu_read_lock();
2959 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
2960 ext4_lblk_t pa_end;
2961
2962 if (pa->pa_deleted)
2963 continue;
2964 spin_lock(&pa->pa_lock);
2965 if (pa->pa_deleted) {
2966 spin_unlock(&pa->pa_lock);
2967 continue;
2968 }
2969
2970 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
2971 pa->pa_len);
2972
2973
2974 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
2975 ac->ac_o_ex.fe_logical < pa->pa_lstart));
2976
2977
2978 if (pa->pa_lstart >= end || pa_end <= start) {
2979 spin_unlock(&pa->pa_lock);
2980 continue;
2981 }
2982 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
2983
2984
2985 if (pa_end <= ac->ac_o_ex.fe_logical) {
2986 BUG_ON(pa_end < start);
2987 start = pa_end;
2988 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
2989 BUG_ON(pa->pa_lstart > end);
2990 end = pa->pa_lstart;
2991 }
2992 spin_unlock(&pa->pa_lock);
2993 }
2994 rcu_read_unlock();
2995 size = end - start;
2996
2997
2998 rcu_read_lock();
2999 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3000 ext4_lblk_t pa_end;
3001
3002 spin_lock(&pa->pa_lock);
3003 if (pa->pa_deleted == 0) {
3004 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3005 pa->pa_len);
3006 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3007 }
3008 spin_unlock(&pa->pa_lock);
3009 }
3010 rcu_read_unlock();
3011
3012 if (start + size <= ac->ac_o_ex.fe_logical &&
3013 start > ac->ac_o_ex.fe_logical) {
3014 ext4_msg(ac->ac_sb, KERN_ERR,
3015 "start %lu, size %lu, fe_logical %lu",
3016 (unsigned long) start, (unsigned long) size,
3017 (unsigned long) ac->ac_o_ex.fe_logical);
3018 }
3019 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3020 start > ac->ac_o_ex.fe_logical);
3021 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3022
3023
3024
3025
3026
3027 ac->ac_g_ex.fe_logical = start;
3028 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3029
3030
3031 if (ar->pright && (ar->lright == (start + size))) {
3032
3033 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3034 &ac->ac_f_ex.fe_group,
3035 &ac->ac_f_ex.fe_start);
3036 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3037 }
3038 if (ar->pleft && (ar->lleft + 1 == start)) {
3039
3040 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3041 &ac->ac_f_ex.fe_group,
3042 &ac->ac_f_ex.fe_start);
3043 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3044 }
3045
3046 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3047 (unsigned) orig_size, (unsigned) start);
3048}
3049
3050static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3051{
3052 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3053
3054 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3055 atomic_inc(&sbi->s_bal_reqs);
3056 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3057 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3058 atomic_inc(&sbi->s_bal_success);
3059 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3060 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3061 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3062 atomic_inc(&sbi->s_bal_goals);
3063 if (ac->ac_found > sbi->s_mb_max_to_scan)
3064 atomic_inc(&sbi->s_bal_breaks);
3065 }
3066
3067 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3068 trace_ext4_mballoc_alloc(ac);
3069 else
3070 trace_ext4_mballoc_prealloc(ac);
3071}
3072
3073
3074
3075
3076
3077
3078
3079static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3080{
3081 struct ext4_prealloc_space *pa = ac->ac_pa;
3082
3083 if (pa && pa->pa_type == MB_INODE_PA)
3084 pa->pa_free += ac->ac_b_ex.fe_len;
3085}
3086
3087
3088
3089
3090static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3091 struct ext4_prealloc_space *pa)
3092{
3093 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3094 ext4_fsblk_t start;
3095 ext4_fsblk_t end;
3096 int len;
3097
3098
3099 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3100 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3101 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3102 len = EXT4_NUM_B2C(sbi, end - start);
3103 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3104 &ac->ac_b_ex.fe_start);
3105 ac->ac_b_ex.fe_len = len;
3106 ac->ac_status = AC_STATUS_FOUND;
3107 ac->ac_pa = pa;
3108
3109 BUG_ON(start < pa->pa_pstart);
3110 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3111 BUG_ON(pa->pa_free < len);
3112 pa->pa_free -= len;
3113
3114 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3115}
3116
3117
3118
3119
3120static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3121 struct ext4_prealloc_space *pa)
3122{
3123 unsigned int len = ac->ac_o_ex.fe_len;
3124
3125 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3126 &ac->ac_b_ex.fe_group,
3127 &ac->ac_b_ex.fe_start);
3128 ac->ac_b_ex.fe_len = len;
3129 ac->ac_status = AC_STATUS_FOUND;
3130 ac->ac_pa = pa;
3131
3132
3133
3134
3135
3136
3137
3138 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3139}
3140
3141
3142
3143
3144
3145
3146
3147static struct ext4_prealloc_space *
3148ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3149 struct ext4_prealloc_space *pa,
3150 struct ext4_prealloc_space *cpa)
3151{
3152 ext4_fsblk_t cur_distance, new_distance;
3153
3154 if (cpa == NULL) {
3155 atomic_inc(&pa->pa_count);
3156 return pa;
3157 }
3158 cur_distance = abs(goal_block - cpa->pa_pstart);
3159 new_distance = abs(goal_block - pa->pa_pstart);
3160
3161 if (cur_distance <= new_distance)
3162 return cpa;
3163
3164
3165 atomic_dec(&cpa->pa_count);
3166 atomic_inc(&pa->pa_count);
3167 return pa;
3168}
3169
3170
3171
3172
3173static noinline_for_stack int
3174ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3175{
3176 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3177 int order, i;
3178 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3179 struct ext4_locality_group *lg;
3180 struct ext4_prealloc_space *pa, *cpa = NULL;
3181 ext4_fsblk_t goal_block;
3182
3183
3184 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3185 return 0;
3186
3187
3188 rcu_read_lock();
3189 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3190
3191
3192
3193 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3194 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3195 EXT4_C2B(sbi, pa->pa_len)))
3196 continue;
3197
3198
3199 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3200 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3201 EXT4_MAX_BLOCK_FILE_PHYS))
3202 continue;
3203
3204
3205 spin_lock(&pa->pa_lock);
3206 if (pa->pa_deleted == 0 && pa->pa_free) {
3207 atomic_inc(&pa->pa_count);
3208 ext4_mb_use_inode_pa(ac, pa);
3209 spin_unlock(&pa->pa_lock);
3210 ac->ac_criteria = 10;
3211 rcu_read_unlock();
3212 return 1;
3213 }
3214 spin_unlock(&pa->pa_lock);
3215 }
3216 rcu_read_unlock();
3217
3218
3219 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3220 return 0;
3221
3222
3223 lg = ac->ac_lg;
3224 if (lg == NULL)
3225 return 0;
3226 order = fls(ac->ac_o_ex.fe_len) - 1;
3227 if (order > PREALLOC_TB_SIZE - 1)
3228
3229 order = PREALLOC_TB_SIZE - 1;
3230
3231 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3232
3233
3234
3235
3236 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3237 rcu_read_lock();
3238 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3239 pa_inode_list) {
3240 spin_lock(&pa->pa_lock);
3241 if (pa->pa_deleted == 0 &&
3242 pa->pa_free >= ac->ac_o_ex.fe_len) {
3243
3244 cpa = ext4_mb_check_group_pa(goal_block,
3245 pa, cpa);
3246 }
3247 spin_unlock(&pa->pa_lock);
3248 }
3249 rcu_read_unlock();
3250 }
3251 if (cpa) {
3252 ext4_mb_use_group_pa(ac, cpa);
3253 ac->ac_criteria = 20;
3254 return 1;
3255 }
3256 return 0;
3257}
3258
3259
3260
3261
3262
3263
3264
3265static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3266 ext4_group_t group)
3267{
3268 struct rb_node *n;
3269 struct ext4_group_info *grp;
3270 struct ext4_free_data *entry;
3271
3272 grp = ext4_get_group_info(sb, group);
3273 n = rb_first(&(grp->bb_free_root));
3274
3275 while (n) {
3276 entry = rb_entry(n, struct ext4_free_data, efd_node);
3277 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3278 n = rb_next(n);
3279 }
3280 return;
3281}
3282
3283
3284
3285
3286
3287
3288static noinline_for_stack
3289void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3290 ext4_group_t group)
3291{
3292 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3293 struct ext4_prealloc_space *pa;
3294 struct list_head *cur;
3295 ext4_group_t groupnr;
3296 ext4_grpblk_t start;
3297 int preallocated = 0;
3298 int len;
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308 list_for_each(cur, &grp->bb_prealloc_list) {
3309 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3310 spin_lock(&pa->pa_lock);
3311 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3312 &groupnr, &start);
3313 len = pa->pa_len;
3314 spin_unlock(&pa->pa_lock);
3315 if (unlikely(len == 0))
3316 continue;
3317 BUG_ON(groupnr != group);
3318 ext4_set_bits(bitmap, start, len);
3319 preallocated += len;
3320 }
3321 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3322}
3323
3324static void ext4_mb_pa_callback(struct rcu_head *head)
3325{
3326 struct ext4_prealloc_space *pa;
3327 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3328 kmem_cache_free(ext4_pspace_cachep, pa);
3329}
3330
3331
3332
3333
3334
3335static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3336 struct super_block *sb, struct ext4_prealloc_space *pa)
3337{
3338 ext4_group_t grp;
3339 ext4_fsblk_t grp_blk;
3340
3341 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3342 return;
3343
3344
3345 spin_lock(&pa->pa_lock);
3346 if (pa->pa_deleted == 1) {
3347 spin_unlock(&pa->pa_lock);
3348 return;
3349 }
3350
3351 pa->pa_deleted = 1;
3352 spin_unlock(&pa->pa_lock);
3353
3354 grp_blk = pa->pa_pstart;
3355
3356
3357
3358
3359 if (pa->pa_type == MB_GROUP_PA)
3360 grp_blk--;
3361
3362 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378 ext4_lock_group(sb, grp);
3379 list_del(&pa->pa_group_list);
3380 ext4_unlock_group(sb, grp);
3381
3382 spin_lock(pa->pa_obj_lock);
3383 list_del_rcu(&pa->pa_inode_list);
3384 spin_unlock(pa->pa_obj_lock);
3385
3386 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3387}
3388
3389
3390
3391
3392static noinline_for_stack int
3393ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3394{
3395 struct super_block *sb = ac->ac_sb;
3396 struct ext4_sb_info *sbi = EXT4_SB(sb);
3397 struct ext4_prealloc_space *pa;
3398 struct ext4_group_info *grp;
3399 struct ext4_inode_info *ei;
3400
3401
3402 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3403 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3404 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3405
3406 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3407 if (pa == NULL)
3408 return -ENOMEM;
3409
3410 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3411 int winl;
3412 int wins;
3413 int win;
3414 int offs;
3415
3416
3417
3418
3419 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3420 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3421
3422
3423
3424
3425 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3426
3427
3428 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3429
3430
3431 win = min(winl, wins);
3432
3433 offs = ac->ac_o_ex.fe_logical %
3434 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3435 if (offs && offs < win)
3436 win = offs;
3437
3438 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3439 EXT4_B2C(sbi, win);
3440 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3441 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3442 }
3443
3444
3445
3446 ac->ac_f_ex = ac->ac_b_ex;
3447
3448 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3449 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3450 pa->pa_len = ac->ac_b_ex.fe_len;
3451 pa->pa_free = pa->pa_len;
3452 atomic_set(&pa->pa_count, 1);
3453 spin_lock_init(&pa->pa_lock);
3454 INIT_LIST_HEAD(&pa->pa_inode_list);
3455 INIT_LIST_HEAD(&pa->pa_group_list);
3456 pa->pa_deleted = 0;
3457 pa->pa_type = MB_INODE_PA;
3458
3459 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3460 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3461 trace_ext4_mb_new_inode_pa(ac, pa);
3462
3463 ext4_mb_use_inode_pa(ac, pa);
3464 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3465
3466 ei = EXT4_I(ac->ac_inode);
3467 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3468
3469 pa->pa_obj_lock = &ei->i_prealloc_lock;
3470 pa->pa_inode = ac->ac_inode;
3471
3472 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3473 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3474 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3475
3476 spin_lock(pa->pa_obj_lock);
3477 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3478 spin_unlock(pa->pa_obj_lock);
3479
3480 return 0;
3481}
3482
3483
3484
3485
3486static noinline_for_stack int
3487ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3488{
3489 struct super_block *sb = ac->ac_sb;
3490 struct ext4_locality_group *lg;
3491 struct ext4_prealloc_space *pa;
3492 struct ext4_group_info *grp;
3493
3494
3495 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3496 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3497 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3498
3499 BUG_ON(ext4_pspace_cachep == NULL);
3500 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3501 if (pa == NULL)
3502 return -ENOMEM;
3503
3504
3505
3506 ac->ac_f_ex = ac->ac_b_ex;
3507
3508 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3509 pa->pa_lstart = pa->pa_pstart;
3510 pa->pa_len = ac->ac_b_ex.fe_len;
3511 pa->pa_free = pa->pa_len;
3512 atomic_set(&pa->pa_count, 1);
3513 spin_lock_init(&pa->pa_lock);
3514 INIT_LIST_HEAD(&pa->pa_inode_list);
3515 INIT_LIST_HEAD(&pa->pa_group_list);
3516 pa->pa_deleted = 0;
3517 pa->pa_type = MB_GROUP_PA;
3518
3519 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3520 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3521 trace_ext4_mb_new_group_pa(ac, pa);
3522
3523 ext4_mb_use_group_pa(ac, pa);
3524 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3525
3526 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3527 lg = ac->ac_lg;
3528 BUG_ON(lg == NULL);
3529
3530 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3531 pa->pa_inode = NULL;
3532
3533 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3534 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3535 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3536
3537
3538
3539
3540
3541 return 0;
3542}
3543
3544static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3545{
3546 int err;
3547
3548 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3549 err = ext4_mb_new_group_pa(ac);
3550 else
3551 err = ext4_mb_new_inode_pa(ac);
3552 return err;
3553}
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563static noinline_for_stack int
3564ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3565 struct ext4_prealloc_space *pa)
3566{
3567 struct super_block *sb = e4b->bd_sb;
3568 struct ext4_sb_info *sbi = EXT4_SB(sb);
3569 unsigned int end;
3570 unsigned int next;
3571 ext4_group_t group;
3572 ext4_grpblk_t bit;
3573 unsigned long long grp_blk_start;
3574 int err = 0;
3575 int free = 0;
3576
3577 BUG_ON(pa->pa_deleted == 0);
3578 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3579 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3580 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3581 end = bit + pa->pa_len;
3582
3583 while (bit < end) {
3584 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3585 if (bit >= end)
3586 break;
3587 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3588 mb_debug(1, " free preallocated %u/%u in group %u\n",
3589 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3590 (unsigned) next - bit, (unsigned) group);
3591 free += next - bit;
3592
3593 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3594 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3595 EXT4_C2B(sbi, bit)),
3596 next - bit);
3597 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3598 bit = next + 1;
3599 }
3600 if (free != pa->pa_free) {
3601 ext4_msg(e4b->bd_sb, KERN_CRIT,
3602 "pa %p: logic %lu, phys. %lu, len %lu",
3603 pa, (unsigned long) pa->pa_lstart,
3604 (unsigned long) pa->pa_pstart,
3605 (unsigned long) pa->pa_len);
3606 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3607 free, pa->pa_free);
3608
3609
3610
3611
3612 }
3613 atomic_add(free, &sbi->s_mb_discarded);
3614
3615 return err;
3616}
3617
3618static noinline_for_stack int
3619ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3620 struct ext4_prealloc_space *pa)
3621{
3622 struct super_block *sb = e4b->bd_sb;
3623 ext4_group_t group;
3624 ext4_grpblk_t bit;
3625
3626 trace_ext4_mb_release_group_pa(sb, pa);
3627 BUG_ON(pa->pa_deleted == 0);
3628 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3629 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3630 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3631 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3632 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3633
3634 return 0;
3635}
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646static noinline_for_stack int
3647ext4_mb_discard_group_preallocations(struct super_block *sb,
3648 ext4_group_t group, int needed)
3649{
3650 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3651 struct buffer_head *bitmap_bh = NULL;
3652 struct ext4_prealloc_space *pa, *tmp;
3653 struct list_head list;
3654 struct ext4_buddy e4b;
3655 int err;
3656 int busy = 0;
3657 int free = 0;
3658
3659 mb_debug(1, "discard preallocation for group %u\n", group);
3660
3661 if (list_empty(&grp->bb_prealloc_list))
3662 return 0;
3663
3664 bitmap_bh = ext4_read_block_bitmap(sb, group);
3665 if (bitmap_bh == NULL) {
3666 ext4_error(sb, "Error reading block bitmap for %u", group);
3667 return 0;
3668 }
3669
3670 err = ext4_mb_load_buddy(sb, group, &e4b);
3671 if (err) {
3672 ext4_error(sb, "Error loading buddy information for %u", group);
3673 put_bh(bitmap_bh);
3674 return 0;
3675 }
3676
3677 if (needed == 0)
3678 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3679
3680 INIT_LIST_HEAD(&list);
3681repeat:
3682 ext4_lock_group(sb, group);
3683 list_for_each_entry_safe(pa, tmp,
3684 &grp->bb_prealloc_list, pa_group_list) {
3685 spin_lock(&pa->pa_lock);
3686 if (atomic_read(&pa->pa_count)) {
3687 spin_unlock(&pa->pa_lock);
3688 busy = 1;
3689 continue;
3690 }
3691 if (pa->pa_deleted) {
3692 spin_unlock(&pa->pa_lock);
3693 continue;
3694 }
3695
3696
3697 pa->pa_deleted = 1;
3698
3699
3700 free += pa->pa_free;
3701
3702 spin_unlock(&pa->pa_lock);
3703
3704 list_del(&pa->pa_group_list);
3705 list_add(&pa->u.pa_tmp_list, &list);
3706 }
3707
3708
3709 if (free < needed && busy) {
3710 busy = 0;
3711 ext4_unlock_group(sb, group);
3712
3713
3714
3715
3716 yield();
3717 goto repeat;
3718 }
3719
3720
3721 if (list_empty(&list)) {
3722 BUG_ON(free != 0);
3723 goto out;
3724 }
3725
3726
3727 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3728
3729
3730 spin_lock(pa->pa_obj_lock);
3731 list_del_rcu(&pa->pa_inode_list);
3732 spin_unlock(pa->pa_obj_lock);
3733
3734 if (pa->pa_type == MB_GROUP_PA)
3735 ext4_mb_release_group_pa(&e4b, pa);
3736 else
3737 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3738
3739 list_del(&pa->u.pa_tmp_list);
3740 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3741 }
3742
3743out:
3744 ext4_unlock_group(sb, group);
3745 ext4_mb_unload_buddy(&e4b);
3746 put_bh(bitmap_bh);
3747 return free;
3748}
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759void ext4_discard_preallocations(struct inode *inode)
3760{
3761 struct ext4_inode_info *ei = EXT4_I(inode);
3762 struct super_block *sb = inode->i_sb;
3763 struct buffer_head *bitmap_bh = NULL;
3764 struct ext4_prealloc_space *pa, *tmp;
3765 ext4_group_t group = 0;
3766 struct list_head list;
3767 struct ext4_buddy e4b;
3768 int err;
3769
3770 if (!S_ISREG(inode->i_mode)) {
3771
3772 return;
3773 }
3774
3775 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3776 trace_ext4_discard_preallocations(inode);
3777
3778 INIT_LIST_HEAD(&list);
3779
3780repeat:
3781
3782 spin_lock(&ei->i_prealloc_lock);
3783 while (!list_empty(&ei->i_prealloc_list)) {
3784 pa = list_entry(ei->i_prealloc_list.next,
3785 struct ext4_prealloc_space, pa_inode_list);
3786 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3787 spin_lock(&pa->pa_lock);
3788 if (atomic_read(&pa->pa_count)) {
3789
3790
3791 spin_unlock(&pa->pa_lock);
3792 spin_unlock(&ei->i_prealloc_lock);
3793 ext4_msg(sb, KERN_ERR,
3794 "uh-oh! used pa while discarding");
3795 WARN_ON(1);
3796 schedule_timeout_uninterruptible(HZ);
3797 goto repeat;
3798
3799 }
3800 if (pa->pa_deleted == 0) {
3801 pa->pa_deleted = 1;
3802 spin_unlock(&pa->pa_lock);
3803 list_del_rcu(&pa->pa_inode_list);
3804 list_add(&pa->u.pa_tmp_list, &list);
3805 continue;
3806 }
3807
3808
3809 spin_unlock(&pa->pa_lock);
3810 spin_unlock(&ei->i_prealloc_lock);
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824 schedule_timeout_uninterruptible(HZ);
3825 goto repeat;
3826 }
3827 spin_unlock(&ei->i_prealloc_lock);
3828
3829 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3830 BUG_ON(pa->pa_type != MB_INODE_PA);
3831 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3832
3833 err = ext4_mb_load_buddy(sb, group, &e4b);
3834 if (err) {
3835 ext4_error(sb, "Error loading buddy information for %u",
3836 group);
3837 continue;
3838 }
3839
3840 bitmap_bh = ext4_read_block_bitmap(sb, group);
3841 if (bitmap_bh == NULL) {
3842 ext4_error(sb, "Error reading block bitmap for %u",
3843 group);
3844 ext4_mb_unload_buddy(&e4b);
3845 continue;
3846 }
3847
3848 ext4_lock_group(sb, group);
3849 list_del(&pa->pa_group_list);
3850 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3851 ext4_unlock_group(sb, group);
3852
3853 ext4_mb_unload_buddy(&e4b);
3854 put_bh(bitmap_bh);
3855
3856 list_del(&pa->u.pa_tmp_list);
3857 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3858 }
3859}
3860
#ifdef CONFIG_EXT4_DEBUG
/*
 * Debug helper: dump the allocation context and, for every block group,
 * its preallocations and free/fragment counters.  Silent unless
 * mb_enable_debug is set, and when the filesystem has been aborted.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t ngroups, i;

	if (!mb_enable_debug)
		return;
	if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
		return;

	ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
			" Allocation context details:");
	ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
			ac->ac_status, ac->ac_flags);
	/* original, goal and best extents as group/start/len@logical */
	ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
		 	"goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
		 ac->ac_ex_scanned, ac->ac_found);
	ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");

	ngroups = ext4_get_groups_count(sb);
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;

		/* list this group's preallocations */
		ext4_lock_group(sb, i);
		list_for_each_entry(pa, &grp->bb_prealloc_list,
				    pa_group_list) {
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%u:%d:%u \n", i,
			       start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		/* only groups with free space get a summary line */
		if (grp->bb_free != 0)
			printk(KERN_ERR "%u: %d/%d \n",
			       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
/* Debugging disabled: nothing to show. */
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
}
#endif
3926
3927
3928
3929
3930
3931
3932
3933
3934static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3935{
3936 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3937 int bsbits = ac->ac_sb->s_blocksize_bits;
3938 loff_t size, isize;
3939
3940 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3941 return;
3942
3943 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3944 return;
3945
3946 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3947 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3948 >> bsbits;
3949
3950 if ((size == isize) &&
3951 !ext4_fs_is_busy(sbi) &&
3952 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
3953 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
3954 return;
3955 }
3956
3957 if (sbi->s_mb_group_prealloc <= 0) {
3958 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3959 return;
3960 }
3961
3962
3963 size = max(size, isize);
3964 if (size > sbi->s_mb_stream_request) {
3965 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3966 return;
3967 }
3968
3969 BUG_ON(ac->ac_lg != NULL);
3970
3971
3972
3973
3974
3975 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3976
3977
3978 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
3979
3980
3981 mutex_lock(&ac->ac_lg->lg_mutex);
3982}
3983
3984static noinline_for_stack int
3985ext4_mb_initialize_context(struct ext4_allocation_context *ac,
3986 struct ext4_allocation_request *ar)
3987{
3988 struct super_block *sb = ar->inode->i_sb;
3989 struct ext4_sb_info *sbi = EXT4_SB(sb);
3990 struct ext4_super_block *es = sbi->s_es;
3991 ext4_group_t group;
3992 unsigned int len;
3993 ext4_fsblk_t goal;
3994 ext4_grpblk_t block;
3995
3996
3997 len = ar->len;
3998
3999
4000 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
4001 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
4002
4003
4004 goal = ar->goal;
4005 if (goal < le32_to_cpu(es->s_first_data_block) ||
4006 goal >= ext4_blocks_count(es))
4007 goal = le32_to_cpu(es->s_first_data_block);
4008 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4009
4010
4011 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4012 ac->ac_status = AC_STATUS_CONTINUE;
4013 ac->ac_sb = sb;
4014 ac->ac_inode = ar->inode;
4015 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4016 ac->ac_o_ex.fe_group = group;
4017 ac->ac_o_ex.fe_start = block;
4018 ac->ac_o_ex.fe_len = len;
4019 ac->ac_g_ex = ac->ac_o_ex;
4020 ac->ac_flags = ar->flags;
4021
4022
4023
4024 ext4_mb_group_or_file(ac);
4025
4026 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4027 "left: %u/%u, right %u/%u to %swritable\n",
4028 (unsigned) ar->len, (unsigned) ar->logical,
4029 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4030 (unsigned) ar->lleft, (unsigned) ar->pleft,
4031 (unsigned) ar->lright, (unsigned) ar->pright,
4032 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4033 return 0;
4034
4035}
4036
4037static noinline_for_stack void
4038ext4_mb_discard_lg_preallocations(struct super_block *sb,
4039 struct ext4_locality_group *lg,
4040 int order, int total_entries)
4041{
4042 ext4_group_t group = 0;
4043 struct ext4_buddy e4b;
4044 struct list_head discard_list;
4045 struct ext4_prealloc_space *pa, *tmp;
4046
4047 mb_debug(1, "discard locality group preallocation\n");
4048
4049 INIT_LIST_HEAD(&discard_list);
4050
4051 spin_lock(&lg->lg_prealloc_lock);
4052 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4053 pa_inode_list) {
4054 spin_lock(&pa->pa_lock);
4055 if (atomic_read(&pa->pa_count)) {
4056
4057
4058
4059
4060
4061 spin_unlock(&pa->pa_lock);
4062 continue;
4063 }
4064 if (pa->pa_deleted) {
4065 spin_unlock(&pa->pa_lock);
4066 continue;
4067 }
4068
4069 BUG_ON(pa->pa_type != MB_GROUP_PA);
4070
4071
4072 pa->pa_deleted = 1;
4073 spin_unlock(&pa->pa_lock);
4074
4075 list_del_rcu(&pa->pa_inode_list);
4076 list_add(&pa->u.pa_tmp_list, &discard_list);
4077
4078 total_entries--;
4079 if (total_entries <= 5) {
4080
4081
4082
4083
4084
4085
4086 break;
4087 }
4088 }
4089 spin_unlock(&lg->lg_prealloc_lock);
4090
4091 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4092
4093 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4094 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4095 ext4_error(sb, "Error loading buddy information for %u",
4096 group);
4097 continue;
4098 }
4099 ext4_lock_group(sb, group);
4100 list_del(&pa->pa_group_list);
4101 ext4_mb_release_group_pa(&e4b, pa);
4102 ext4_unlock_group(sb, group);
4103
4104 ext4_mb_unload_buddy(&e4b);
4105 list_del(&pa->u.pa_tmp_list);
4106 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4107 }
4108}
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4120{
4121 int order, added = 0, lg_prealloc_count = 1;
4122 struct super_block *sb = ac->ac_sb;
4123 struct ext4_locality_group *lg = ac->ac_lg;
4124 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4125
4126 order = fls(pa->pa_free) - 1;
4127 if (order > PREALLOC_TB_SIZE - 1)
4128
4129 order = PREALLOC_TB_SIZE - 1;
4130
4131 rcu_read_lock();
4132 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4133 pa_inode_list) {
4134 spin_lock(&tmp_pa->pa_lock);
4135 if (tmp_pa->pa_deleted) {
4136 spin_unlock(&tmp_pa->pa_lock);
4137 continue;
4138 }
4139 if (!added && pa->pa_free < tmp_pa->pa_free) {
4140
4141 list_add_tail_rcu(&pa->pa_inode_list,
4142 &tmp_pa->pa_inode_list);
4143 added = 1;
4144
4145
4146
4147
4148 }
4149 spin_unlock(&tmp_pa->pa_lock);
4150 lg_prealloc_count++;
4151 }
4152 if (!added)
4153 list_add_tail_rcu(&pa->pa_inode_list,
4154 &lg->lg_prealloc_list[order]);
4155 rcu_read_unlock();
4156
4157
4158 if (lg_prealloc_count > 8) {
4159 ext4_mb_discard_lg_preallocations(sb, lg,
4160 order, lg_prealloc_count);
4161 return;
4162 }
4163 return ;
4164}
4165
4166
4167
4168
4169static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4170{
4171 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4172 struct ext4_prealloc_space *pa = ac->ac_pa;
4173 if (pa) {
4174 if (pa->pa_type == MB_GROUP_PA) {
4175
4176 spin_lock(&pa->pa_lock);
4177 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4178 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4179 pa->pa_free -= ac->ac_b_ex.fe_len;
4180 pa->pa_len -= ac->ac_b_ex.fe_len;
4181 spin_unlock(&pa->pa_lock);
4182 }
4183 }
4184 if (pa) {
4185
4186
4187
4188
4189
4190
4191 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4192 spin_lock(pa->pa_obj_lock);
4193 list_del_rcu(&pa->pa_inode_list);
4194 spin_unlock(pa->pa_obj_lock);
4195 ext4_mb_add_n_trim(ac);
4196 }
4197 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4198 }
4199 if (ac->ac_bitmap_page)
4200 page_cache_release(ac->ac_bitmap_page);
4201 if (ac->ac_buddy_page)
4202 page_cache_release(ac->ac_buddy_page);
4203 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4204 mutex_unlock(&ac->ac_lg->lg_mutex);
4205 ext4_mb_collect_stats(ac);
4206 return 0;
4207}
4208
4209static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4210{
4211 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4212 int ret;
4213 int freed = 0;
4214
4215 trace_ext4_mb_discard_preallocations(sb, needed);
4216 for (i = 0; i < ngroups && needed > 0; i++) {
4217 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4218 freed += ret;
4219 needed -= ret;
4220 }
4221
4222 return freed;
4223}
4224
4225
4226
4227
4228
4229
4230ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4231 struct ext4_allocation_request *ar, int *errp)
4232{
4233 int freed;
4234 struct ext4_allocation_context *ac = NULL;
4235 struct ext4_sb_info *sbi;
4236 struct super_block *sb;
4237 ext4_fsblk_t block = 0;
4238 unsigned int inquota = 0;
4239 unsigned int reserv_clstrs = 0;
4240
4241 sb = ar->inode->i_sb;
4242 sbi = EXT4_SB(sb);
4243
4244 trace_ext4_request_blocks(ar);
4245
4246
4247 if (IS_NOQUOTA(ar->inode))
4248 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4249
4250
4251
4252
4253
4254
4255 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4256 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4257 else {
4258
4259
4260
4261
4262 while (ar->len &&
4263 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4264
4265
4266 yield();
4267 ar->len = ar->len >> 1;
4268 }
4269 if (!ar->len) {
4270 *errp = -ENOSPC;
4271 return 0;
4272 }
4273 reserv_clstrs = ar->len;
4274 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4275 dquot_alloc_block_nofail(ar->inode,
4276 EXT4_C2B(sbi, ar->len));
4277 } else {
4278 while (ar->len &&
4279 dquot_alloc_block(ar->inode,
4280 EXT4_C2B(sbi, ar->len))) {
4281
4282 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4283 ar->len--;
4284 }
4285 }
4286 inquota = ar->len;
4287 if (ar->len == 0) {
4288 *errp = -EDQUOT;
4289 goto out;
4290 }
4291 }
4292
4293 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4294 if (!ac) {
4295 ar->len = 0;
4296 *errp = -ENOMEM;
4297 goto out;
4298 }
4299
4300 *errp = ext4_mb_initialize_context(ac, ar);
4301 if (*errp) {
4302 ar->len = 0;
4303 goto out;
4304 }
4305
4306 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4307 if (!ext4_mb_use_preallocated(ac)) {
4308 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4309 ext4_mb_normalize_request(ac, ar);
4310repeat:
4311
4312 *errp = ext4_mb_regular_allocator(ac);
4313 if (*errp)
4314 goto errout;
4315
4316
4317
4318
4319 if (ac->ac_status == AC_STATUS_FOUND &&
4320 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4321 ext4_mb_new_preallocation(ac);
4322 }
4323 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4324 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4325 if (*errp == -EAGAIN) {
4326
4327
4328
4329
4330 ext4_mb_release_context(ac);
4331 ac->ac_b_ex.fe_group = 0;
4332 ac->ac_b_ex.fe_start = 0;
4333 ac->ac_b_ex.fe_len = 0;
4334 ac->ac_status = AC_STATUS_CONTINUE;
4335 goto repeat;
4336 } else if (*errp)
4337 errout:
4338 ext4_discard_allocated_blocks(ac);
4339 else {
4340 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4341 ar->len = ac->ac_b_ex.fe_len;
4342 }
4343 } else {
4344 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4345 if (freed)
4346 goto repeat;
4347 *errp = -ENOSPC;
4348 }
4349
4350 if (*errp) {
4351 ac->ac_b_ex.fe_len = 0;
4352 ar->len = 0;
4353 ext4_mb_show_ac(ac);
4354 }
4355 ext4_mb_release_context(ac);
4356out:
4357 if (ac)
4358 kmem_cache_free(ext4_ac_cachep, ac);
4359 if (inquota && ar->len < inquota)
4360 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4361 if (!ar->len) {
4362 if (!ext4_test_inode_state(ar->inode,
4363 EXT4_STATE_DELALLOC_RESERVED))
4364
4365 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4366 reserv_clstrs);
4367 }
4368
4369 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4370
4371 return block;
4372}
4373
4374
4375
4376
4377
4378
4379static int can_merge(struct ext4_free_data *entry1,
4380 struct ext4_free_data *entry2)
4381{
4382 if ((entry1->efd_tid == entry2->efd_tid) &&
4383 (entry1->efd_group == entry2->efd_group) &&
4384 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4385 return 1;
4386 return 0;
4387}
4388
4389static noinline_for_stack int
4390ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4391 struct ext4_free_data *new_entry)
4392{
4393 ext4_group_t group = e4b->bd_group;
4394 ext4_grpblk_t cluster;
4395 struct ext4_free_data *entry;
4396 struct ext4_group_info *db = e4b->bd_info;
4397 struct super_block *sb = e4b->bd_sb;
4398 struct ext4_sb_info *sbi = EXT4_SB(sb);
4399 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4400 struct rb_node *parent = NULL, *new_node;
4401
4402 BUG_ON(!ext4_handle_valid(handle));
4403 BUG_ON(e4b->bd_bitmap_page == NULL);
4404 BUG_ON(e4b->bd_buddy_page == NULL);
4405
4406 new_node = &new_entry->efd_node;
4407 cluster = new_entry->efd_start_cluster;
4408
4409 if (!*n) {
4410
4411
4412
4413
4414
4415 page_cache_get(e4b->bd_buddy_page);
4416 page_cache_get(e4b->bd_bitmap_page);
4417 }
4418 while (*n) {
4419 parent = *n;
4420 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4421 if (cluster < entry->efd_start_cluster)
4422 n = &(*n)->rb_left;
4423 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4424 n = &(*n)->rb_right;
4425 else {
4426 ext4_grp_locked_error(sb, group, 0,
4427 ext4_group_first_block_no(sb, group) +
4428 EXT4_C2B(sbi, cluster),
4429 "Block already on to-be-freed list");
4430 return 0;
4431 }
4432 }
4433
4434 rb_link_node(new_node, parent, n);
4435 rb_insert_color(new_node, &db->bb_free_root);
4436
4437
4438 node = rb_prev(new_node);
4439 if (node) {
4440 entry = rb_entry(node, struct ext4_free_data, efd_node);
4441 if (can_merge(entry, new_entry)) {
4442 new_entry->efd_start_cluster = entry->efd_start_cluster;
4443 new_entry->efd_count += entry->efd_count;
4444 rb_erase(node, &(db->bb_free_root));
4445 ext4_journal_callback_del(handle, &entry->efd_jce);
4446 kmem_cache_free(ext4_free_data_cachep, entry);
4447 }
4448 }
4449
4450 node = rb_next(new_node);
4451 if (node) {
4452 entry = rb_entry(node, struct ext4_free_data, efd_node);
4453 if (can_merge(new_entry, entry)) {
4454 new_entry->efd_count += entry->efd_count;
4455 rb_erase(node, &(db->bb_free_root));
4456 ext4_journal_callback_del(handle, &entry->efd_jce);
4457 kmem_cache_free(ext4_free_data_cachep, entry);
4458 }
4459 }
4460
4461 ext4_journal_callback_add(handle, ext4_free_data_callback,
4462 &new_entry->efd_jce);
4463 return 0;
4464}
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474void ext4_free_blocks(handle_t *handle, struct inode *inode,
4475 struct buffer_head *bh, ext4_fsblk_t block,
4476 unsigned long count, int flags)
4477{
4478 struct buffer_head *bitmap_bh = NULL;
4479 struct super_block *sb = inode->i_sb;
4480 struct ext4_group_desc *gdp;
4481 unsigned long freed = 0;
4482 unsigned int overflow;
4483 ext4_grpblk_t bit;
4484 struct buffer_head *gd_bh;
4485 ext4_group_t block_group;
4486 struct ext4_sb_info *sbi;
4487 struct ext4_buddy e4b;
4488 unsigned int count_clusters;
4489 int err = 0;
4490 int ret;
4491
4492 if (bh) {
4493 if (block)
4494 BUG_ON(block != bh->b_blocknr);
4495 else
4496 block = bh->b_blocknr;
4497 }
4498
4499 sbi = EXT4_SB(sb);
4500 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4501 !ext4_data_block_valid(sbi, block, count)) {
4502 ext4_error(sb, "Freeing blocks not in datazone - "
4503 "block = %llu, count = %lu", block, count);
4504 goto error_return;
4505 }
4506
4507 ext4_debug("freeing block %llu\n", block);
4508 trace_ext4_free_blocks(inode, block, count, flags);
4509
4510 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4511 struct buffer_head *tbh = bh;
4512 int i;
4513
4514 BUG_ON(bh && (count > 1));
4515
4516 for (i = 0; i < count; i++) {
4517 if (!bh)
4518 tbh = sb_find_get_block(inode->i_sb,
4519 block + i);
4520 if (unlikely(!tbh))
4521 continue;
4522 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4523 inode, tbh, block + i);
4524 }
4525 }
4526
4527
4528
4529
4530
4531
4532
4533
4534 if (!ext4_should_writeback_data(inode))
4535 flags |= EXT4_FREE_BLOCKS_METADATA;
4536
4537
4538
4539
4540
4541
4542
4543
4544 overflow = block & (sbi->s_cluster_ratio - 1);
4545 if (overflow) {
4546 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4547 overflow = sbi->s_cluster_ratio - overflow;
4548 block += overflow;
4549 if (count > overflow)
4550 count -= overflow;
4551 else
4552 return;
4553 } else {
4554 block -= overflow;
4555 count += overflow;
4556 }
4557 }
4558 overflow = count & (sbi->s_cluster_ratio - 1);
4559 if (overflow) {
4560 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4561 if (count > overflow)
4562 count -= overflow;
4563 else
4564 return;
4565 } else
4566 count += sbi->s_cluster_ratio - overflow;
4567 }
4568
4569do_more:
4570 overflow = 0;
4571 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4572
4573
4574
4575
4576
4577 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4578 overflow = EXT4_C2B(sbi, bit) + count -
4579 EXT4_BLOCKS_PER_GROUP(sb);
4580 count -= overflow;
4581 }
4582 count_clusters = EXT4_B2C(sbi, count);
4583 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4584 if (!bitmap_bh) {
4585 err = -EIO;
4586 goto error_return;
4587 }
4588 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4589 if (!gdp) {
4590 err = -EIO;
4591 goto error_return;
4592 }
4593
4594 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4595 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4596 in_range(block, ext4_inode_table(sb, gdp),
4597 EXT4_SB(sb)->s_itb_per_group) ||
4598 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4599 EXT4_SB(sb)->s_itb_per_group)) {
4600
4601 ext4_error(sb, "Freeing blocks in system zone - "
4602 "Block = %llu, count = %lu", block, count);
4603
4604 goto error_return;
4605 }
4606
4607 BUFFER_TRACE(bitmap_bh, "getting write access");
4608 err = ext4_journal_get_write_access(handle, bitmap_bh);
4609 if (err)
4610 goto error_return;
4611
4612
4613
4614
4615
4616
4617 BUFFER_TRACE(gd_bh, "get_write_access");
4618 err = ext4_journal_get_write_access(handle, gd_bh);
4619 if (err)
4620 goto error_return;
4621#ifdef AGGRESSIVE_CHECK
4622 {
4623 int i;
4624 for (i = 0; i < count_clusters; i++)
4625 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4626 }
4627#endif
4628 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4629
4630 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4631 if (err)
4632 goto error_return;
4633
4634 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4635 struct ext4_free_data *new_entry;
4636
4637
4638
4639
4640 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4641 if (!new_entry) {
4642 ext4_mb_unload_buddy(&e4b);
4643 err = -ENOMEM;
4644 goto error_return;
4645 }
4646 new_entry->efd_start_cluster = bit;
4647 new_entry->efd_group = block_group;
4648 new_entry->efd_count = count_clusters;
4649 new_entry->efd_tid = handle->h_transaction->t_tid;
4650
4651 ext4_lock_group(sb, block_group);
4652 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4653 ext4_mb_free_metadata(handle, &e4b, new_entry);
4654 } else {
4655
4656
4657
4658
4659 if (test_opt(sb, DISCARD))
4660 ext4_issue_discard(sb, block_group, bit, count);
4661 ext4_lock_group(sb, block_group);
4662 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4663 mb_free_blocks(inode, &e4b, bit, count_clusters);
4664 }
4665
4666 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4667 ext4_free_group_clusters_set(sb, gdp, ret);
4668 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4669 ext4_group_desc_csum_set(sb, block_group, gdp);
4670 ext4_unlock_group(sb, block_group);
4671 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4672
4673 if (sbi->s_log_groups_per_flex) {
4674 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4675 atomic_add(count_clusters,
4676 &sbi->s_flex_groups[flex_group].free_clusters);
4677 }
4678
4679 ext4_mb_unload_buddy(&e4b);
4680
4681 freed += count;
4682
4683 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4684 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4685
4686
4687 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4688 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4689
4690
4691 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4692 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4693 if (!err)
4694 err = ret;
4695
4696 if (overflow && !err) {
4697 block += count;
4698 count = overflow;
4699 put_bh(bitmap_bh);
4700 goto do_more;
4701 }
4702error_return:
4703 brelse(bitmap_bh);
4704 ext4_std_error(sb, err);
4705 return;
4706}
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4718 ext4_fsblk_t block, unsigned long count)
4719{
4720 struct buffer_head *bitmap_bh = NULL;
4721 struct buffer_head *gd_bh;
4722 ext4_group_t block_group;
4723 ext4_grpblk_t bit;
4724 unsigned int i;
4725 struct ext4_group_desc *desc;
4726 struct ext4_sb_info *sbi = EXT4_SB(sb);
4727 struct ext4_buddy e4b;
4728 int err = 0, ret, blk_free_count;
4729 ext4_grpblk_t blocks_freed;
4730
4731 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4732
4733 if (count == 0)
4734 return 0;
4735
4736 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4737
4738
4739
4740
4741 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4742 ext4_warning(sb, "too much blocks added to group %u\n",
4743 block_group);
4744 err = -EINVAL;
4745 goto error_return;
4746 }
4747
4748 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4749 if (!bitmap_bh) {
4750 err = -EIO;
4751 goto error_return;
4752 }
4753
4754 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4755 if (!desc) {
4756 err = -EIO;
4757 goto error_return;
4758 }
4759
4760 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4761 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4762 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4763 in_range(block + count - 1, ext4_inode_table(sb, desc),
4764 sbi->s_itb_per_group)) {
4765 ext4_error(sb, "Adding blocks in system zones - "
4766 "Block = %llu, count = %lu",
4767 block, count);
4768 err = -EINVAL;
4769 goto error_return;
4770 }
4771
4772 BUFFER_TRACE(bitmap_bh, "getting write access");
4773 err = ext4_journal_get_write_access(handle, bitmap_bh);
4774 if (err)
4775 goto error_return;
4776
4777
4778
4779
4780
4781
4782 BUFFER_TRACE(gd_bh, "get_write_access");
4783 err = ext4_journal_get_write_access(handle, gd_bh);
4784 if (err)
4785 goto error_return;
4786
4787 for (i = 0, blocks_freed = 0; i < count; i++) {
4788 BUFFER_TRACE(bitmap_bh, "clear bit");
4789 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4790 ext4_error(sb, "bit already cleared for block %llu",
4791 (ext4_fsblk_t)(block + i));
4792 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4793 } else {
4794 blocks_freed++;
4795 }
4796 }
4797
4798 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4799 if (err)
4800 goto error_return;
4801
4802
4803
4804
4805
4806
4807 ext4_lock_group(sb, block_group);
4808 mb_clear_bits(bitmap_bh->b_data, bit, count);
4809 mb_free_blocks(NULL, &e4b, bit, count);
4810 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4811 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4812 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
4813 ext4_group_desc_csum_set(sb, block_group, desc);
4814 ext4_unlock_group(sb, block_group);
4815 percpu_counter_add(&sbi->s_freeclusters_counter,
4816 EXT4_B2C(sbi, blocks_freed));
4817
4818 if (sbi->s_log_groups_per_flex) {
4819 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4820 atomic_add(EXT4_B2C(sbi, blocks_freed),
4821 &sbi->s_flex_groups[flex_group].free_clusters);
4822 }
4823
4824 ext4_mb_unload_buddy(&e4b);
4825
4826
4827 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4828 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4829
4830
4831 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4832 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4833 if (!err)
4834 err = ret;
4835
4836error_return:
4837 brelse(bitmap_bh);
4838 ext4_std_error(sb, err);
4839 return err;
4840}
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854static void ext4_trim_extent(struct super_block *sb, int start, int count,
4855 ext4_group_t group, struct ext4_buddy *e4b)
4856{
4857 struct ext4_free_extent ex;
4858
4859 trace_ext4_trim_extent(sb, group, start, count);
4860
4861 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4862
4863 ex.fe_start = start;
4864 ex.fe_group = group;
4865 ex.fe_len = count;
4866
4867
4868
4869
4870
4871 mb_mark_used(e4b, &ex);
4872 ext4_unlock_group(sb, group);
4873 ext4_issue_discard(sb, group, start, count);
4874 ext4_lock_group(sb, group);
4875 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4876}
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896static ext4_grpblk_t
4897ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4898 ext4_grpblk_t start, ext4_grpblk_t max,
4899 ext4_grpblk_t minblocks)
4900{
4901 void *bitmap;
4902 ext4_grpblk_t next, count = 0, free_count = 0;
4903 struct ext4_buddy e4b;
4904 int ret;
4905
4906 trace_ext4_trim_all_free(sb, group, start, max);
4907
4908 ret = ext4_mb_load_buddy(sb, group, &e4b);
4909 if (ret) {
4910 ext4_error(sb, "Error in loading buddy "
4911 "information for %u", group);
4912 return ret;
4913 }
4914 bitmap = e4b.bd_bitmap;
4915
4916 ext4_lock_group(sb, group);
4917 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
4918 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
4919 goto out;
4920
4921 start = (e4b.bd_info->bb_first_free > start) ?
4922 e4b.bd_info->bb_first_free : start;
4923
4924 while (start <= max) {
4925 start = mb_find_next_zero_bit(bitmap, max + 1, start);
4926 if (start > max)
4927 break;
4928 next = mb_find_next_bit(bitmap, max + 1, start);
4929
4930 if ((next - start) >= minblocks) {
4931 ext4_trim_extent(sb, start,
4932 next - start, group, &e4b);
4933 count += next - start;
4934 }
4935 free_count += next - start;
4936 start = next + 1;
4937
4938 if (fatal_signal_pending(current)) {
4939 count = -ERESTARTSYS;
4940 break;
4941 }
4942
4943 if (need_resched()) {
4944 ext4_unlock_group(sb, group);
4945 cond_resched();
4946 ext4_lock_group(sb, group);
4947 }
4948
4949 if ((e4b.bd_info->bb_free - free_count) < minblocks)
4950 break;
4951 }
4952
4953 if (!ret)
4954 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
4955out:
4956 ext4_unlock_group(sb, group);
4957 ext4_mb_unload_buddy(&e4b);
4958
4959 ext4_debug("trimmed %d blocks in the group %d\n",
4960 count, group);
4961
4962 return count;
4963}
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4978{
4979 struct ext4_group_info *grp;
4980 ext4_group_t group, first_group, last_group;
4981 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
4982 uint64_t start, end, minlen, trimmed = 0;
4983 ext4_fsblk_t first_data_blk =
4984 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4985 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
4986 int ret = 0;
4987
4988 start = range->start >> sb->s_blocksize_bits;
4989 end = start + (range->len >> sb->s_blocksize_bits) - 1;
4990 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
4991 range->minlen >> sb->s_blocksize_bits);
4992
4993 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
4994 start >= max_blks ||
4995 range->len < sb->s_blocksize)
4996 return -EINVAL;
4997 if (end >= max_blks)
4998 end = max_blks - 1;
4999 if (end <= first_data_blk)
5000 goto out;
5001 if (start < first_data_blk)
5002 start = first_data_blk;
5003
5004
5005 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5006 &first_group, &first_cluster);
5007 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5008 &last_group, &last_cluster);
5009
5010
5011 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5012
5013 for (group = first_group; group <= last_group; group++) {
5014 grp = ext4_get_group_info(sb, group);
5015
5016 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
5017 ret = ext4_mb_init_group(sb, group);
5018 if (ret)
5019 break;
5020 }
5021
5022
5023
5024
5025
5026
5027
5028 if (group == last_group)
5029 end = last_cluster;
5030
5031 if (grp->bb_free >= minlen) {
5032 cnt = ext4_trim_all_free(sb, group, first_cluster,
5033 end, minlen);
5034 if (cnt < 0) {
5035 ret = cnt;
5036 break;
5037 }
5038 trimmed += cnt;
5039 }
5040
5041
5042
5043
5044
5045 first_cluster = 0;
5046 }
5047
5048 if (!ret)
5049 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5050
5051out:
5052 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5053 return ret;
5054}
5055