1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "ext4_jbd2.h"
25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <trace/events/ext4.h>
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
/* kmem_cache handles used by the allocator; they are set up outside this section. */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/*
 * Group-info caches and their slab names, index for index.
 * NOTE(review): judging by the names, there is one cache per block size
 * from 1k to 128k - confirm against the code that creates them.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

/* The table itself is never modified, so both levels are const. */
static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};
356
357static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
358 ext4_group_t group);
359static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
360 ext4_group_t group);
361static void ext4_free_data_callback(struct super_block *sb,
362 struct ext4_journal_cb_entry *jce, int rc);
363
/*
 * Round @addr down to an unsigned-long boundary and compensate in *@bit.
 *
 * For every byte @addr is moved back, eight is added to *@bit.
 * The mask is derived from sizeof(unsigned long) so the same code serves
 * every word size (7 for 64-bit longs, 3 for 32-bit longs).
 *
 * Returns the aligned address.
 */
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
	const unsigned long misalign_mask = sizeof(unsigned long) - 1;
	unsigned long raw = (unsigned long) addr;

	/* bytes of misalignment, converted to bits */
	*bit += (raw & misalign_mask) << 3;
	return (void *) (raw & ~misalign_mask);
}
377
/*
 * Test bit @bit of the bitmap starting at @addr.
 * @addr is first aligned via mb_correct_addr_and_bit(), which adjusts
 * @bit accordingly, before ext4_test_bit() is applied.
 */
static inline int mb_test_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, aligned);
}
387
/* Set bit @bit of the bitmap at @addr, after aligning @addr. */
static inline void mb_set_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, aligned);
}
393
/* Clear bit @bit of the bitmap at @addr, after aligning @addr. */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, aligned);
}
399
/*
 * Find the next zero bit in the bitmap at @addr, searching from @start
 * up to @max.  The address is aligned first and both bounds are shifted
 * by the same amount; the result is shifted back and clamped to @max.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;
	void *aligned = mb_correct_addr_and_bit(&shift, addr);

	found = ext4_find_next_zero_bit(aligned, max + shift, start + shift)
		- shift;
	return found > max ? max : found;
}
412
/*
 * Find the next set bit in the bitmap at @addr, searching from @start
 * up to @max.  The address is aligned first and both bounds are shifted
 * by the same amount; the result is shifted back and clamped to @max.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;
	void *aligned = mb_correct_addr_and_bit(&shift, addr);

	found = ext4_find_next_bit(aligned, max + shift, start + shift) - shift;
	return found > max ? max : found;
}
425
426static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
427{
428 char *bb;
429
430 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
431 BUG_ON(max == NULL);
432
433 if (order > e4b->bd_blkbits + 1) {
434 *max = 0;
435 return NULL;
436 }
437
438
439 if (order == 0) {
440 *max = 1 << (e4b->bd_blkbits + 3);
441 return e4b->bd_bitmap;
442 }
443
444 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
445 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
446
447 return bb;
448}
449
450#ifdef DOUBLE_CHECK
451static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
452 int first, int count)
453{
454 int i;
455 struct super_block *sb = e4b->bd_sb;
456
457 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
458 return;
459 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
460 for (i = 0; i < count; i++) {
461 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
462 ext4_fsblk_t blocknr;
463
464 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
465 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
466 ext4_grp_locked_error(sb, e4b->bd_group,
467 inode ? inode->i_ino : 0,
468 blocknr,
469 "freeing block already freed "
470 "(bit %u)",
471 first + i);
472 }
473 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
474 }
475}
476
477static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
478{
479 int i;
480
481 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
482 return;
483 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
484 for (i = 0; i < count; i++) {
485 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
486 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
487 }
488}
489
490static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
491{
492 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
493 unsigned char *b1, *b2;
494 int i;
495 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
496 b2 = (unsigned char *) bitmap;
497 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
498 if (b1[i] != b2[i]) {
499 ext4_msg(e4b->bd_sb, KERN_ERR,
500 "corruption in group %u "
501 "at byte %u(%u): %x in copy != %x "
502 "on disk/prealloc",
503 e4b->bd_group, i, i * 8, b1[i], b2[i]);
504 BUG();
505 }
506 }
507 }
508}
509
510#else
/* Without DOUBLE_CHECK there is no shadow bitmap to update. */
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
}
/* Without DOUBLE_CHECK there is no shadow bitmap to update. */
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
}
/* Without DOUBLE_CHECK there is no shadow bitmap to compare against. */
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
}
525#endif
526
527#ifdef AGGRESSIVE_CHECK
528
529#define MB_CHECK_ASSERT(assert) \
530do { \
531 if (!(assert)) { \
532 printk(KERN_EMERG \
533 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
534 function, file, line, # assert); \
535 BUG(); \
536 } \
537} while (0)
538
539static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
540 const char *function, int line)
541{
542 struct super_block *sb = e4b->bd_sb;
543 int order = e4b->bd_blkbits + 1;
544 int max;
545 int max2;
546 int i;
547 int j;
548 int k;
549 int count;
550 struct ext4_group_info *grp;
551 int fragments = 0;
552 int fstart;
553 struct list_head *cur;
554 void *buddy;
555 void *buddy2;
556
557 {
558 static int mb_check_counter;
559 if (mb_check_counter++ % 100 != 0)
560 return 0;
561 }
562
563 while (order > 1) {
564 buddy = mb_find_buddy(e4b, order, &max);
565 MB_CHECK_ASSERT(buddy);
566 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
567 MB_CHECK_ASSERT(buddy2);
568 MB_CHECK_ASSERT(buddy != buddy2);
569 MB_CHECK_ASSERT(max * 2 == max2);
570
571 count = 0;
572 for (i = 0; i < max; i++) {
573
574 if (mb_test_bit(i, buddy)) {
575
576 if (!mb_test_bit(i << 1, buddy2)) {
577 MB_CHECK_ASSERT(
578 mb_test_bit((i<<1)+1, buddy2));
579 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
580 MB_CHECK_ASSERT(
581 mb_test_bit(i << 1, buddy2));
582 }
583 continue;
584 }
585
586
587 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
588 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
589
590 for (j = 0; j < (1 << order); j++) {
591 k = (i * (1 << order)) + j;
592 MB_CHECK_ASSERT(
593 !mb_test_bit(k, e4b->bd_bitmap));
594 }
595 count++;
596 }
597 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
598 order--;
599 }
600
601 fstart = -1;
602 buddy = mb_find_buddy(e4b, 0, &max);
603 for (i = 0; i < max; i++) {
604 if (!mb_test_bit(i, buddy)) {
605 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
606 if (fstart == -1) {
607 fragments++;
608 fstart = i;
609 }
610 continue;
611 }
612 fstart = -1;
613
614 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
615 buddy2 = mb_find_buddy(e4b, j, &max2);
616 k = i >> j;
617 MB_CHECK_ASSERT(k < max2);
618 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
619 }
620 }
621 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
622 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
623
624 grp = ext4_get_group_info(sb, e4b->bd_group);
625 list_for_each(cur, &grp->bb_prealloc_list) {
626 ext4_group_t groupnr;
627 struct ext4_prealloc_space *pa;
628 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
629 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
630 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
631 for (i = 0; i < pa->pa_len; i++)
632 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
633 }
634 return 0;
635}
636#undef MB_CHECK_ASSERT
637#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
638 __FILE__, __func__, __LINE__)
639#else
640#define mb_check_buddy(e4b)
641#endif
642
643
644
645
646
647
648
649static void ext4_mb_mark_free_simple(struct super_block *sb,
650 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
651 struct ext4_group_info *grp)
652{
653 struct ext4_sb_info *sbi = EXT4_SB(sb);
654 ext4_grpblk_t min;
655 ext4_grpblk_t max;
656 ext4_grpblk_t chunk;
657 unsigned short border;
658
659 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
660
661 border = 2 << sb->s_blocksize_bits;
662
663 while (len > 0) {
664
665 max = ffs(first | border) - 1;
666
667
668 min = fls(len) - 1;
669
670 if (max < min)
671 min = max;
672 chunk = 1 << min;
673
674
675 grp->bb_counters[min]++;
676 if (min > 0)
677 mb_clear_bit(first >> min,
678 buddy + sbi->s_mb_offsets[min]);
679
680 len -= chunk;
681 first += chunk;
682 }
683}
684
685
686
687
688
689static void
690mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
691{
692 int i;
693 int bits;
694
695 grp->bb_largest_free_order = -1;
696
697 bits = sb->s_blocksize_bits + 1;
698 for (i = bits; i >= 0; i--) {
699 if (grp->bb_counters[i] > 0) {
700 grp->bb_largest_free_order = i;
701 break;
702 }
703 }
704}
705
706static noinline_for_stack
707void ext4_mb_generate_buddy(struct super_block *sb,
708 void *buddy, void *bitmap, ext4_group_t group)
709{
710 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
711 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
712 ext4_grpblk_t i = 0;
713 ext4_grpblk_t first;
714 ext4_grpblk_t len;
715 unsigned free = 0;
716 unsigned fragments = 0;
717 unsigned long long period = get_cycles();
718
719
720
721 i = mb_find_next_zero_bit(bitmap, max, 0);
722 grp->bb_first_free = i;
723 while (i < max) {
724 fragments++;
725 first = i;
726 i = mb_find_next_bit(bitmap, max, i);
727 len = i - first;
728 free += len;
729 if (len > 1)
730 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
731 else
732 grp->bb_counters[0]++;
733 if (i < max)
734 i = mb_find_next_zero_bit(bitmap, max, i);
735 }
736 grp->bb_fragments = fragments;
737
738 if (free != grp->bb_free) {
739 ext4_grp_locked_error(sb, group, 0, 0,
740 "%u clusters in bitmap, %u in gd",
741 free, grp->bb_free);
742
743
744
745
746 grp->bb_free = free;
747 }
748 mb_set_largest_free_order(sb, grp);
749
750 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
751
752 period = get_cycles() - period;
753 spin_lock(&EXT4_SB(sb)->s_bal_lock);
754 EXT4_SB(sb)->s_mb_buddies_generated++;
755 EXT4_SB(sb)->s_mb_generation_time += period;
756 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
757}
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779static int ext4_mb_init_cache(struct page *page, char *incore)
780{
781 ext4_group_t ngroups;
782 int blocksize;
783 int blocks_per_page;
784 int groups_per_page;
785 int err = 0;
786 int i;
787 ext4_group_t first_group, group;
788 int first_block;
789 struct super_block *sb;
790 struct buffer_head *bhs;
791 struct buffer_head **bh = NULL;
792 struct inode *inode;
793 char *data;
794 char *bitmap;
795 struct ext4_group_info *grinfo;
796
797 mb_debug(1, "init page %lu\n", page->index);
798
799 inode = page->mapping->host;
800 sb = inode->i_sb;
801 ngroups = ext4_get_groups_count(sb);
802 blocksize = 1 << inode->i_blkbits;
803 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
804
805 groups_per_page = blocks_per_page >> 1;
806 if (groups_per_page == 0)
807 groups_per_page = 1;
808
809
810 if (groups_per_page > 1) {
811 i = sizeof(struct buffer_head *) * groups_per_page;
812 bh = kzalloc(i, GFP_NOFS);
813 if (bh == NULL) {
814 err = -ENOMEM;
815 goto out;
816 }
817 } else
818 bh = &bhs;
819
820 first_group = page->index * blocks_per_page / 2;
821
822
823 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
824 if (group >= ngroups)
825 break;
826
827 grinfo = ext4_get_group_info(sb, group);
828
829
830
831
832
833
834 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
835 bh[i] = NULL;
836 continue;
837 }
838 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
839 err = -ENOMEM;
840 goto out;
841 }
842 mb_debug(1, "read bitmap for group %u\n", group);
843 }
844
845
846 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
847 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
848 err = -EIO;
849 goto out;
850 }
851 }
852
853 first_block = page->index * blocks_per_page;
854 for (i = 0; i < blocks_per_page; i++) {
855 int group;
856
857 group = (first_block + i) >> 1;
858 if (group >= ngroups)
859 break;
860
861 if (!bh[group - first_group])
862
863 continue;
864
865
866
867
868
869
870
871 data = page_address(page) + (i * blocksize);
872 bitmap = bh[group - first_group]->b_data;
873
874
875
876
877
878 if ((first_block + i) & 1) {
879
880 BUG_ON(incore == NULL);
881 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
882 group, page->index, i * blocksize);
883 trace_ext4_mb_buddy_bitmap_load(sb, group);
884 grinfo = ext4_get_group_info(sb, group);
885 grinfo->bb_fragments = 0;
886 memset(grinfo->bb_counters, 0,
887 sizeof(*grinfo->bb_counters) *
888 (sb->s_blocksize_bits+2));
889
890
891
892 ext4_lock_group(sb, group);
893
894 memset(data, 0xff, blocksize);
895 ext4_mb_generate_buddy(sb, data, incore, group);
896 ext4_unlock_group(sb, group);
897 incore = NULL;
898 } else {
899
900 BUG_ON(incore != NULL);
901 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
902 group, page->index, i * blocksize);
903 trace_ext4_mb_bitmap_load(sb, group);
904
905
906 ext4_lock_group(sb, group);
907 memcpy(data, bitmap, blocksize);
908
909
910 ext4_mb_generate_from_pa(sb, data, group);
911 ext4_mb_generate_from_freelist(sb, data, group);
912 ext4_unlock_group(sb, group);
913
914
915
916
917 incore = data;
918 }
919 }
920 SetPageUptodate(page);
921
922out:
923 if (bh) {
924 for (i = 0; i < groups_per_page; i++)
925 brelse(bh[i]);
926 if (bh != &bhs)
927 kfree(bh);
928 }
929 return err;
930}
931
932
933
934
935
936
937
938static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
939 ext4_group_t group, struct ext4_buddy *e4b)
940{
941 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
942 int block, pnum, poff;
943 int blocks_per_page;
944 struct page *page;
945
946 e4b->bd_buddy_page = NULL;
947 e4b->bd_bitmap_page = NULL;
948
949 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
950
951
952
953
954
955 block = group * 2;
956 pnum = block / blocks_per_page;
957 poff = block % blocks_per_page;
958 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
959 if (!page)
960 return -EIO;
961 BUG_ON(page->mapping != inode->i_mapping);
962 e4b->bd_bitmap_page = page;
963 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
964
965 if (blocks_per_page >= 2) {
966
967 return 0;
968 }
969
970 block++;
971 pnum = block / blocks_per_page;
972 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
973 if (!page)
974 return -EIO;
975 BUG_ON(page->mapping != inode->i_mapping);
976 e4b->bd_buddy_page = page;
977 return 0;
978}
979
980static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
981{
982 if (e4b->bd_bitmap_page) {
983 unlock_page(e4b->bd_bitmap_page);
984 page_cache_release(e4b->bd_bitmap_page);
985 }
986 if (e4b->bd_buddy_page) {
987 unlock_page(e4b->bd_buddy_page);
988 page_cache_release(e4b->bd_buddy_page);
989 }
990}
991
992
993
994
995
996
997static noinline_for_stack
998int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
999{
1000
1001 struct ext4_group_info *this_grp;
1002 struct ext4_buddy e4b;
1003 struct page *page;
1004 int ret = 0;
1005
1006 mb_debug(1, "init group %u\n", group);
1007 this_grp = ext4_get_group_info(sb, group);
1008
1009
1010
1011
1012
1013
1014
1015 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1016 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1017
1018
1019
1020
1021 goto err;
1022 }
1023
1024 page = e4b.bd_bitmap_page;
1025 ret = ext4_mb_init_cache(page, NULL);
1026 if (ret)
1027 goto err;
1028 if (!PageUptodate(page)) {
1029 ret = -EIO;
1030 goto err;
1031 }
1032 mark_page_accessed(page);
1033
1034 if (e4b.bd_buddy_page == NULL) {
1035
1036
1037
1038
1039
1040 ret = 0;
1041 goto err;
1042 }
1043
1044 page = e4b.bd_buddy_page;
1045 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1046 if (ret)
1047 goto err;
1048 if (!PageUptodate(page)) {
1049 ret = -EIO;
1050 goto err;
1051 }
1052 mark_page_accessed(page);
1053err:
1054 ext4_mb_put_buddy_page_lock(&e4b);
1055 return ret;
1056}
1057
1058
1059
1060
1061
1062
1063static noinline_for_stack int
1064ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1065 struct ext4_buddy *e4b)
1066{
1067 int blocks_per_page;
1068 int block;
1069 int pnum;
1070 int poff;
1071 struct page *page;
1072 int ret;
1073 struct ext4_group_info *grp;
1074 struct ext4_sb_info *sbi = EXT4_SB(sb);
1075 struct inode *inode = sbi->s_buddy_cache;
1076
1077 mb_debug(1, "load group %u\n", group);
1078
1079 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1080 grp = ext4_get_group_info(sb, group);
1081
1082 e4b->bd_blkbits = sb->s_blocksize_bits;
1083 e4b->bd_info = grp;
1084 e4b->bd_sb = sb;
1085 e4b->bd_group = group;
1086 e4b->bd_buddy_page = NULL;
1087 e4b->bd_bitmap_page = NULL;
1088
1089 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1090
1091
1092
1093
1094 ret = ext4_mb_init_group(sb, group);
1095 if (ret)
1096 return ret;
1097 }
1098
1099
1100
1101
1102
1103
1104 block = group * 2;
1105 pnum = block / blocks_per_page;
1106 poff = block % blocks_per_page;
1107
1108
1109
1110 page = find_get_page(inode->i_mapping, pnum);
1111 if (page == NULL || !PageUptodate(page)) {
1112 if (page)
1113
1114
1115
1116
1117
1118
1119
1120
1121 page_cache_release(page);
1122 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1123 if (page) {
1124 BUG_ON(page->mapping != inode->i_mapping);
1125 if (!PageUptodate(page)) {
1126 ret = ext4_mb_init_cache(page, NULL);
1127 if (ret) {
1128 unlock_page(page);
1129 goto err;
1130 }
1131 mb_cmp_bitmaps(e4b, page_address(page) +
1132 (poff * sb->s_blocksize));
1133 }
1134 unlock_page(page);
1135 }
1136 }
1137 if (page == NULL || !PageUptodate(page)) {
1138 ret = -EIO;
1139 goto err;
1140 }
1141 e4b->bd_bitmap_page = page;
1142 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1143 mark_page_accessed(page);
1144
1145 block++;
1146 pnum = block / blocks_per_page;
1147 poff = block % blocks_per_page;
1148
1149 page = find_get_page(inode->i_mapping, pnum);
1150 if (page == NULL || !PageUptodate(page)) {
1151 if (page)
1152 page_cache_release(page);
1153 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1154 if (page) {
1155 BUG_ON(page->mapping != inode->i_mapping);
1156 if (!PageUptodate(page)) {
1157 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
1158 if (ret) {
1159 unlock_page(page);
1160 goto err;
1161 }
1162 }
1163 unlock_page(page);
1164 }
1165 }
1166 if (page == NULL || !PageUptodate(page)) {
1167 ret = -EIO;
1168 goto err;
1169 }
1170 e4b->bd_buddy_page = page;
1171 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1172 mark_page_accessed(page);
1173
1174 BUG_ON(e4b->bd_bitmap_page == NULL);
1175 BUG_ON(e4b->bd_buddy_page == NULL);
1176
1177 return 0;
1178
1179err:
1180 if (page)
1181 page_cache_release(page);
1182 if (e4b->bd_bitmap_page)
1183 page_cache_release(e4b->bd_bitmap_page);
1184 if (e4b->bd_buddy_page)
1185 page_cache_release(e4b->bd_buddy_page);
1186 e4b->bd_buddy = NULL;
1187 e4b->bd_bitmap = NULL;
1188 return ret;
1189}
1190
1191static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1192{
1193 if (e4b->bd_bitmap_page)
1194 page_cache_release(e4b->bd_bitmap_page);
1195 if (e4b->bd_buddy_page)
1196 page_cache_release(e4b->bd_buddy_page);
1197}
1198
1199
1200static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1201{
1202 int order = 1;
1203 void *bb;
1204
1205 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1206 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1207
1208 bb = e4b->bd_buddy;
1209 while (order <= e4b->bd_blkbits + 1) {
1210 block = block >> 1;
1211 if (!mb_test_bit(block, bb)) {
1212
1213 return order;
1214 }
1215 bb += 1 << (e4b->bd_blkbits - order);
1216 order++;
1217 }
1218 return 0;
1219}
1220
1221static void mb_clear_bits(void *bm, int cur, int len)
1222{
1223 __u32 *addr;
1224
1225 len = cur + len;
1226 while (cur < len) {
1227 if ((cur & 31) == 0 && (len - cur) >= 32) {
1228
1229 addr = bm + (cur >> 3);
1230 *addr = 0;
1231 cur += 32;
1232 continue;
1233 }
1234 mb_clear_bit(cur, bm);
1235 cur++;
1236 }
1237}
1238
1239void ext4_set_bits(void *bm, int cur, int len)
1240{
1241 __u32 *addr;
1242
1243 len = cur + len;
1244 while (cur < len) {
1245 if ((cur & 31) == 0 && (len - cur) >= 32) {
1246
1247 addr = bm + (cur >> 3);
1248 *addr = 0xffffffff;
1249 cur += 32;
1250 continue;
1251 }
1252 mb_set_bit(cur, bm);
1253 cur++;
1254 }
1255}
1256
1257static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1258 int first, int count)
1259{
1260 int block = 0;
1261 int max = 0;
1262 int order;
1263 void *buddy;
1264 void *buddy2;
1265 struct super_block *sb = e4b->bd_sb;
1266
1267 BUG_ON(first + count > (sb->s_blocksize << 3));
1268 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1269 mb_check_buddy(e4b);
1270 mb_free_blocks_double(inode, e4b, first, count);
1271
1272 e4b->bd_info->bb_free += count;
1273 if (first < e4b->bd_info->bb_first_free)
1274 e4b->bd_info->bb_first_free = first;
1275
1276
1277 if (first != 0)
1278 block = !mb_test_bit(first - 1, e4b->bd_bitmap);
1279 if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
1280 max = !mb_test_bit(first + count, e4b->bd_bitmap);
1281 if (block && max)
1282 e4b->bd_info->bb_fragments--;
1283 else if (!block && !max)
1284 e4b->bd_info->bb_fragments++;
1285
1286
1287 while (count-- > 0) {
1288 block = first++;
1289 order = 0;
1290
1291 if (!mb_test_bit(block, e4b->bd_bitmap)) {
1292 ext4_fsblk_t blocknr;
1293
1294 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1295 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1296 ext4_grp_locked_error(sb, e4b->bd_group,
1297 inode ? inode->i_ino : 0,
1298 blocknr,
1299 "freeing already freed block "
1300 "(bit %u)", block);
1301 }
1302 mb_clear_bit(block, e4b->bd_bitmap);
1303 e4b->bd_info->bb_counters[order]++;
1304
1305
1306 buddy = mb_find_buddy(e4b, order, &max);
1307
1308 do {
1309 block &= ~1UL;
1310 if (mb_test_bit(block, buddy) ||
1311 mb_test_bit(block + 1, buddy))
1312 break;
1313
1314
1315 buddy2 = mb_find_buddy(e4b, order + 1, &max);
1316
1317 if (!buddy2)
1318 break;
1319
1320 if (order > 0) {
1321
1322
1323 mb_set_bit(block, buddy);
1324 mb_set_bit(block + 1, buddy);
1325 }
1326 e4b->bd_info->bb_counters[order]--;
1327 e4b->bd_info->bb_counters[order]--;
1328
1329 block = block >> 1;
1330 order++;
1331 e4b->bd_info->bb_counters[order]++;
1332
1333 mb_clear_bit(block, buddy2);
1334 buddy = buddy2;
1335 } while (1);
1336 }
1337 mb_set_largest_free_order(sb, e4b->bd_info);
1338 mb_check_buddy(e4b);
1339}
1340
1341static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1342 int needed, struct ext4_free_extent *ex)
1343{
1344 int next = block;
1345 int max;
1346 void *buddy;
1347
1348 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1349 BUG_ON(ex == NULL);
1350
1351 buddy = mb_find_buddy(e4b, order, &max);
1352 BUG_ON(buddy == NULL);
1353 BUG_ON(block >= max);
1354 if (mb_test_bit(block, buddy)) {
1355 ex->fe_len = 0;
1356 ex->fe_start = 0;
1357 ex->fe_group = 0;
1358 return 0;
1359 }
1360
1361
1362 if (likely(order == 0)) {
1363
1364 order = mb_find_order_for_block(e4b, block);
1365 block = block >> order;
1366 }
1367
1368 ex->fe_len = 1 << order;
1369 ex->fe_start = block << order;
1370 ex->fe_group = e4b->bd_group;
1371
1372
1373 next = next - ex->fe_start;
1374 ex->fe_len -= next;
1375 ex->fe_start += next;
1376
1377 while (needed > ex->fe_len &&
1378 (buddy = mb_find_buddy(e4b, order, &max))) {
1379
1380 if (block + 1 >= max)
1381 break;
1382
1383 next = (block + 1) * (1 << order);
1384 if (mb_test_bit(next, e4b->bd_bitmap))
1385 break;
1386
1387 order = mb_find_order_for_block(e4b, next);
1388
1389 block = next >> order;
1390 ex->fe_len += 1 << order;
1391 }
1392
1393 BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
1394 return ex->fe_len;
1395}
1396
1397static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1398{
1399 int ord;
1400 int mlen = 0;
1401 int max = 0;
1402 int cur;
1403 int start = ex->fe_start;
1404 int len = ex->fe_len;
1405 unsigned ret = 0;
1406 int len0 = len;
1407 void *buddy;
1408
1409 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1410 BUG_ON(e4b->bd_group != ex->fe_group);
1411 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1412 mb_check_buddy(e4b);
1413 mb_mark_used_double(e4b, start, len);
1414
1415 e4b->bd_info->bb_free -= len;
1416 if (e4b->bd_info->bb_first_free == start)
1417 e4b->bd_info->bb_first_free += len;
1418
1419
1420 if (start != 0)
1421 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1422 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1423 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1424 if (mlen && max)
1425 e4b->bd_info->bb_fragments++;
1426 else if (!mlen && !max)
1427 e4b->bd_info->bb_fragments--;
1428
1429
1430 while (len) {
1431 ord = mb_find_order_for_block(e4b, start);
1432
1433 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1434
1435 mlen = 1 << ord;
1436 buddy = mb_find_buddy(e4b, ord, &max);
1437 BUG_ON((start >> ord) >= max);
1438 mb_set_bit(start >> ord, buddy);
1439 e4b->bd_info->bb_counters[ord]--;
1440 start += mlen;
1441 len -= mlen;
1442 BUG_ON(len < 0);
1443 continue;
1444 }
1445
1446
1447 if (ret == 0)
1448 ret = len | (ord << 16);
1449
1450
1451 BUG_ON(ord <= 0);
1452 buddy = mb_find_buddy(e4b, ord, &max);
1453 mb_set_bit(start >> ord, buddy);
1454 e4b->bd_info->bb_counters[ord]--;
1455
1456 ord--;
1457 cur = (start >> ord) & ~1U;
1458 buddy = mb_find_buddy(e4b, ord, &max);
1459 mb_clear_bit(cur, buddy);
1460 mb_clear_bit(cur + 1, buddy);
1461 e4b->bd_info->bb_counters[ord]++;
1462 e4b->bd_info->bb_counters[ord]++;
1463 }
1464 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1465
1466 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1467 mb_check_buddy(e4b);
1468
1469 return ret;
1470}
1471
1472
1473
1474
1475static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1476 struct ext4_buddy *e4b)
1477{
1478 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1479 int ret;
1480
1481 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1482 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1483
1484 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1485 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1486 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1487
1488
1489
1490 ac->ac_f_ex = ac->ac_b_ex;
1491
1492 ac->ac_status = AC_STATUS_FOUND;
1493 ac->ac_tail = ret & 0xffff;
1494 ac->ac_buddy = ret >> 16;
1495
1496
1497
1498
1499
1500
1501
1502
1503 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1504 get_page(ac->ac_bitmap_page);
1505 ac->ac_buddy_page = e4b->bd_buddy_page;
1506 get_page(ac->ac_buddy_page);
1507
1508 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1509 spin_lock(&sbi->s_md_lock);
1510 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1511 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1512 spin_unlock(&sbi->s_md_lock);
1513 }
1514}
1515
1516
1517
1518
1519
1520static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1521 struct ext4_buddy *e4b,
1522 int finish_group)
1523{
1524 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1525 struct ext4_free_extent *bex = &ac->ac_b_ex;
1526 struct ext4_free_extent *gex = &ac->ac_g_ex;
1527 struct ext4_free_extent ex;
1528 int max;
1529
1530 if (ac->ac_status == AC_STATUS_FOUND)
1531 return;
1532
1533
1534
1535 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1536 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1537 ac->ac_status = AC_STATUS_BREAK;
1538 return;
1539 }
1540
1541
1542
1543
1544 if (bex->fe_len < gex->fe_len)
1545 return;
1546
1547 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1548 && bex->fe_group == e4b->bd_group) {
1549
1550
1551
1552 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
1553 if (max >= gex->fe_len) {
1554 ext4_mb_use_best_found(ac, e4b);
1555 return;
1556 }
1557 }
1558}
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1571 struct ext4_free_extent *ex,
1572 struct ext4_buddy *e4b)
1573{
1574 struct ext4_free_extent *bex = &ac->ac_b_ex;
1575 struct ext4_free_extent *gex = &ac->ac_g_ex;
1576
1577 BUG_ON(ex->fe_len <= 0);
1578 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1579 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1580 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1581
1582 ac->ac_found++;
1583
1584
1585
1586
1587 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1588 *bex = *ex;
1589 ext4_mb_use_best_found(ac, e4b);
1590 return;
1591 }
1592
1593
1594
1595
1596 if (ex->fe_len == gex->fe_len) {
1597 *bex = *ex;
1598 ext4_mb_use_best_found(ac, e4b);
1599 return;
1600 }
1601
1602
1603
1604
1605 if (bex->fe_len == 0) {
1606 *bex = *ex;
1607 return;
1608 }
1609
1610
1611
1612
1613 if (bex->fe_len < gex->fe_len) {
1614
1615
1616 if (ex->fe_len > bex->fe_len)
1617 *bex = *ex;
1618 } else if (ex->fe_len > gex->fe_len) {
1619
1620
1621
1622 if (ex->fe_len < bex->fe_len)
1623 *bex = *ex;
1624 }
1625
1626 ext4_mb_check_limits(ac, e4b, 0);
1627}
1628
1629static noinline_for_stack
1630int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1631 struct ext4_buddy *e4b)
1632{
1633 struct ext4_free_extent ex = ac->ac_b_ex;
1634 ext4_group_t group = ex.fe_group;
1635 int max;
1636 int err;
1637
1638 BUG_ON(ex.fe_len <= 0);
1639 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1640 if (err)
1641 return err;
1642
1643 ext4_lock_group(ac->ac_sb, group);
1644 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
1645
1646 if (max > 0) {
1647 ac->ac_b_ex = ex;
1648 ext4_mb_use_best_found(ac, e4b);
1649 }
1650
1651 ext4_unlock_group(ac->ac_sb, group);
1652 ext4_mb_unload_buddy(e4b);
1653
1654 return 0;
1655}
1656
1657static noinline_for_stack
1658int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1659 struct ext4_buddy *e4b)
1660{
1661 ext4_group_t group = ac->ac_g_ex.fe_group;
1662 int max;
1663 int err;
1664 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1665 struct ext4_free_extent ex;
1666
1667 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1668 return 0;
1669
1670 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1671 if (err)
1672 return err;
1673
1674 ext4_lock_group(ac->ac_sb, group);
1675 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
1676 ac->ac_g_ex.fe_len, &ex);
1677
1678 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1679 ext4_fsblk_t start;
1680
1681 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1682 ex.fe_start;
1683
1684 if (do_div(start, sbi->s_stripe) == 0) {
1685 ac->ac_found++;
1686 ac->ac_b_ex = ex;
1687 ext4_mb_use_best_found(ac, e4b);
1688 }
1689 } else if (max >= ac->ac_g_ex.fe_len) {
1690 BUG_ON(ex.fe_len <= 0);
1691 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1692 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1693 ac->ac_found++;
1694 ac->ac_b_ex = ex;
1695 ext4_mb_use_best_found(ac, e4b);
1696 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1697
1698
1699 BUG_ON(ex.fe_len <= 0);
1700 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1701 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1702 ac->ac_found++;
1703 ac->ac_b_ex = ex;
1704 ext4_mb_use_best_found(ac, e4b);
1705 }
1706 ext4_unlock_group(ac->ac_sb, group);
1707 ext4_mb_unload_buddy(e4b);
1708
1709 return 0;
1710}
1711
1712
1713
1714
1715
1716static noinline_for_stack
1717void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1718 struct ext4_buddy *e4b)
1719{
1720 struct super_block *sb = ac->ac_sb;
1721 struct ext4_group_info *grp = e4b->bd_info;
1722 void *buddy;
1723 int i;
1724 int k;
1725 int max;
1726
1727 BUG_ON(ac->ac_2order <= 0);
1728 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1729 if (grp->bb_counters[i] == 0)
1730 continue;
1731
1732 buddy = mb_find_buddy(e4b, i, &max);
1733 BUG_ON(buddy == NULL);
1734
1735 k = mb_find_next_zero_bit(buddy, max, 0);
1736 BUG_ON(k >= max);
1737
1738 ac->ac_found++;
1739
1740 ac->ac_b_ex.fe_len = 1 << i;
1741 ac->ac_b_ex.fe_start = k << i;
1742 ac->ac_b_ex.fe_group = e4b->bd_group;
1743
1744 ext4_mb_use_best_found(ac, e4b);
1745
1746 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1747
1748 if (EXT4_SB(sb)->s_mb_stats)
1749 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1750
1751 break;
1752 }
1753}
1754
1755
1756
1757
1758
1759
1760static noinline_for_stack
1761void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1762 struct ext4_buddy *e4b)
1763{
1764 struct super_block *sb = ac->ac_sb;
1765 void *bitmap = e4b->bd_bitmap;
1766 struct ext4_free_extent ex;
1767 int i;
1768 int free;
1769
1770 free = e4b->bd_info->bb_free;
1771 BUG_ON(free <= 0);
1772
1773 i = e4b->bd_info->bb_first_free;
1774
1775 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1776 i = mb_find_next_zero_bit(bitmap,
1777 EXT4_CLUSTERS_PER_GROUP(sb), i);
1778 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1779
1780
1781
1782
1783
1784 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1785 "%d free clusters as per "
1786 "group info. But bitmap says 0",
1787 free);
1788 break;
1789 }
1790
1791 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1792 BUG_ON(ex.fe_len <= 0);
1793 if (free < ex.fe_len) {
1794 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1795 "%d free clusters as per "
1796 "group info. But got %d blocks",
1797 free, ex.fe_len);
1798
1799
1800
1801
1802
1803 break;
1804 }
1805
1806 ext4_mb_measure_extent(ac, &ex, e4b);
1807
1808 i += ex.fe_len;
1809 free -= ex.fe_len;
1810 }
1811
1812 ext4_mb_check_limits(ac, e4b, 1);
1813}
1814
1815
1816
1817
1818
1819static noinline_for_stack
1820void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1821 struct ext4_buddy *e4b)
1822{
1823 struct super_block *sb = ac->ac_sb;
1824 struct ext4_sb_info *sbi = EXT4_SB(sb);
1825 void *bitmap = e4b->bd_bitmap;
1826 struct ext4_free_extent ex;
1827 ext4_fsblk_t first_group_block;
1828 ext4_fsblk_t a;
1829 ext4_grpblk_t i;
1830 int max;
1831
1832 BUG_ON(sbi->s_stripe == 0);
1833
1834
1835 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1836
1837 a = first_group_block + sbi->s_stripe - 1;
1838 do_div(a, sbi->s_stripe);
1839 i = (a * sbi->s_stripe) - first_group_block;
1840
1841 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1842 if (!mb_test_bit(i, bitmap)) {
1843 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1844 if (max >= sbi->s_stripe) {
1845 ac->ac_found++;
1846 ac->ac_b_ex = ex;
1847 ext4_mb_use_best_found(ac, e4b);
1848 break;
1849 }
1850 }
1851 i += sbi->s_stripe;
1852 }
1853}
1854
1855
1856static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1857 ext4_group_t group, int cr)
1858{
1859 unsigned free, fragments;
1860 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1861 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1862
1863 BUG_ON(cr < 0 || cr >= 4);
1864
1865
1866 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1867 int ret = ext4_mb_init_group(ac->ac_sb, group);
1868 if (ret)
1869 return 0;
1870 }
1871
1872 free = grp->bb_free;
1873 fragments = grp->bb_fragments;
1874 if (free == 0)
1875 return 0;
1876 if (fragments == 0)
1877 return 0;
1878
1879 switch (cr) {
1880 case 0:
1881 BUG_ON(ac->ac_2order == 0);
1882
1883 if (grp->bb_largest_free_order < ac->ac_2order)
1884 return 0;
1885
1886
1887 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1888 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1889 ((group % flex_size) == 0))
1890 return 0;
1891
1892 return 1;
1893 case 1:
1894 if ((free / fragments) >= ac->ac_g_ex.fe_len)
1895 return 1;
1896 break;
1897 case 2:
1898 if (free >= ac->ac_g_ex.fe_len)
1899 return 1;
1900 break;
1901 case 3:
1902 return 1;
1903 default:
1904 BUG();
1905 }
1906
1907 return 0;
1908}
1909
/*
 * Main group-scanning allocator.
 *
 * First tries the goal extent; if that does not settle the request (and
 * the caller did not ask for goal-only allocation) the groups are scanned
 * under increasingly relaxed criteria cr = 0..3, starting at the goal
 * group and wrapping around.  The result is reported through
 * ac->ac_status / ac->ac_b_ex.
 *
 * Returns 0, or the error from ext4_mb_find_by_goal() /
 * ext4_mb_load_buddy().
 */
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* Inodes without the EXTENTS flag are limited to s_blockfile_groups. */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* First, try the goal. */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * i is the position of the highest set bit of the goal length
	 * (1-based).  ac_2order stays 0 unless the test below passes.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * If the order is at least s_mb_order2_reqs and the goal length has
	 * no bit set other than the highest one (i.e. it is a power of two),
	 * remember the order so that criterion 0 can be used.
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	/* Stream allocations start from the last recorded group/start. */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* s_md_lock protects s_mb_last_group / s_mb_last_start here. */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Criterion 0 is only usable when a power-of-two order was found. */
	cr = ac->ac_2order ? 0 : 1;
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/* Every pass starts at the goal group again. */
		group = ac->ac_g_ex.fe_group;

		/* i counts visited groups; group wraps around at ngroups. */
		for (i = 0; i < ngroups; group++, i++) {
			if (group == ngroups)
				group = 0;

			/* Cheap check without the group lock. */
			if (!ext4_mb_good_group(ac, group, cr))
				continue;

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/* Re-check under the group lock before scanning. */
			if (!ext4_mb_good_group(ac, group, cr)) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				continue;
			}

			ac->ac_groups_scanned++;
			/*
			 * cr 0 uses the power-of-two scan; cr 1 with a stripe
			 * configured and a goal length that is a multiple of
			 * it uses the aligned scan; everything else uses the
			 * full bitmap scan.
			 */
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * A best extent was remembered during the scan but not yet
		 * taken: try to take it now.  The return value of
		 * ext4_mb_try_best_found() is not checked; only ac_status is.
		 */
		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * The remembered extent could not be used.  Forget
			 * it, switch to "take the first extent found" mode
			 * and rescan with the most relaxed criterion; the
			 * event is counted in s_mb_lost_chunks.
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	return err;
}
2050
2051static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2052{
2053 struct super_block *sb = seq->private;
2054 ext4_group_t group;
2055
2056 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2057 return NULL;
2058 group = *pos + 1;
2059 return (void *) ((unsigned long) group);
2060}
2061
2062static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2063{
2064 struct super_block *sb = seq->private;
2065 ext4_group_t group;
2066
2067 ++*pos;
2068 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2069 return NULL;
2070 group = *pos + 1;
2071 return (void *) ((unsigned long) group);
2072}
2073
2074static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2075{
2076 struct super_block *sb = seq->private;
2077 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2078 int i;
2079 int err, buddy_loaded = 0;
2080 struct ext4_buddy e4b;
2081 struct ext4_group_info *grinfo;
2082 struct sg {
2083 struct ext4_group_info info;
2084 ext4_grpblk_t counters[16];
2085 } sg;
2086
2087 group--;
2088 if (group == 0)
2089 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2090 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2091 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2092 "group", "free", "frags", "first",
2093 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2094 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2095
2096 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2097 sizeof(struct ext4_group_info);
2098 grinfo = ext4_get_group_info(sb, group);
2099
2100 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2101 err = ext4_mb_load_buddy(sb, group, &e4b);
2102 if (err) {
2103 seq_printf(seq, "#%-5u: I/O error\n", group);
2104 return 0;
2105 }
2106 buddy_loaded = 1;
2107 }
2108
2109 memcpy(&sg, ext4_get_group_info(sb, group), i);
2110
2111 if (buddy_loaded)
2112 ext4_mb_unload_buddy(&e4b);
2113
2114 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2115 sg.info.bb_fragments, sg.info.bb_first_free);
2116 for (i = 0; i <= 13; i++)
2117 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2118 sg.info.bb_counters[i] : 0);
2119 seq_printf(seq, " ]\n");
2120
2121 return 0;
2122}
2123
/* seq_file ->stop: the iterator holds no resources, so nothing to undo. */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
	return;
}
2127
2128static const struct seq_operations ext4_mb_seq_groups_ops = {
2129 .start = ext4_mb_seq_groups_start,
2130 .next = ext4_mb_seq_groups_next,
2131 .stop = ext4_mb_seq_groups_stop,
2132 .show = ext4_mb_seq_groups_show,
2133};
2134
2135static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2136{
2137 struct super_block *sb = PDE(inode)->data;
2138 int rc;
2139
2140 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2141 if (rc == 0) {
2142 struct seq_file *m = file->private_data;
2143 m->private = sb;
2144 }
2145 return rc;
2146
2147}
2148
2149static const struct file_operations ext4_mb_seq_groups_fops = {
2150 .owner = THIS_MODULE,
2151 .open = ext4_mb_seq_groups_open,
2152 .read = seq_read,
2153 .llseek = seq_lseek,
2154 .release = seq_release,
2155};
2156
2157static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2158{
2159 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2160 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2161
2162 BUG_ON(!cachep);
2163 return cachep;
2164}
2165
2166
2167int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 struct ext4_group_desc *desc)
2169{
2170 int i;
2171 int metalen = 0;
2172 struct ext4_sb_info *sbi = EXT4_SB(sb);
2173 struct ext4_group_info **meta_group_info;
2174 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2175
2176
2177
2178
2179
2180
2181 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2182 metalen = sizeof(*meta_group_info) <<
2183 EXT4_DESC_PER_BLOCK_BITS(sb);
2184 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2185 if (meta_group_info == NULL) {
2186 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2187 "for a buddy group");
2188 goto exit_meta_group_info;
2189 }
2190 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2191 meta_group_info;
2192 }
2193
2194 meta_group_info =
2195 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2196 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2197
2198 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2199 if (meta_group_info[i] == NULL) {
2200 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2201 goto exit_group_info;
2202 }
2203 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2204 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2205 &(meta_group_info[i]->bb_state));
2206
2207
2208
2209
2210
2211 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2212 meta_group_info[i]->bb_free =
2213 ext4_free_clusters_after_init(sb, group, desc);
2214 } else {
2215 meta_group_info[i]->bb_free =
2216 ext4_free_group_clusters(sb, desc);
2217 }
2218
2219 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2220 init_rwsem(&meta_group_info[i]->alloc_sem);
2221 meta_group_info[i]->bb_free_root = RB_ROOT;
2222 meta_group_info[i]->bb_largest_free_order = -1;
2223
2224#ifdef DOUBLE_CHECK
2225 {
2226 struct buffer_head *bh;
2227 meta_group_info[i]->bb_bitmap =
2228 kmalloc(sb->s_blocksize, GFP_KERNEL);
2229 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2230 bh = ext4_read_block_bitmap(sb, group);
2231 BUG_ON(bh == NULL);
2232 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2233 sb->s_blocksize);
2234 put_bh(bh);
2235 }
2236#endif
2237
2238 return 0;
2239
2240exit_group_info:
2241
2242 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2243 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2244 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2245 }
2246exit_meta_group_info:
2247 return -ENOMEM;
2248}
2249
2250static int ext4_mb_init_backend(struct super_block *sb)
2251{
2252 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 ext4_group_t i;
2254 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255 struct ext4_super_block *es = sbi->s_es;
2256 int num_meta_group_infos;
2257 int num_meta_group_infos_max;
2258 int array_size;
2259 struct ext4_group_desc *desc;
2260 struct kmem_cache *cachep;
2261
2262
2263 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2264 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277 num_meta_group_infos_max = num_meta_group_infos +
2278 le16_to_cpu(es->s_reserved_gdt_blocks);
2279
2280
2281
2282
2283
2284
2285
2286 array_size = 1;
2287 while (array_size < sizeof(*sbi->s_group_info) *
2288 num_meta_group_infos_max)
2289 array_size = array_size << 1;
2290
2291
2292
2293 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294 if (sbi->s_group_info == NULL) {
2295 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296 return -ENOMEM;
2297 }
2298 sbi->s_buddy_cache = new_inode(sb);
2299 if (sbi->s_buddy_cache == NULL) {
2300 ext4_msg(sb, KERN_ERR, "can't get new inode");
2301 goto err_freesgi;
2302 }
2303
2304
2305
2306
2307 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2308 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2309 for (i = 0; i < ngroups; i++) {
2310 desc = ext4_get_group_desc(sb, i, NULL);
2311 if (desc == NULL) {
2312 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2313 goto err_freebuddy;
2314 }
2315 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2316 goto err_freebuddy;
2317 }
2318
2319 return 0;
2320
2321err_freebuddy:
2322 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 while (i-- > 0)
2324 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325 i = num_meta_group_infos;
2326 while (i-- > 0)
2327 kfree(sbi->s_group_info[i]);
2328 iput(sbi->s_buddy_cache);
2329err_freesgi:
2330 ext4_kvfree(sbi->s_group_info);
2331 return -ENOMEM;
2332}
2333
2334static void ext4_groupinfo_destroy_slabs(void)
2335{
2336 int i;
2337
2338 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2339 if (ext4_groupinfo_caches[i])
2340 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2341 ext4_groupinfo_caches[i] = NULL;
2342 }
2343}
2344
2345static int ext4_groupinfo_create_slab(size_t size)
2346{
2347 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2348 int slab_size;
2349 int blocksize_bits = order_base_2(size);
2350 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2351 struct kmem_cache *cachep;
2352
2353 if (cache_index >= NR_GRPINFO_CACHES)
2354 return -EINVAL;
2355
2356 if (unlikely(cache_index < 0))
2357 cache_index = 0;
2358
2359 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2360 if (ext4_groupinfo_caches[cache_index]) {
2361 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2362 return 0;
2363 }
2364
2365 slab_size = offsetof(struct ext4_group_info,
2366 bb_counters[blocksize_bits + 2]);
2367
2368 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2369 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2370 NULL);
2371
2372 ext4_groupinfo_caches[cache_index] = cachep;
2373
2374 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2375 if (!cachep) {
2376 printk(KERN_EMERG
2377 "EXT4-fs: no memory for groupinfo slab cache\n");
2378 return -ENOMEM;
2379 }
2380
2381 return 0;
2382}
2383
2384int ext4_mb_init(struct super_block *sb)
2385{
2386 struct ext4_sb_info *sbi = EXT4_SB(sb);
2387 unsigned i, j;
2388 unsigned offset;
2389 unsigned max;
2390 int ret;
2391
2392 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2393
2394 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2395 if (sbi->s_mb_offsets == NULL) {
2396 ret = -ENOMEM;
2397 goto out;
2398 }
2399
2400 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2401 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2402 if (sbi->s_mb_maxs == NULL) {
2403 ret = -ENOMEM;
2404 goto out;
2405 }
2406
2407 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2408 if (ret < 0)
2409 goto out;
2410
2411
2412 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2413 sbi->s_mb_offsets[0] = 0;
2414
2415 i = 1;
2416 offset = 0;
2417 max = sb->s_blocksize << 2;
2418 do {
2419 sbi->s_mb_offsets[i] = offset;
2420 sbi->s_mb_maxs[i] = max;
2421 offset += 1 << (sb->s_blocksize_bits - i);
2422 max = max >> 1;
2423 i++;
2424 } while (i <= sb->s_blocksize_bits + 1);
2425
2426 spin_lock_init(&sbi->s_md_lock);
2427 spin_lock_init(&sbi->s_bal_lock);
2428
2429 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2430 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2431 sbi->s_mb_stats = MB_DEFAULT_STATS;
2432 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2433 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2447 sbi->s_cluster_bits, 32);
2448
2449
2450
2451
2452
2453
2454
2455
2456 if (sbi->s_stripe > 1) {
2457 sbi->s_mb_group_prealloc = roundup(
2458 sbi->s_mb_group_prealloc, sbi->s_stripe);
2459 }
2460
2461 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2462 if (sbi->s_locality_groups == NULL) {
2463 ret = -ENOMEM;
2464 goto out_free_groupinfo_slab;
2465 }
2466 for_each_possible_cpu(i) {
2467 struct ext4_locality_group *lg;
2468 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2469 mutex_init(&lg->lg_mutex);
2470 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2471 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2472 spin_lock_init(&lg->lg_prealloc_lock);
2473 }
2474
2475
2476 ret = ext4_mb_init_backend(sb);
2477 if (ret != 0)
2478 goto out_free_locality_groups;
2479
2480 if (sbi->s_proc)
2481 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2482 &ext4_mb_seq_groups_fops, sb);
2483
2484 return 0;
2485
2486out_free_locality_groups:
2487 free_percpu(sbi->s_locality_groups);
2488 sbi->s_locality_groups = NULL;
2489out_free_groupinfo_slab:
2490 ext4_groupinfo_destroy_slabs();
2491out:
2492 kfree(sbi->s_mb_offsets);
2493 sbi->s_mb_offsets = NULL;
2494 kfree(sbi->s_mb_maxs);
2495 sbi->s_mb_maxs = NULL;
2496 return ret;
2497}
2498
2499
2500static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2501{
2502 struct ext4_prealloc_space *pa;
2503 struct list_head *cur, *tmp;
2504 int count = 0;
2505
2506 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2507 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2508 list_del(&pa->pa_group_list);
2509 count++;
2510 kmem_cache_free(ext4_pspace_cachep, pa);
2511 }
2512 if (count)
2513 mb_debug(1, "mballoc: %u PAs left\n", count);
2514
2515}
2516
2517int ext4_mb_release(struct super_block *sb)
2518{
2519 ext4_group_t ngroups = ext4_get_groups_count(sb);
2520 ext4_group_t i;
2521 int num_meta_group_infos;
2522 struct ext4_group_info *grinfo;
2523 struct ext4_sb_info *sbi = EXT4_SB(sb);
2524 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2525
2526 if (sbi->s_proc)
2527 remove_proc_entry("mb_groups", sbi->s_proc);
2528
2529 if (sbi->s_group_info) {
2530 for (i = 0; i < ngroups; i++) {
2531 grinfo = ext4_get_group_info(sb, i);
2532#ifdef DOUBLE_CHECK
2533 kfree(grinfo->bb_bitmap);
2534#endif
2535 ext4_lock_group(sb, i);
2536 ext4_mb_cleanup_pa(grinfo);
2537 ext4_unlock_group(sb, i);
2538 kmem_cache_free(cachep, grinfo);
2539 }
2540 num_meta_group_infos = (ngroups +
2541 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2542 EXT4_DESC_PER_BLOCK_BITS(sb);
2543 for (i = 0; i < num_meta_group_infos; i++)
2544 kfree(sbi->s_group_info[i]);
2545 ext4_kvfree(sbi->s_group_info);
2546 }
2547 kfree(sbi->s_mb_offsets);
2548 kfree(sbi->s_mb_maxs);
2549 if (sbi->s_buddy_cache)
2550 iput(sbi->s_buddy_cache);
2551 if (sbi->s_mb_stats) {
2552 ext4_msg(sb, KERN_INFO,
2553 "mballoc: %u blocks %u reqs (%u success)",
2554 atomic_read(&sbi->s_bal_allocated),
2555 atomic_read(&sbi->s_bal_reqs),
2556 atomic_read(&sbi->s_bal_success));
2557 ext4_msg(sb, KERN_INFO,
2558 "mballoc: %u extents scanned, %u goal hits, "
2559 "%u 2^N hits, %u breaks, %u lost",
2560 atomic_read(&sbi->s_bal_ex_scanned),
2561 atomic_read(&sbi->s_bal_goals),
2562 atomic_read(&sbi->s_bal_2orders),
2563 atomic_read(&sbi->s_bal_breaks),
2564 atomic_read(&sbi->s_mb_lost_chunks));
2565 ext4_msg(sb, KERN_INFO,
2566 "mballoc: %lu generated and it took %Lu",
2567 sbi->s_mb_buddies_generated,
2568 sbi->s_mb_generation_time);
2569 ext4_msg(sb, KERN_INFO,
2570 "mballoc: %u preallocated, %u discarded",
2571 atomic_read(&sbi->s_mb_preallocated),
2572 atomic_read(&sbi->s_mb_discarded));
2573 }
2574
2575 free_percpu(sbi->s_locality_groups);
2576
2577 return 0;
2578}
2579
2580static inline int ext4_issue_discard(struct super_block *sb,
2581 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2582{
2583 ext4_fsblk_t discard_block;
2584
2585 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2586 ext4_group_first_block_no(sb, block_group));
2587 count = EXT4_C2B(EXT4_SB(sb), count);
2588 trace_ext4_discard_blocks(sb,
2589 (unsigned long long) discard_block, count);
2590 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2591}
2592
2593
2594
2595
2596
2597static void ext4_free_data_callback(struct super_block *sb,
2598 struct ext4_journal_cb_entry *jce,
2599 int rc)
2600{
2601 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2602 struct ext4_buddy e4b;
2603 struct ext4_group_info *db;
2604 int err, count = 0, count2 = 0;
2605
2606 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2607 entry->efd_count, entry->efd_group, entry);
2608
2609 if (test_opt(sb, DISCARD))
2610 ext4_issue_discard(sb, entry->efd_group,
2611 entry->efd_start_cluster, entry->efd_count);
2612
2613 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2614
2615 BUG_ON(err != 0);
2616
2617
2618 db = e4b.bd_info;
2619
2620 count += entry->efd_count;
2621 count2++;
2622 ext4_lock_group(sb, entry->efd_group);
2623
2624 rb_erase(&entry->efd_node, &(db->bb_free_root));
2625 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2626
2627
2628
2629
2630
2631
2632
2633 if (!test_opt(sb, DISCARD))
2634 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2635
2636 if (!db->bb_free_root.rb_node) {
2637
2638
2639
2640 page_cache_release(e4b.bd_buddy_page);
2641 page_cache_release(e4b.bd_bitmap_page);
2642 }
2643 ext4_unlock_group(sb, entry->efd_group);
2644 kmem_cache_free(ext4_free_data_cachep, entry);
2645 ext4_mb_unload_buddy(&e4b);
2646
2647 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2648}
2649
2650#ifdef CONFIG_EXT4_DEBUG
2651u8 mb_enable_debug __read_mostly;
2652
2653static struct dentry *debugfs_dir;
2654static struct dentry *debugfs_debug;
2655
2656static void __init ext4_create_debugfs_entry(void)
2657{
2658 debugfs_dir = debugfs_create_dir("ext4", NULL);
2659 if (debugfs_dir)
2660 debugfs_debug = debugfs_create_u8("mballoc-debug",
2661 S_IRUGO | S_IWUSR,
2662 debugfs_dir,
2663 &mb_enable_debug);
2664}
2665
2666static void ext4_remove_debugfs_entry(void)
2667{
2668 debugfs_remove(debugfs_debug);
2669 debugfs_remove(debugfs_dir);
2670}
2671
2672#else
2673
2674static void __init ext4_create_debugfs_entry(void)
2675{
2676}
2677
2678static void ext4_remove_debugfs_entry(void)
2679{
2680}
2681
2682#endif
2683
2684int __init ext4_init_mballoc(void)
2685{
2686 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2687 SLAB_RECLAIM_ACCOUNT);
2688 if (ext4_pspace_cachep == NULL)
2689 return -ENOMEM;
2690
2691 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2692 SLAB_RECLAIM_ACCOUNT);
2693 if (ext4_ac_cachep == NULL) {
2694 kmem_cache_destroy(ext4_pspace_cachep);
2695 return -ENOMEM;
2696 }
2697
2698 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2699 SLAB_RECLAIM_ACCOUNT);
2700 if (ext4_free_data_cachep == NULL) {
2701 kmem_cache_destroy(ext4_pspace_cachep);
2702 kmem_cache_destroy(ext4_ac_cachep);
2703 return -ENOMEM;
2704 }
2705 ext4_create_debugfs_entry();
2706 return 0;
2707}
2708
2709void ext4_exit_mballoc(void)
2710{
2711
2712
2713
2714
2715 rcu_barrier();
2716 kmem_cache_destroy(ext4_pspace_cachep);
2717 kmem_cache_destroy(ext4_ac_cachep);
2718 kmem_cache_destroy(ext4_free_data_cachep);
2719 ext4_groupinfo_destroy_slabs();
2720 ext4_remove_debugfs_entry();
2721}
2722
2723
2724
2725
2726
2727
/*
 * Commit the extent chosen in ac->ac_b_ex to the on-disk structures: set
 * its bits in the block bitmap, lower the group descriptor's free count,
 * refresh the bitmap and descriptor checksums, and adjust the
 * filesystem-wide and flex-group counters.  Both buffers are modified
 * under journal write access and marked dirty through @handle.
 *
 * @reserv_clstrs is subtracted from the dirty-clusters counter unless the
 * allocation carries EXT4_MB_DELALLOC_RESERVED.
 *
 * Returns 0 on success, -EIO if the bitmap or descriptor cannot be read,
 * -EAGAIN if the chosen range fails ext4_data_block_valid() (the range is
 * marked in use in the bitmap first), or the error from the journal
 * helpers.
 */
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
				handle_t *handle, unsigned int reserv_clstrs)
{
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_group_desc *gdp;
	struct buffer_head *gdp_bh;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block;
	int err, len;

	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(ac->ac_b_ex.fe_len <= 0);

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);

	/* -EIO is the result if the bitmap cannot be read. */
	err = -EIO;
	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
	if (!bitmap_bh)
		goto out_err;

	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto out_err;

	/* Likewise -EIO if the group descriptor is unavailable. */
	err = -EIO;
	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
	if (!gdp)
		goto out_err;

	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
			ext4_free_group_clusters(sb, gdp));

	err = ext4_journal_get_write_access(handle, gdp_bh);
	if (err)
		goto out_err;

	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);

	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
	if (!ext4_data_block_valid(sbi, block, len)) {
		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
			   "fs metadata", block, block+len);
		/*
		 * Mark the offending range as in use in the bitmap and
		 * return -EAGAIN (unless dirtying the bitmap itself fails).
		 * NOTE(review): presumably this keeps the range from being
		 * chosen again and lets the caller retry - confirm against
		 * the caller.
		 */
		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
			      ac->ac_b_ex.fe_len);
		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!err)
			err = -EAGAIN;
		goto out_err;
	}

	/* Bitmap and descriptor are updated together under the group lock. */
	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
#ifdef AGGRESSIVE_CHECK
	{
		/* Debug build: none of the bits may already be set. */
		int i;
		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
						bitmap_bh->b_data));
		}
	}
#endif
	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
		      ac->ac_b_ex.fe_len);
	/*
	 * First allocation from a group flagged BLOCK_UNINIT: clear the
	 * flag and set the descriptor's free count to the value computed
	 * by ext4_free_clusters_after_init() before subtracting below.
	 */
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
		ext4_free_group_clusters_set(sb, gdp,
					     ext4_free_clusters_after_init(sb,
						ac->ac_b_ex.fe_group, gdp));
	}
	/* len is reused here as the group's new free-cluster count. */
	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
	ext4_free_group_clusters_set(sb, gdp, len);
	ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
	ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);

	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
	/*
	 * The dirty-clusters counter is only adjusted here when the
	 * allocation does not carry EXT4_MB_DELALLOC_RESERVED.
	 */
	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved blocks if non delalloc */
		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
				   reserv_clstrs);

	/* Keep the per-flex-group free count in step when flex_bg is on. */
	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi,
							  ac->ac_b_ex.fe_group);
		atomic_sub(ac->ac_b_ex.fe_len,
			   &sbi->s_flex_groups[flex_group].free_clusters);
	}

	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
	if (err)
		goto out_err;
	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
	/* brelse() accepts NULL, so this is safe on the early-failure path. */
	brelse(bitmap_bh);
	return err;
}
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2847{
2848 struct super_block *sb = ac->ac_sb;
2849 struct ext4_locality_group *lg = ac->ac_lg;
2850
2851 BUG_ON(lg == NULL);
2852 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2853 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2854 current->pid, ac->ac_g_ex.fe_len);
2855}
2856
2857
2858
2859
2860
2861static noinline_for_stack void
2862ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2863 struct ext4_allocation_request *ar)
2864{
2865 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2866 int bsbits, max;
2867 ext4_lblk_t end;
2868 loff_t size, start_off;
2869 loff_t orig_size __maybe_unused;
2870 ext4_lblk_t start;
2871 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2872 struct ext4_prealloc_space *pa;
2873
2874
2875
2876 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2877 return;
2878
2879
2880 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2881 return;
2882
2883
2884
2885 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2886 return;
2887
2888 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2889 ext4_mb_normalize_group_request(ac);
2890 return ;
2891 }
2892
2893 bsbits = ac->ac_sb->s_blocksize_bits;
2894
2895
2896
2897 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2898 size = size << bsbits;
2899 if (size < i_size_read(ac->ac_inode))
2900 size = i_size_read(ac->ac_inode);
2901 orig_size = size;
2902
2903
2904 max = 2 << bsbits;
2905
2906#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2907 (req <= (size) || max <= (chunk_size))
2908
2909
2910
2911 start_off = 0;
2912 if (size <= 16 * 1024) {
2913 size = 16 * 1024;
2914 } else if (size <= 32 * 1024) {
2915 size = 32 * 1024;
2916 } else if (size <= 64 * 1024) {
2917 size = 64 * 1024;
2918 } else if (size <= 128 * 1024) {
2919 size = 128 * 1024;
2920 } else if (size <= 256 * 1024) {
2921 size = 256 * 1024;
2922 } else if (size <= 512 * 1024) {
2923 size = 512 * 1024;
2924 } else if (size <= 1024 * 1024) {
2925 size = 1024 * 1024;
2926 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
2927 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2928 (21 - bsbits)) << 21;
2929 size = 2 * 1024 * 1024;
2930 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
2931 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2932 (22 - bsbits)) << 22;
2933 size = 4 * 1024 * 1024;
2934 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
2935 (8<<20)>>bsbits, max, 8 * 1024)) {
2936 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2937 (23 - bsbits)) << 23;
2938 size = 8 * 1024 * 1024;
2939 } else {
2940 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
2941 size = ac->ac_o_ex.fe_len << bsbits;
2942 }
2943 size = size >> bsbits;
2944 start = start_off >> bsbits;
2945
2946
2947 if (ar->pleft && start <= ar->lleft) {
2948 size -= ar->lleft + 1 - start;
2949 start = ar->lleft + 1;
2950 }
2951 if (ar->pright && start + size - 1 >= ar->lright)
2952 size -= start + size - ar->lright;
2953
2954 end = start + size;
2955
2956
2957 rcu_read_lock();
2958 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
2959 ext4_lblk_t pa_end;
2960
2961 if (pa->pa_deleted)
2962 continue;
2963 spin_lock(&pa->pa_lock);
2964 if (pa->pa_deleted) {
2965 spin_unlock(&pa->pa_lock);
2966 continue;
2967 }
2968
2969 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
2970 pa->pa_len);
2971
2972
2973 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
2974 ac->ac_o_ex.fe_logical < pa->pa_lstart));
2975
2976
2977 if (pa->pa_lstart >= end || pa_end <= start) {
2978 spin_unlock(&pa->pa_lock);
2979 continue;
2980 }
2981 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
2982
2983
2984 if (pa_end <= ac->ac_o_ex.fe_logical) {
2985 BUG_ON(pa_end < start);
2986 start = pa_end;
2987 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
2988 BUG_ON(pa->pa_lstart > end);
2989 end = pa->pa_lstart;
2990 }
2991 spin_unlock(&pa->pa_lock);
2992 }
2993 rcu_read_unlock();
2994 size = end - start;
2995
2996
2997 rcu_read_lock();
2998 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
2999 ext4_lblk_t pa_end;
3000
3001 spin_lock(&pa->pa_lock);
3002 if (pa->pa_deleted == 0) {
3003 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3004 pa->pa_len);
3005 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3006 }
3007 spin_unlock(&pa->pa_lock);
3008 }
3009 rcu_read_unlock();
3010
3011 if (start + size <= ac->ac_o_ex.fe_logical &&
3012 start > ac->ac_o_ex.fe_logical) {
3013 ext4_msg(ac->ac_sb, KERN_ERR,
3014 "start %lu, size %lu, fe_logical %lu",
3015 (unsigned long) start, (unsigned long) size,
3016 (unsigned long) ac->ac_o_ex.fe_logical);
3017 }
3018 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3019 start > ac->ac_o_ex.fe_logical);
3020 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3021
3022
3023
3024
3025
3026 ac->ac_g_ex.fe_logical = start;
3027 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3028
3029
3030 if (ar->pright && (ar->lright == (start + size))) {
3031
3032 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3033 &ac->ac_f_ex.fe_group,
3034 &ac->ac_f_ex.fe_start);
3035 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3036 }
3037 if (ar->pleft && (ar->lleft + 1 == start)) {
3038
3039 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3040 &ac->ac_f_ex.fe_group,
3041 &ac->ac_f_ex.fe_start);
3042 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3043 }
3044
3045 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3046 (unsigned) orig_size, (unsigned) start);
3047}
3048
3049static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3050{
3051 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3052
3053 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3054 atomic_inc(&sbi->s_bal_reqs);
3055 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3056 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3057 atomic_inc(&sbi->s_bal_success);
3058 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3059 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3060 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3061 atomic_inc(&sbi->s_bal_goals);
3062 if (ac->ac_found > sbi->s_mb_max_to_scan)
3063 atomic_inc(&sbi->s_bal_breaks);
3064 }
3065
3066 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3067 trace_ext4_mballoc_alloc(ac);
3068 else
3069 trace_ext4_mballoc_prealloc(ac);
3070}
3071
3072
3073
3074
3075
3076
3077
3078static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3079{
3080 struct ext4_prealloc_space *pa = ac->ac_pa;
3081
3082 if (pa && pa->pa_type == MB_INODE_PA)
3083 pa->pa_free += ac->ac_b_ex.fe_len;
3084}
3085
3086
3087
3088
3089static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3090 struct ext4_prealloc_space *pa)
3091{
3092 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3093 ext4_fsblk_t start;
3094 ext4_fsblk_t end;
3095 int len;
3096
3097
3098 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3099 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3100 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3101 len = EXT4_NUM_B2C(sbi, end - start);
3102 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3103 &ac->ac_b_ex.fe_start);
3104 ac->ac_b_ex.fe_len = len;
3105 ac->ac_status = AC_STATUS_FOUND;
3106 ac->ac_pa = pa;
3107
3108 BUG_ON(start < pa->pa_pstart);
3109 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3110 BUG_ON(pa->pa_free < len);
3111 pa->pa_free -= len;
3112
3113 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3114}
3115
3116
3117
3118
3119static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3120 struct ext4_prealloc_space *pa)
3121{
3122 unsigned int len = ac->ac_o_ex.fe_len;
3123
3124 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3125 &ac->ac_b_ex.fe_group,
3126 &ac->ac_b_ex.fe_start);
3127 ac->ac_b_ex.fe_len = len;
3128 ac->ac_status = AC_STATUS_FOUND;
3129 ac->ac_pa = pa;
3130
3131
3132
3133
3134
3135
3136
3137 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3138}
3139
3140
3141
3142
3143
3144
3145
3146static struct ext4_prealloc_space *
3147ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3148 struct ext4_prealloc_space *pa,
3149 struct ext4_prealloc_space *cpa)
3150{
3151 ext4_fsblk_t cur_distance, new_distance;
3152
3153 if (cpa == NULL) {
3154 atomic_inc(&pa->pa_count);
3155 return pa;
3156 }
3157 cur_distance = abs(goal_block - cpa->pa_pstart);
3158 new_distance = abs(goal_block - pa->pa_pstart);
3159
3160 if (cur_distance <= new_distance)
3161 return cpa;
3162
3163
3164 atomic_dec(&cpa->pa_count);
3165 atomic_inc(&pa->pa_count);
3166 return pa;
3167}
3168
3169
3170
3171
3172static noinline_for_stack int
3173ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3174{
3175 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3176 int order, i;
3177 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3178 struct ext4_locality_group *lg;
3179 struct ext4_prealloc_space *pa, *cpa = NULL;
3180 ext4_fsblk_t goal_block;
3181
3182
3183 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3184 return 0;
3185
3186
3187 rcu_read_lock();
3188 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3189
3190
3191
3192 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3193 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3194 EXT4_C2B(sbi, pa->pa_len)))
3195 continue;
3196
3197
3198 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3199 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3200 EXT4_MAX_BLOCK_FILE_PHYS))
3201 continue;
3202
3203
3204 spin_lock(&pa->pa_lock);
3205 if (pa->pa_deleted == 0 && pa->pa_free) {
3206 atomic_inc(&pa->pa_count);
3207 ext4_mb_use_inode_pa(ac, pa);
3208 spin_unlock(&pa->pa_lock);
3209 ac->ac_criteria = 10;
3210 rcu_read_unlock();
3211 return 1;
3212 }
3213 spin_unlock(&pa->pa_lock);
3214 }
3215 rcu_read_unlock();
3216
3217
3218 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3219 return 0;
3220
3221
3222 lg = ac->ac_lg;
3223 if (lg == NULL)
3224 return 0;
3225 order = fls(ac->ac_o_ex.fe_len) - 1;
3226 if (order > PREALLOC_TB_SIZE - 1)
3227
3228 order = PREALLOC_TB_SIZE - 1;
3229
3230 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3231
3232
3233
3234
3235 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3236 rcu_read_lock();
3237 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3238 pa_inode_list) {
3239 spin_lock(&pa->pa_lock);
3240 if (pa->pa_deleted == 0 &&
3241 pa->pa_free >= ac->ac_o_ex.fe_len) {
3242
3243 cpa = ext4_mb_check_group_pa(goal_block,
3244 pa, cpa);
3245 }
3246 spin_unlock(&pa->pa_lock);
3247 }
3248 rcu_read_unlock();
3249 }
3250 if (cpa) {
3251 ext4_mb_use_group_pa(ac, cpa);
3252 ac->ac_criteria = 20;
3253 return 1;
3254 }
3255 return 0;
3256}
3257
3258
3259
3260
3261
3262
3263
3264static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3265 ext4_group_t group)
3266{
3267 struct rb_node *n;
3268 struct ext4_group_info *grp;
3269 struct ext4_free_data *entry;
3270
3271 grp = ext4_get_group_info(sb, group);
3272 n = rb_first(&(grp->bb_free_root));
3273
3274 while (n) {
3275 entry = rb_entry(n, struct ext4_free_data, efd_node);
3276 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3277 n = rb_next(n);
3278 }
3279 return;
3280}
3281
3282
3283
3284
3285
3286
3287static noinline_for_stack
3288void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3289 ext4_group_t group)
3290{
3291 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3292 struct ext4_prealloc_space *pa;
3293 struct list_head *cur;
3294 ext4_group_t groupnr;
3295 ext4_grpblk_t start;
3296 int preallocated = 0;
3297 int len;
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307 list_for_each(cur, &grp->bb_prealloc_list) {
3308 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3309 spin_lock(&pa->pa_lock);
3310 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3311 &groupnr, &start);
3312 len = pa->pa_len;
3313 spin_unlock(&pa->pa_lock);
3314 if (unlikely(len == 0))
3315 continue;
3316 BUG_ON(groupnr != group);
3317 ext4_set_bits(bitmap, start, len);
3318 preallocated += len;
3319 }
3320 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3321}
3322
3323static void ext4_mb_pa_callback(struct rcu_head *head)
3324{
3325 struct ext4_prealloc_space *pa;
3326 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3327 kmem_cache_free(ext4_pspace_cachep, pa);
3328}
3329
3330
3331
3332
3333
3334static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3335 struct super_block *sb, struct ext4_prealloc_space *pa)
3336{
3337 ext4_group_t grp;
3338 ext4_fsblk_t grp_blk;
3339
3340 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3341 return;
3342
3343
3344 spin_lock(&pa->pa_lock);
3345 if (pa->pa_deleted == 1) {
3346 spin_unlock(&pa->pa_lock);
3347 return;
3348 }
3349
3350 pa->pa_deleted = 1;
3351 spin_unlock(&pa->pa_lock);
3352
3353 grp_blk = pa->pa_pstart;
3354
3355
3356
3357
3358 if (pa->pa_type == MB_GROUP_PA)
3359 grp_blk--;
3360
3361 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377 ext4_lock_group(sb, grp);
3378 list_del(&pa->pa_group_list);
3379 ext4_unlock_group(sb, grp);
3380
3381 spin_lock(pa->pa_obj_lock);
3382 list_del_rcu(&pa->pa_inode_list);
3383 spin_unlock(pa->pa_obj_lock);
3384
3385 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3386}
3387
3388
3389
3390
3391static noinline_for_stack int
3392ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3393{
3394 struct super_block *sb = ac->ac_sb;
3395 struct ext4_sb_info *sbi = EXT4_SB(sb);
3396 struct ext4_prealloc_space *pa;
3397 struct ext4_group_info *grp;
3398 struct ext4_inode_info *ei;
3399
3400
3401 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3402 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3403 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3404
3405 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3406 if (pa == NULL)
3407 return -ENOMEM;
3408
3409 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3410 int winl;
3411 int wins;
3412 int win;
3413 int offs;
3414
3415
3416
3417
3418 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3419 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3420
3421
3422
3423
3424 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3425
3426
3427 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3428
3429
3430 win = min(winl, wins);
3431
3432 offs = ac->ac_o_ex.fe_logical %
3433 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3434 if (offs && offs < win)
3435 win = offs;
3436
3437 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3438 EXT4_B2C(sbi, win);
3439 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3440 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3441 }
3442
3443
3444
3445 ac->ac_f_ex = ac->ac_b_ex;
3446
3447 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3448 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3449 pa->pa_len = ac->ac_b_ex.fe_len;
3450 pa->pa_free = pa->pa_len;
3451 atomic_set(&pa->pa_count, 1);
3452 spin_lock_init(&pa->pa_lock);
3453 INIT_LIST_HEAD(&pa->pa_inode_list);
3454 INIT_LIST_HEAD(&pa->pa_group_list);
3455 pa->pa_deleted = 0;
3456 pa->pa_type = MB_INODE_PA;
3457
3458 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3459 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3460 trace_ext4_mb_new_inode_pa(ac, pa);
3461
3462 ext4_mb_use_inode_pa(ac, pa);
3463 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3464
3465 ei = EXT4_I(ac->ac_inode);
3466 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3467
3468 pa->pa_obj_lock = &ei->i_prealloc_lock;
3469 pa->pa_inode = ac->ac_inode;
3470
3471 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3472 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3473 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3474
3475 spin_lock(pa->pa_obj_lock);
3476 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3477 spin_unlock(pa->pa_obj_lock);
3478
3479 return 0;
3480}
3481
3482
3483
3484
3485static noinline_for_stack int
3486ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3487{
3488 struct super_block *sb = ac->ac_sb;
3489 struct ext4_locality_group *lg;
3490 struct ext4_prealloc_space *pa;
3491 struct ext4_group_info *grp;
3492
3493
3494 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3495 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3496 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3497
3498 BUG_ON(ext4_pspace_cachep == NULL);
3499 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3500 if (pa == NULL)
3501 return -ENOMEM;
3502
3503
3504
3505 ac->ac_f_ex = ac->ac_b_ex;
3506
3507 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3508 pa->pa_lstart = pa->pa_pstart;
3509 pa->pa_len = ac->ac_b_ex.fe_len;
3510 pa->pa_free = pa->pa_len;
3511 atomic_set(&pa->pa_count, 1);
3512 spin_lock_init(&pa->pa_lock);
3513 INIT_LIST_HEAD(&pa->pa_inode_list);
3514 INIT_LIST_HEAD(&pa->pa_group_list);
3515 pa->pa_deleted = 0;
3516 pa->pa_type = MB_GROUP_PA;
3517
3518 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3519 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3520 trace_ext4_mb_new_group_pa(ac, pa);
3521
3522 ext4_mb_use_group_pa(ac, pa);
3523 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3524
3525 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3526 lg = ac->ac_lg;
3527 BUG_ON(lg == NULL);
3528
3529 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3530 pa->pa_inode = NULL;
3531
3532 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3533 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3534 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3535
3536
3537
3538
3539
3540 return 0;
3541}
3542
3543static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3544{
3545 int err;
3546
3547 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3548 err = ext4_mb_new_group_pa(ac);
3549 else
3550 err = ext4_mb_new_inode_pa(ac);
3551 return err;
3552}
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562static noinline_for_stack int
3563ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3564 struct ext4_prealloc_space *pa)
3565{
3566 struct super_block *sb = e4b->bd_sb;
3567 struct ext4_sb_info *sbi = EXT4_SB(sb);
3568 unsigned int end;
3569 unsigned int next;
3570 ext4_group_t group;
3571 ext4_grpblk_t bit;
3572 unsigned long long grp_blk_start;
3573 int err = 0;
3574 int free = 0;
3575
3576 BUG_ON(pa->pa_deleted == 0);
3577 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3578 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3579 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3580 end = bit + pa->pa_len;
3581
3582 while (bit < end) {
3583 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3584 if (bit >= end)
3585 break;
3586 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3587 mb_debug(1, " free preallocated %u/%u in group %u\n",
3588 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3589 (unsigned) next - bit, (unsigned) group);
3590 free += next - bit;
3591
3592 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3593 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3594 EXT4_C2B(sbi, bit)),
3595 next - bit);
3596 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3597 bit = next + 1;
3598 }
3599 if (free != pa->pa_free) {
3600 ext4_msg(e4b->bd_sb, KERN_CRIT,
3601 "pa %p: logic %lu, phys. %lu, len %lu",
3602 pa, (unsigned long) pa->pa_lstart,
3603 (unsigned long) pa->pa_pstart,
3604 (unsigned long) pa->pa_len);
3605 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3606 free, pa->pa_free);
3607
3608
3609
3610
3611 }
3612 atomic_add(free, &sbi->s_mb_discarded);
3613
3614 return err;
3615}
3616
3617static noinline_for_stack int
3618ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3619 struct ext4_prealloc_space *pa)
3620{
3621 struct super_block *sb = e4b->bd_sb;
3622 ext4_group_t group;
3623 ext4_grpblk_t bit;
3624
3625 trace_ext4_mb_release_group_pa(sb, pa);
3626 BUG_ON(pa->pa_deleted == 0);
3627 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3628 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3629 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3630 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3631 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3632
3633 return 0;
3634}
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645static noinline_for_stack int
3646ext4_mb_discard_group_preallocations(struct super_block *sb,
3647 ext4_group_t group, int needed)
3648{
3649 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3650 struct buffer_head *bitmap_bh = NULL;
3651 struct ext4_prealloc_space *pa, *tmp;
3652 struct list_head list;
3653 struct ext4_buddy e4b;
3654 int err;
3655 int busy = 0;
3656 int free = 0;
3657
3658 mb_debug(1, "discard preallocation for group %u\n", group);
3659
3660 if (list_empty(&grp->bb_prealloc_list))
3661 return 0;
3662
3663 bitmap_bh = ext4_read_block_bitmap(sb, group);
3664 if (bitmap_bh == NULL) {
3665 ext4_error(sb, "Error reading block bitmap for %u", group);
3666 return 0;
3667 }
3668
3669 err = ext4_mb_load_buddy(sb, group, &e4b);
3670 if (err) {
3671 ext4_error(sb, "Error loading buddy information for %u", group);
3672 put_bh(bitmap_bh);
3673 return 0;
3674 }
3675
3676 if (needed == 0)
3677 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3678
3679 INIT_LIST_HEAD(&list);
3680repeat:
3681 ext4_lock_group(sb, group);
3682 list_for_each_entry_safe(pa, tmp,
3683 &grp->bb_prealloc_list, pa_group_list) {
3684 spin_lock(&pa->pa_lock);
3685 if (atomic_read(&pa->pa_count)) {
3686 spin_unlock(&pa->pa_lock);
3687 busy = 1;
3688 continue;
3689 }
3690 if (pa->pa_deleted) {
3691 spin_unlock(&pa->pa_lock);
3692 continue;
3693 }
3694
3695
3696 pa->pa_deleted = 1;
3697
3698
3699 free += pa->pa_free;
3700
3701 spin_unlock(&pa->pa_lock);
3702
3703 list_del(&pa->pa_group_list);
3704 list_add(&pa->u.pa_tmp_list, &list);
3705 }
3706
3707
3708 if (free < needed && busy) {
3709 busy = 0;
3710 ext4_unlock_group(sb, group);
3711
3712
3713
3714
3715 yield();
3716 goto repeat;
3717 }
3718
3719
3720 if (list_empty(&list)) {
3721 BUG_ON(free != 0);
3722 goto out;
3723 }
3724
3725
3726 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3727
3728
3729 spin_lock(pa->pa_obj_lock);
3730 list_del_rcu(&pa->pa_inode_list);
3731 spin_unlock(pa->pa_obj_lock);
3732
3733 if (pa->pa_type == MB_GROUP_PA)
3734 ext4_mb_release_group_pa(&e4b, pa);
3735 else
3736 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3737
3738 list_del(&pa->u.pa_tmp_list);
3739 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3740 }
3741
3742out:
3743 ext4_unlock_group(sb, group);
3744 ext4_mb_unload_buddy(&e4b);
3745 put_bh(bitmap_bh);
3746 return free;
3747}
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758void ext4_discard_preallocations(struct inode *inode)
3759{
3760 struct ext4_inode_info *ei = EXT4_I(inode);
3761 struct super_block *sb = inode->i_sb;
3762 struct buffer_head *bitmap_bh = NULL;
3763 struct ext4_prealloc_space *pa, *tmp;
3764 ext4_group_t group = 0;
3765 struct list_head list;
3766 struct ext4_buddy e4b;
3767 int err;
3768
3769 if (!S_ISREG(inode->i_mode)) {
3770
3771 return;
3772 }
3773
3774 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3775 trace_ext4_discard_preallocations(inode);
3776
3777 INIT_LIST_HEAD(&list);
3778
3779repeat:
3780
3781 spin_lock(&ei->i_prealloc_lock);
3782 while (!list_empty(&ei->i_prealloc_list)) {
3783 pa = list_entry(ei->i_prealloc_list.next,
3784 struct ext4_prealloc_space, pa_inode_list);
3785 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3786 spin_lock(&pa->pa_lock);
3787 if (atomic_read(&pa->pa_count)) {
3788
3789
3790 spin_unlock(&pa->pa_lock);
3791 spin_unlock(&ei->i_prealloc_lock);
3792 ext4_msg(sb, KERN_ERR,
3793 "uh-oh! used pa while discarding");
3794 WARN_ON(1);
3795 schedule_timeout_uninterruptible(HZ);
3796 goto repeat;
3797
3798 }
3799 if (pa->pa_deleted == 0) {
3800 pa->pa_deleted = 1;
3801 spin_unlock(&pa->pa_lock);
3802 list_del_rcu(&pa->pa_inode_list);
3803 list_add(&pa->u.pa_tmp_list, &list);
3804 continue;
3805 }
3806
3807
3808 spin_unlock(&pa->pa_lock);
3809 spin_unlock(&ei->i_prealloc_lock);
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823 schedule_timeout_uninterruptible(HZ);
3824 goto repeat;
3825 }
3826 spin_unlock(&ei->i_prealloc_lock);
3827
3828 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3829 BUG_ON(pa->pa_type != MB_INODE_PA);
3830 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3831
3832 err = ext4_mb_load_buddy(sb, group, &e4b);
3833 if (err) {
3834 ext4_error(sb, "Error loading buddy information for %u",
3835 group);
3836 continue;
3837 }
3838
3839 bitmap_bh = ext4_read_block_bitmap(sb, group);
3840 if (bitmap_bh == NULL) {
3841 ext4_error(sb, "Error reading block bitmap for %u",
3842 group);
3843 ext4_mb_unload_buddy(&e4b);
3844 continue;
3845 }
3846
3847 ext4_lock_group(sb, group);
3848 list_del(&pa->pa_group_list);
3849 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3850 ext4_unlock_group(sb, group);
3851
3852 ext4_mb_unload_buddy(&e4b);
3853 put_bh(bitmap_bh);
3854
3855 list_del(&pa->u.pa_tmp_list);
3856 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3857 }
3858}
3859
#ifdef CONFIG_EXT4_DEBUG
/*
 * Debug aid: dump the allocation context and, for every block group, its
 * preallocations plus free/fragment counts.  Prints nothing unless
 * mb_enable_debug is set, and nothing once the filesystem has been
 * flagged EXT4_MF_FS_ABORTED.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t ngroups, i;

	if (!mb_enable_debug)
		return;
	if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
		return;

	ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
			" Allocation context details:");
	ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
			ac->ac_status, ac->ac_flags);
	/* original, goal and best extents as group/start/len@logical */
	ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
			"goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
			ac->ac_ex_scanned, ac->ac_found);
	ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");

	ngroups = ext4_get_groups_count(sb);
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;

		/* one line per PA linked on this group */
		ext4_lock_group(sb, i);
		list_for_each_entry(pa, &grp->bb_prealloc_list,
				    pa_group_list) {
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%u:%d:%u \n", i,
			       start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		/* groups without free space get no summary line */
		if (grp->bb_free == 0)
			continue;
		printk(KERN_ERR "%u: %d/%d \n",
		       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
/* Debugging disabled: the dump compiles away to nothing. */
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
}
#endif
3925
3926
3927
3928
3929
3930
3931
3932
3933static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3934{
3935 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3936 int bsbits = ac->ac_sb->s_blocksize_bits;
3937 loff_t size, isize;
3938
3939 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3940 return;
3941
3942 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3943 return;
3944
3945 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3946 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3947 >> bsbits;
3948
3949 if ((size == isize) &&
3950 !ext4_fs_is_busy(sbi) &&
3951 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
3952 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
3953 return;
3954 }
3955
3956 if (sbi->s_mb_group_prealloc <= 0) {
3957 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3958 return;
3959 }
3960
3961
3962 size = max(size, isize);
3963 if (size > sbi->s_mb_stream_request) {
3964 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3965 return;
3966 }
3967
3968 BUG_ON(ac->ac_lg != NULL);
3969
3970
3971
3972
3973
3974 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3975
3976
3977 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
3978
3979
3980 mutex_lock(&ac->ac_lg->lg_mutex);
3981}
3982
3983static noinline_for_stack int
3984ext4_mb_initialize_context(struct ext4_allocation_context *ac,
3985 struct ext4_allocation_request *ar)
3986{
3987 struct super_block *sb = ar->inode->i_sb;
3988 struct ext4_sb_info *sbi = EXT4_SB(sb);
3989 struct ext4_super_block *es = sbi->s_es;
3990 ext4_group_t group;
3991 unsigned int len;
3992 ext4_fsblk_t goal;
3993 ext4_grpblk_t block;
3994
3995
3996 len = ar->len;
3997
3998
3999 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
4000 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
4001
4002
4003 goal = ar->goal;
4004 if (goal < le32_to_cpu(es->s_first_data_block) ||
4005 goal >= ext4_blocks_count(es))
4006 goal = le32_to_cpu(es->s_first_data_block);
4007 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4008
4009
4010 memset(ac, 0, sizeof(struct ext4_allocation_context));
4011 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4012 ac->ac_status = AC_STATUS_CONTINUE;
4013 ac->ac_sb = sb;
4014 ac->ac_inode = ar->inode;
4015 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4016 ac->ac_o_ex.fe_group = group;
4017 ac->ac_o_ex.fe_start = block;
4018 ac->ac_o_ex.fe_len = len;
4019 ac->ac_g_ex = ac->ac_o_ex;
4020 ac->ac_flags = ar->flags;
4021
4022
4023
4024 ext4_mb_group_or_file(ac);
4025
4026 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4027 "left: %u/%u, right %u/%u to %swritable\n",
4028 (unsigned) ar->len, (unsigned) ar->logical,
4029 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4030 (unsigned) ar->lleft, (unsigned) ar->pleft,
4031 (unsigned) ar->lright, (unsigned) ar->pright,
4032 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4033 return 0;
4034
4035}
4036
4037static noinline_for_stack void
4038ext4_mb_discard_lg_preallocations(struct super_block *sb,
4039 struct ext4_locality_group *lg,
4040 int order, int total_entries)
4041{
4042 ext4_group_t group = 0;
4043 struct ext4_buddy e4b;
4044 struct list_head discard_list;
4045 struct ext4_prealloc_space *pa, *tmp;
4046
4047 mb_debug(1, "discard locality group preallocation\n");
4048
4049 INIT_LIST_HEAD(&discard_list);
4050
4051 spin_lock(&lg->lg_prealloc_lock);
4052 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4053 pa_inode_list) {
4054 spin_lock(&pa->pa_lock);
4055 if (atomic_read(&pa->pa_count)) {
4056
4057
4058
4059
4060
4061 spin_unlock(&pa->pa_lock);
4062 continue;
4063 }
4064 if (pa->pa_deleted) {
4065 spin_unlock(&pa->pa_lock);
4066 continue;
4067 }
4068
4069 BUG_ON(pa->pa_type != MB_GROUP_PA);
4070
4071
4072 pa->pa_deleted = 1;
4073 spin_unlock(&pa->pa_lock);
4074
4075 list_del_rcu(&pa->pa_inode_list);
4076 list_add(&pa->u.pa_tmp_list, &discard_list);
4077
4078 total_entries--;
4079 if (total_entries <= 5) {
4080
4081
4082
4083
4084
4085
4086 break;
4087 }
4088 }
4089 spin_unlock(&lg->lg_prealloc_lock);
4090
4091 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4092
4093 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4094 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4095 ext4_error(sb, "Error loading buddy information for %u",
4096 group);
4097 continue;
4098 }
4099 ext4_lock_group(sb, group);
4100 list_del(&pa->pa_group_list);
4101 ext4_mb_release_group_pa(&e4b, pa);
4102 ext4_unlock_group(sb, group);
4103
4104 ext4_mb_unload_buddy(&e4b);
4105 list_del(&pa->u.pa_tmp_list);
4106 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4107 }
4108}
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4120{
4121 int order, added = 0, lg_prealloc_count = 1;
4122 struct super_block *sb = ac->ac_sb;
4123 struct ext4_locality_group *lg = ac->ac_lg;
4124 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4125
4126 order = fls(pa->pa_free) - 1;
4127 if (order > PREALLOC_TB_SIZE - 1)
4128
4129 order = PREALLOC_TB_SIZE - 1;
4130
4131 rcu_read_lock();
4132 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4133 pa_inode_list) {
4134 spin_lock(&tmp_pa->pa_lock);
4135 if (tmp_pa->pa_deleted) {
4136 spin_unlock(&tmp_pa->pa_lock);
4137 continue;
4138 }
4139 if (!added && pa->pa_free < tmp_pa->pa_free) {
4140
4141 list_add_tail_rcu(&pa->pa_inode_list,
4142 &tmp_pa->pa_inode_list);
4143 added = 1;
4144
4145
4146
4147
4148 }
4149 spin_unlock(&tmp_pa->pa_lock);
4150 lg_prealloc_count++;
4151 }
4152 if (!added)
4153 list_add_tail_rcu(&pa->pa_inode_list,
4154 &lg->lg_prealloc_list[order]);
4155 rcu_read_unlock();
4156
4157
4158 if (lg_prealloc_count > 8) {
4159 ext4_mb_discard_lg_preallocations(sb, lg,
4160 order, lg_prealloc_count);
4161 return;
4162 }
4163 return ;
4164}
4165
4166
4167
4168
4169static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4170{
4171 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4172 struct ext4_prealloc_space *pa = ac->ac_pa;
4173 if (pa) {
4174 if (pa->pa_type == MB_GROUP_PA) {
4175
4176 spin_lock(&pa->pa_lock);
4177 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4178 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4179 pa->pa_free -= ac->ac_b_ex.fe_len;
4180 pa->pa_len -= ac->ac_b_ex.fe_len;
4181 spin_unlock(&pa->pa_lock);
4182 }
4183 }
4184 if (pa) {
4185
4186
4187
4188
4189
4190
4191 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4192 spin_lock(pa->pa_obj_lock);
4193 list_del_rcu(&pa->pa_inode_list);
4194 spin_unlock(pa->pa_obj_lock);
4195 ext4_mb_add_n_trim(ac);
4196 }
4197 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4198 }
4199 if (ac->ac_bitmap_page)
4200 page_cache_release(ac->ac_bitmap_page);
4201 if (ac->ac_buddy_page)
4202 page_cache_release(ac->ac_buddy_page);
4203 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4204 mutex_unlock(&ac->ac_lg->lg_mutex);
4205 ext4_mb_collect_stats(ac);
4206 return 0;
4207}
4208
4209static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4210{
4211 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4212 int ret;
4213 int freed = 0;
4214
4215 trace_ext4_mb_discard_preallocations(sb, needed);
4216 for (i = 0; i < ngroups && needed > 0; i++) {
4217 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4218 freed += ret;
4219 needed -= ret;
4220 }
4221
4222 return freed;
4223}
4224
4225
4226
4227
4228
4229
4230ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4231 struct ext4_allocation_request *ar, int *errp)
4232{
4233 int freed;
4234 struct ext4_allocation_context *ac = NULL;
4235 struct ext4_sb_info *sbi;
4236 struct super_block *sb;
4237 ext4_fsblk_t block = 0;
4238 unsigned int inquota = 0;
4239 unsigned int reserv_clstrs = 0;
4240
4241 sb = ar->inode->i_sb;
4242 sbi = EXT4_SB(sb);
4243
4244 trace_ext4_request_blocks(ar);
4245
4246
4247 if (IS_NOQUOTA(ar->inode))
4248 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4249
4250
4251
4252
4253
4254
4255 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4256 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4257 else {
4258
4259
4260
4261
4262 while (ar->len &&
4263 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4264
4265
4266 yield();
4267 ar->len = ar->len >> 1;
4268 }
4269 if (!ar->len) {
4270 *errp = -ENOSPC;
4271 return 0;
4272 }
4273 reserv_clstrs = ar->len;
4274 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4275 dquot_alloc_block_nofail(ar->inode,
4276 EXT4_C2B(sbi, ar->len));
4277 } else {
4278 while (ar->len &&
4279 dquot_alloc_block(ar->inode,
4280 EXT4_C2B(sbi, ar->len))) {
4281
4282 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4283 ar->len--;
4284 }
4285 }
4286 inquota = ar->len;
4287 if (ar->len == 0) {
4288 *errp = -EDQUOT;
4289 goto out;
4290 }
4291 }
4292
4293 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4294 if (!ac) {
4295 ar->len = 0;
4296 *errp = -ENOMEM;
4297 goto out;
4298 }
4299
4300 *errp = ext4_mb_initialize_context(ac, ar);
4301 if (*errp) {
4302 ar->len = 0;
4303 goto out;
4304 }
4305
4306 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4307 if (!ext4_mb_use_preallocated(ac)) {
4308 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4309 ext4_mb_normalize_request(ac, ar);
4310repeat:
4311
4312 *errp = ext4_mb_regular_allocator(ac);
4313 if (*errp)
4314 goto errout;
4315
4316
4317
4318
4319 if (ac->ac_status == AC_STATUS_FOUND &&
4320 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4321 ext4_mb_new_preallocation(ac);
4322 }
4323 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4324 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4325 if (*errp == -EAGAIN) {
4326
4327
4328
4329
4330 ext4_mb_release_context(ac);
4331 ac->ac_b_ex.fe_group = 0;
4332 ac->ac_b_ex.fe_start = 0;
4333 ac->ac_b_ex.fe_len = 0;
4334 ac->ac_status = AC_STATUS_CONTINUE;
4335 goto repeat;
4336 } else if (*errp)
4337 errout:
4338 ext4_discard_allocated_blocks(ac);
4339 else {
4340 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4341 ar->len = ac->ac_b_ex.fe_len;
4342 }
4343 } else {
4344 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4345 if (freed)
4346 goto repeat;
4347 *errp = -ENOSPC;
4348 }
4349
4350 if (*errp) {
4351 ac->ac_b_ex.fe_len = 0;
4352 ar->len = 0;
4353 ext4_mb_show_ac(ac);
4354 }
4355 ext4_mb_release_context(ac);
4356out:
4357 if (ac)
4358 kmem_cache_free(ext4_ac_cachep, ac);
4359 if (inquota && ar->len < inquota)
4360 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4361 if (!ar->len) {
4362 if (!ext4_test_inode_state(ar->inode,
4363 EXT4_STATE_DELALLOC_RESERVED))
4364
4365 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4366 reserv_clstrs);
4367 }
4368
4369 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4370
4371 return block;
4372}
4373
4374
4375
4376
4377
4378
4379static int can_merge(struct ext4_free_data *entry1,
4380 struct ext4_free_data *entry2)
4381{
4382 if ((entry1->efd_tid == entry2->efd_tid) &&
4383 (entry1->efd_group == entry2->efd_group) &&
4384 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4385 return 1;
4386 return 0;
4387}
4388
4389static noinline_for_stack int
4390ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4391 struct ext4_free_data *new_entry)
4392{
4393 ext4_group_t group = e4b->bd_group;
4394 ext4_grpblk_t cluster;
4395 struct ext4_free_data *entry;
4396 struct ext4_group_info *db = e4b->bd_info;
4397 struct super_block *sb = e4b->bd_sb;
4398 struct ext4_sb_info *sbi = EXT4_SB(sb);
4399 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4400 struct rb_node *parent = NULL, *new_node;
4401
4402 BUG_ON(!ext4_handle_valid(handle));
4403 BUG_ON(e4b->bd_bitmap_page == NULL);
4404 BUG_ON(e4b->bd_buddy_page == NULL);
4405
4406 new_node = &new_entry->efd_node;
4407 cluster = new_entry->efd_start_cluster;
4408
4409 if (!*n) {
4410
4411
4412
4413
4414
4415 page_cache_get(e4b->bd_buddy_page);
4416 page_cache_get(e4b->bd_bitmap_page);
4417 }
4418 while (*n) {
4419 parent = *n;
4420 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4421 if (cluster < entry->efd_start_cluster)
4422 n = &(*n)->rb_left;
4423 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4424 n = &(*n)->rb_right;
4425 else {
4426 ext4_grp_locked_error(sb, group, 0,
4427 ext4_group_first_block_no(sb, group) +
4428 EXT4_C2B(sbi, cluster),
4429 "Block already on to-be-freed list");
4430 return 0;
4431 }
4432 }
4433
4434 rb_link_node(new_node, parent, n);
4435 rb_insert_color(new_node, &db->bb_free_root);
4436
4437
4438 node = rb_prev(new_node);
4439 if (node) {
4440 entry = rb_entry(node, struct ext4_free_data, efd_node);
4441 if (can_merge(entry, new_entry)) {
4442 new_entry->efd_start_cluster = entry->efd_start_cluster;
4443 new_entry->efd_count += entry->efd_count;
4444 rb_erase(node, &(db->bb_free_root));
4445 ext4_journal_callback_del(handle, &entry->efd_jce);
4446 kmem_cache_free(ext4_free_data_cachep, entry);
4447 }
4448 }
4449
4450 node = rb_next(new_node);
4451 if (node) {
4452 entry = rb_entry(node, struct ext4_free_data, efd_node);
4453 if (can_merge(new_entry, entry)) {
4454 new_entry->efd_count += entry->efd_count;
4455 rb_erase(node, &(db->bb_free_root));
4456 ext4_journal_callback_del(handle, &entry->efd_jce);
4457 kmem_cache_free(ext4_free_data_cachep, entry);
4458 }
4459 }
4460
4461 ext4_journal_callback_add(handle, ext4_free_data_callback,
4462 &new_entry->efd_jce);
4463 return 0;
4464}
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474void ext4_free_blocks(handle_t *handle, struct inode *inode,
4475 struct buffer_head *bh, ext4_fsblk_t block,
4476 unsigned long count, int flags)
4477{
4478 struct buffer_head *bitmap_bh = NULL;
4479 struct super_block *sb = inode->i_sb;
4480 struct ext4_group_desc *gdp;
4481 unsigned long freed = 0;
4482 unsigned int overflow;
4483 ext4_grpblk_t bit;
4484 struct buffer_head *gd_bh;
4485 ext4_group_t block_group;
4486 struct ext4_sb_info *sbi;
4487 struct ext4_buddy e4b;
4488 unsigned int count_clusters;
4489 int err = 0;
4490 int ret;
4491
4492 if (bh) {
4493 if (block)
4494 BUG_ON(block != bh->b_blocknr);
4495 else
4496 block = bh->b_blocknr;
4497 }
4498
4499 sbi = EXT4_SB(sb);
4500 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4501 !ext4_data_block_valid(sbi, block, count)) {
4502 ext4_error(sb, "Freeing blocks not in datazone - "
4503 "block = %llu, count = %lu", block, count);
4504 goto error_return;
4505 }
4506
4507 ext4_debug("freeing block %llu\n", block);
4508 trace_ext4_free_blocks(inode, block, count, flags);
4509
4510 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4511 struct buffer_head *tbh = bh;
4512 int i;
4513
4514 BUG_ON(bh && (count > 1));
4515
4516 for (i = 0; i < count; i++) {
4517 if (!bh)
4518 tbh = sb_find_get_block(inode->i_sb,
4519 block + i);
4520 if (unlikely(!tbh))
4521 continue;
4522 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4523 inode, tbh, block + i);
4524 }
4525 }
4526
4527
4528
4529
4530
4531
4532
4533
4534 if (!ext4_should_writeback_data(inode))
4535 flags |= EXT4_FREE_BLOCKS_METADATA;
4536
4537
4538
4539
4540
4541
4542
4543
4544 overflow = block & (sbi->s_cluster_ratio - 1);
4545 if (overflow) {
4546 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4547 overflow = sbi->s_cluster_ratio - overflow;
4548 block += overflow;
4549 if (count > overflow)
4550 count -= overflow;
4551 else
4552 return;
4553 } else {
4554 block -= overflow;
4555 count += overflow;
4556 }
4557 }
4558 overflow = count & (sbi->s_cluster_ratio - 1);
4559 if (overflow) {
4560 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4561 if (count > overflow)
4562 count -= overflow;
4563 else
4564 return;
4565 } else
4566 count += sbi->s_cluster_ratio - overflow;
4567 }
4568
4569do_more:
4570 overflow = 0;
4571 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4572
4573
4574
4575
4576
4577 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4578 overflow = EXT4_C2B(sbi, bit) + count -
4579 EXT4_BLOCKS_PER_GROUP(sb);
4580 count -= overflow;
4581 }
4582 count_clusters = EXT4_B2C(sbi, count);
4583 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4584 if (!bitmap_bh) {
4585 err = -EIO;
4586 goto error_return;
4587 }
4588 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4589 if (!gdp) {
4590 err = -EIO;
4591 goto error_return;
4592 }
4593
4594 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4595 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4596 in_range(block, ext4_inode_table(sb, gdp),
4597 EXT4_SB(sb)->s_itb_per_group) ||
4598 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4599 EXT4_SB(sb)->s_itb_per_group)) {
4600
4601 ext4_error(sb, "Freeing blocks in system zone - "
4602 "Block = %llu, count = %lu", block, count);
4603
4604 goto error_return;
4605 }
4606
4607 BUFFER_TRACE(bitmap_bh, "getting write access");
4608 err = ext4_journal_get_write_access(handle, bitmap_bh);
4609 if (err)
4610 goto error_return;
4611
4612
4613
4614
4615
4616
4617 BUFFER_TRACE(gd_bh, "get_write_access");
4618 err = ext4_journal_get_write_access(handle, gd_bh);
4619 if (err)
4620 goto error_return;
4621#ifdef AGGRESSIVE_CHECK
4622 {
4623 int i;
4624 for (i = 0; i < count_clusters; i++)
4625 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4626 }
4627#endif
4628 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4629
4630 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4631 if (err)
4632 goto error_return;
4633
4634 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4635 struct ext4_free_data *new_entry;
4636
4637
4638
4639
4640 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4641 if (!new_entry) {
4642 ext4_mb_unload_buddy(&e4b);
4643 err = -ENOMEM;
4644 goto error_return;
4645 }
4646 new_entry->efd_start_cluster = bit;
4647 new_entry->efd_group = block_group;
4648 new_entry->efd_count = count_clusters;
4649 new_entry->efd_tid = handle->h_transaction->t_tid;
4650
4651 ext4_lock_group(sb, block_group);
4652 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4653 ext4_mb_free_metadata(handle, &e4b, new_entry);
4654 } else {
4655
4656
4657
4658
4659 ext4_lock_group(sb, block_group);
4660 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4661 mb_free_blocks(inode, &e4b, bit, count_clusters);
4662 }
4663
4664 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4665 ext4_free_group_clusters_set(sb, gdp, ret);
4666 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4667 ext4_group_desc_csum_set(sb, block_group, gdp);
4668 ext4_unlock_group(sb, block_group);
4669 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4670
4671 if (sbi->s_log_groups_per_flex) {
4672 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4673 atomic_add(count_clusters,
4674 &sbi->s_flex_groups[flex_group].free_clusters);
4675 }
4676
4677 ext4_mb_unload_buddy(&e4b);
4678
4679 freed += count;
4680
4681 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4682 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4683
4684
4685 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4686 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4687
4688
4689 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4690 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4691 if (!err)
4692 err = ret;
4693
4694 if (overflow && !err) {
4695 block += count;
4696 count = overflow;
4697 put_bh(bitmap_bh);
4698 goto do_more;
4699 }
4700error_return:
4701 brelse(bitmap_bh);
4702 ext4_std_error(sb, err);
4703 return;
4704}
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4716 ext4_fsblk_t block, unsigned long count)
4717{
4718 struct buffer_head *bitmap_bh = NULL;
4719 struct buffer_head *gd_bh;
4720 ext4_group_t block_group;
4721 ext4_grpblk_t bit;
4722 unsigned int i;
4723 struct ext4_group_desc *desc;
4724 struct ext4_sb_info *sbi = EXT4_SB(sb);
4725 struct ext4_buddy e4b;
4726 int err = 0, ret, blk_free_count;
4727 ext4_grpblk_t blocks_freed;
4728
4729 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4730
4731 if (count == 0)
4732 return 0;
4733
4734 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4735
4736
4737
4738
4739 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4740 ext4_warning(sb, "too much blocks added to group %u\n",
4741 block_group);
4742 err = -EINVAL;
4743 goto error_return;
4744 }
4745
4746 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4747 if (!bitmap_bh) {
4748 err = -EIO;
4749 goto error_return;
4750 }
4751
4752 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4753 if (!desc) {
4754 err = -EIO;
4755 goto error_return;
4756 }
4757
4758 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4759 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4760 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4761 in_range(block + count - 1, ext4_inode_table(sb, desc),
4762 sbi->s_itb_per_group)) {
4763 ext4_error(sb, "Adding blocks in system zones - "
4764 "Block = %llu, count = %lu",
4765 block, count);
4766 err = -EINVAL;
4767 goto error_return;
4768 }
4769
4770 BUFFER_TRACE(bitmap_bh, "getting write access");
4771 err = ext4_journal_get_write_access(handle, bitmap_bh);
4772 if (err)
4773 goto error_return;
4774
4775
4776
4777
4778
4779
4780 BUFFER_TRACE(gd_bh, "get_write_access");
4781 err = ext4_journal_get_write_access(handle, gd_bh);
4782 if (err)
4783 goto error_return;
4784
4785 for (i = 0, blocks_freed = 0; i < count; i++) {
4786 BUFFER_TRACE(bitmap_bh, "clear bit");
4787 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4788 ext4_error(sb, "bit already cleared for block %llu",
4789 (ext4_fsblk_t)(block + i));
4790 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4791 } else {
4792 blocks_freed++;
4793 }
4794 }
4795
4796 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4797 if (err)
4798 goto error_return;
4799
4800
4801
4802
4803
4804
4805 ext4_lock_group(sb, block_group);
4806 mb_clear_bits(bitmap_bh->b_data, bit, count);
4807 mb_free_blocks(NULL, &e4b, bit, count);
4808 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4809 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4810 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
4811 ext4_group_desc_csum_set(sb, block_group, desc);
4812 ext4_unlock_group(sb, block_group);
4813 percpu_counter_add(&sbi->s_freeclusters_counter,
4814 EXT4_B2C(sbi, blocks_freed));
4815
4816 if (sbi->s_log_groups_per_flex) {
4817 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4818 atomic_add(EXT4_B2C(sbi, blocks_freed),
4819 &sbi->s_flex_groups[flex_group].free_clusters);
4820 }
4821
4822 ext4_mb_unload_buddy(&e4b);
4823
4824
4825 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4826 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4827
4828
4829 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4830 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4831 if (!err)
4832 err = ret;
4833
4834error_return:
4835 brelse(bitmap_bh);
4836 ext4_std_error(sb, err);
4837 return err;
4838}
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852static void ext4_trim_extent(struct super_block *sb, int start, int count,
4853 ext4_group_t group, struct ext4_buddy *e4b)
4854{
4855 struct ext4_free_extent ex;
4856
4857 trace_ext4_trim_extent(sb, group, start, count);
4858
4859 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4860
4861 ex.fe_start = start;
4862 ex.fe_group = group;
4863 ex.fe_len = count;
4864
4865
4866
4867
4868
4869 mb_mark_used(e4b, &ex);
4870 ext4_unlock_group(sb, group);
4871 ext4_issue_discard(sb, group, start, count);
4872 ext4_lock_group(sb, group);
4873 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4874}
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894static ext4_grpblk_t
4895ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4896 ext4_grpblk_t start, ext4_grpblk_t max,
4897 ext4_grpblk_t minblocks)
4898{
4899 void *bitmap;
4900 ext4_grpblk_t next, count = 0, free_count = 0;
4901 struct ext4_buddy e4b;
4902 int ret;
4903
4904 trace_ext4_trim_all_free(sb, group, start, max);
4905
4906 ret = ext4_mb_load_buddy(sb, group, &e4b);
4907 if (ret) {
4908 ext4_error(sb, "Error in loading buddy "
4909 "information for %u", group);
4910 return ret;
4911 }
4912 bitmap = e4b.bd_bitmap;
4913
4914 ext4_lock_group(sb, group);
4915 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
4916 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
4917 goto out;
4918
4919 start = (e4b.bd_info->bb_first_free > start) ?
4920 e4b.bd_info->bb_first_free : start;
4921
4922 while (start <= max) {
4923 start = mb_find_next_zero_bit(bitmap, max + 1, start);
4924 if (start > max)
4925 break;
4926 next = mb_find_next_bit(bitmap, max + 1, start);
4927
4928 if ((next - start) >= minblocks) {
4929 ext4_trim_extent(sb, start,
4930 next - start, group, &e4b);
4931 count += next - start;
4932 }
4933 free_count += next - start;
4934 start = next + 1;
4935
4936 if (fatal_signal_pending(current)) {
4937 count = -ERESTARTSYS;
4938 break;
4939 }
4940
4941 if (need_resched()) {
4942 ext4_unlock_group(sb, group);
4943 cond_resched();
4944 ext4_lock_group(sb, group);
4945 }
4946
4947 if ((e4b.bd_info->bb_free - free_count) < minblocks)
4948 break;
4949 }
4950
4951 if (!ret)
4952 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
4953out:
4954 ext4_unlock_group(sb, group);
4955 ext4_mb_unload_buddy(&e4b);
4956
4957 ext4_debug("trimmed %d blocks in the group %d\n",
4958 count, group);
4959
4960 return count;
4961}
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4976{
4977 struct ext4_group_info *grp;
4978 ext4_group_t group, first_group, last_group;
4979 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
4980 uint64_t start, end, minlen, trimmed = 0;
4981 ext4_fsblk_t first_data_blk =
4982 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4983 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
4984 int ret = 0;
4985
4986 start = range->start >> sb->s_blocksize_bits;
4987 end = start + (range->len >> sb->s_blocksize_bits) - 1;
4988 minlen = range->minlen >> sb->s_blocksize_bits;
4989
4990 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
4991 start >= max_blks ||
4992 range->len < sb->s_blocksize)
4993 return -EINVAL;
4994 if (end >= max_blks)
4995 end = max_blks - 1;
4996 if (end <= first_data_blk)
4997 goto out;
4998 if (start < first_data_blk)
4999 start = first_data_blk;
5000
5001
5002 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5003 &first_group, &first_cluster);
5004 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5005 &last_group, &last_cluster);
5006
5007
5008 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5009
5010 for (group = first_group; group <= last_group; group++) {
5011 grp = ext4_get_group_info(sb, group);
5012
5013 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
5014 ret = ext4_mb_init_group(sb, group);
5015 if (ret)
5016 break;
5017 }
5018
5019
5020
5021
5022
5023
5024
5025 if (group == last_group)
5026 end = last_cluster;
5027
5028 if (grp->bb_free >= minlen) {
5029 cnt = ext4_trim_all_free(sb, group, first_cluster,
5030 end, minlen);
5031 if (cnt < 0) {
5032 ret = cnt;
5033 break;
5034 }
5035 trimmed += cnt;
5036 }
5037
5038
5039
5040
5041
5042 first_cluster = 0;
5043 }
5044
5045 if (!ret)
5046 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5047
5048out:
5049 range->len = trimmed * sb->s_blocksize;
5050 return ret;
5051}
5052