1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/fs.h>
22#include <linux/time.h>
23#include <linux/jbd2.h>
24#include <linux/highuid.h>
25#include <linux/pagemap.h>
26#include <linux/quotaops.h>
27#include <linux/string.h>
28#include <linux/buffer_head.h>
29#include <linux/writeback.h>
30#include <linux/pagevec.h>
31#include <linux/mpage.h>
32#include <linux/namei.h>
33#include <linux/uio.h>
34#include <linux/bio.h>
35#include <linux/workqueue.h>
36#include <linux/kernel.h>
37#include <linux/printk.h>
38#include <linux/slab.h>
39#include <linux/ratelimit.h>
40
41#include "ext4_jbd2.h"
42#include "xattr.h"
43#include "acl.h"
44#include "truncate.h"
45
46#include <trace/events/ext4.h>
47
48#define MPAGE_DA_EXTENT_TAIL 0x01
49
50static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
51 struct ext4_inode_info *ei)
52{
53 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54 __u16 csum_lo;
55 __u16 csum_hi = 0;
56 __u32 csum;
57
58 csum_lo = raw->i_checksum_lo;
59 raw->i_checksum_lo = 0;
60 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
61 EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
62 csum_hi = raw->i_checksum_hi;
63 raw->i_checksum_hi = 0;
64 }
65
66 csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
67 EXT4_INODE_SIZE(inode->i_sb));
68
69 raw->i_checksum_lo = csum_lo;
70 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
71 EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
72 raw->i_checksum_hi = csum_hi;
73
74 return csum;
75}
76
77static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
78 struct ext4_inode_info *ei)
79{
80 __u32 provided, calculated;
81
82 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
83 cpu_to_le32(EXT4_OS_LINUX) ||
84 !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
85 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
86 return 1;
87
88 provided = le16_to_cpu(raw->i_checksum_lo);
89 calculated = ext4_inode_csum(inode, raw, ei);
90 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
91 EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
92 provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
93 else
94 calculated &= 0xFFFF;
95
96 return provided == calculated;
97}
98
99static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
100 struct ext4_inode_info *ei)
101{
102 __u32 csum;
103
104 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
105 cpu_to_le32(EXT4_OS_LINUX) ||
106 !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
107 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
108 return;
109
110 csum = ext4_inode_csum(inode, raw, ei);
111 raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
112 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
113 EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
114 raw->i_checksum_hi = cpu_to_le16(csum >> 16);
115}
116
117static inline int ext4_begin_ordered_truncate(struct inode *inode,
118 loff_t new_size)
119{
120 trace_ext4_begin_ordered_truncate(inode, new_size);
121
122
123
124
125
126
127 if (!EXT4_I(inode)->jinode)
128 return 0;
129 return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
130 EXT4_I(inode)->jinode,
131 new_size);
132}
133
134static void ext4_invalidatepage(struct page *page, unsigned long offset);
135static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
136 struct buffer_head *bh_result, int create);
137static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
138static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
139static int __ext4_journalled_writepage(struct page *page, unsigned int len);
140static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
141static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
142 struct inode *inode, struct page *page, loff_t from,
143 loff_t length, int flags);
144
145
146
147
148static int ext4_inode_is_fast_symlink(struct inode *inode)
149{
150 int ea_blocks = EXT4_I(inode)->i_file_acl ?
151 (inode->i_sb->s_blocksize >> 9) : 0;
152
153 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
154}
155
156
157
158
159
160
161int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
162 int nblocks)
163{
164 int ret;
165
166
167
168
169
170
171
172 BUG_ON(EXT4_JOURNAL(inode) == NULL);
173 jbd_debug(2, "restarting handle %p\n", handle);
174 up_write(&EXT4_I(inode)->i_data_sem);
175 ret = ext4_journal_restart(handle, nblocks);
176 down_write(&EXT4_I(inode)->i_data_sem);
177 ext4_discard_preallocations(inode);
178
179 return ret;
180}
181
182
183
184
185void ext4_evict_inode(struct inode *inode)
186{
187 handle_t *handle;
188 int err;
189
190 trace_ext4_evict_inode(inode);
191
192 ext4_ioend_wait(inode);
193
194 if (inode->i_nlink) {
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213 if (ext4_should_journal_data(inode) &&
214 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
215 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
216 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
217
218 jbd2_log_start_commit(journal, commit_tid);
219 jbd2_log_wait_commit(journal, commit_tid);
220 filemap_write_and_wait(&inode->i_data);
221 }
222 truncate_inode_pages(&inode->i_data, 0);
223 goto no_delete;
224 }
225
226 if (!is_bad_inode(inode))
227 dquot_initialize(inode);
228
229 if (ext4_should_order_data(inode))
230 ext4_begin_ordered_truncate(inode, 0);
231 truncate_inode_pages(&inode->i_data, 0);
232
233 if (is_bad_inode(inode))
234 goto no_delete;
235
236
237
238
239
240 sb_start_intwrite(inode->i_sb);
241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
242 if (IS_ERR(handle)) {
243 ext4_std_error(inode->i_sb, PTR_ERR(handle));
244
245
246
247
248
249 ext4_orphan_del(NULL, inode);
250 sb_end_intwrite(inode->i_sb);
251 goto no_delete;
252 }
253
254 if (IS_SYNC(inode))
255 ext4_handle_sync(handle);
256 inode->i_size = 0;
257 err = ext4_mark_inode_dirty(handle, inode);
258 if (err) {
259 ext4_warning(inode->i_sb,
260 "couldn't mark inode dirty (err %d)", err);
261 goto stop_handle;
262 }
263 if (inode->i_blocks)
264 ext4_truncate(inode);
265
266
267
268
269
270
271
272 if (!ext4_handle_has_enough_credits(handle, 3)) {
273 err = ext4_journal_extend(handle, 3);
274 if (err > 0)
275 err = ext4_journal_restart(handle, 3);
276 if (err != 0) {
277 ext4_warning(inode->i_sb,
278 "couldn't extend journal (err %d)", err);
279 stop_handle:
280 ext4_journal_stop(handle);
281 ext4_orphan_del(NULL, inode);
282 sb_end_intwrite(inode->i_sb);
283 goto no_delete;
284 }
285 }
286
287
288
289
290
291
292
293
294
295 ext4_orphan_del(handle, inode);
296 EXT4_I(inode)->i_dtime = get_seconds();
297
298
299
300
301
302
303
304
305 if (ext4_mark_inode_dirty(handle, inode))
306
307 ext4_clear_inode(inode);
308 else
309 ext4_free_inode(handle, inode);
310 ext4_journal_stop(handle);
311 sb_end_intwrite(inode->i_sb);
312 return;
313no_delete:
314 ext4_clear_inode(inode);
315}
316
#ifdef CONFIG_QUOTA
/* Return the address of the inode's i_reserved_quota counter. */
qsize_t *ext4_get_reserved_space(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	return &ei->i_reserved_quota;
}
#endif
323
324
325
326
327
328static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
329{
330 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
331 return ext4_ext_calc_metadata_amount(inode, lblock);
332
333 return ext4_ind_calc_metadata_amount(inode, lblock);
334}
335
336
337
338
339
340void ext4_da_update_reserve_space(struct inode *inode,
341 int used, int quota_claim)
342{
343 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
344 struct ext4_inode_info *ei = EXT4_I(inode);
345
346 spin_lock(&ei->i_block_reservation_lock);
347 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
348 if (unlikely(used > ei->i_reserved_data_blocks)) {
349 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
350 "with only %d reserved data blocks",
351 __func__, inode->i_ino, used,
352 ei->i_reserved_data_blocks);
353 WARN_ON(1);
354 used = ei->i_reserved_data_blocks;
355 }
356
357 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
358 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
359 "with only %d reserved metadata blocks\n", __func__,
360 inode->i_ino, ei->i_allocated_meta_blocks,
361 ei->i_reserved_meta_blocks);
362 WARN_ON(1);
363 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
364 }
365
366
367 ei->i_reserved_data_blocks -= used;
368 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
369 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
370 used + ei->i_allocated_meta_blocks);
371 ei->i_allocated_meta_blocks = 0;
372
373 if (ei->i_reserved_data_blocks == 0) {
374
375
376
377
378
379 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
380 ei->i_reserved_meta_blocks);
381 ei->i_reserved_meta_blocks = 0;
382 ei->i_da_metadata_calc_len = 0;
383 }
384 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
385
386
387 if (quota_claim)
388 dquot_claim_block(inode, EXT4_C2B(sbi, used));
389 else {
390
391
392
393
394
395 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
396 }
397
398
399
400
401
402
403 if ((ei->i_reserved_data_blocks == 0) &&
404 (atomic_read(&inode->i_writecount) == 0))
405 ext4_discard_preallocations(inode);
406}
407
408static int __check_block_validity(struct inode *inode, const char *func,
409 unsigned int line,
410 struct ext4_map_blocks *map)
411{
412 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
413 map->m_len)) {
414 ext4_error_inode(inode, func, line, map->m_pblk,
415 "lblock %lu mapped to illegal pblock "
416 "(length %d)", (unsigned long) map->m_lblk,
417 map->m_len);
418 return -EIO;
419 }
420 return 0;
421}
422
423#define check_block_validity(inode, map) \
424 __check_block_validity((inode), __func__, __LINE__, (map))
425
426
427
428
429
430static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
431 unsigned int max_pages)
432{
433 struct address_space *mapping = inode->i_mapping;
434 pgoff_t index;
435 struct pagevec pvec;
436 pgoff_t num = 0;
437 int i, nr_pages, done = 0;
438
439 if (max_pages == 0)
440 return 0;
441 pagevec_init(&pvec, 0);
442 while (!done) {
443 index = idx;
444 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
445 PAGECACHE_TAG_DIRTY,
446 (pgoff_t)PAGEVEC_SIZE);
447 if (nr_pages == 0)
448 break;
449 for (i = 0; i < nr_pages; i++) {
450 struct page *page = pvec.pages[i];
451 struct buffer_head *bh, *head;
452
453 lock_page(page);
454 if (unlikely(page->mapping != mapping) ||
455 !PageDirty(page) ||
456 PageWriteback(page) ||
457 page->index != idx) {
458 done = 1;
459 unlock_page(page);
460 break;
461 }
462 if (page_has_buffers(page)) {
463 bh = head = page_buffers(page);
464 do {
465 if (!buffer_delay(bh) &&
466 !buffer_unwritten(bh))
467 done = 1;
468 bh = bh->b_this_page;
469 } while (!done && (bh != head));
470 }
471 unlock_page(page);
472 if (done)
473 break;
474 idx++;
475 num++;
476 if (num >= max_pages) {
477 done = 1;
478 break;
479 }
480 }
481 pagevec_release(&pvec);
482 }
483 return num;
484}
485
486
487
488
489static void set_buffers_da_mapped(struct inode *inode,
490 struct ext4_map_blocks *map)
491{
492 struct address_space *mapping = inode->i_mapping;
493 struct pagevec pvec;
494 int i, nr_pages;
495 pgoff_t index, end;
496
497 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
498 end = (map->m_lblk + map->m_len - 1) >>
499 (PAGE_CACHE_SHIFT - inode->i_blkbits);
500
501 pagevec_init(&pvec, 0);
502 while (index <= end) {
503 nr_pages = pagevec_lookup(&pvec, mapping, index,
504 min(end - index + 1,
505 (pgoff_t)PAGEVEC_SIZE));
506 if (nr_pages == 0)
507 break;
508 for (i = 0; i < nr_pages; i++) {
509 struct page *page = pvec.pages[i];
510 struct buffer_head *bh, *head;
511
512 if (unlikely(page->mapping != mapping) ||
513 !PageDirty(page))
514 break;
515
516 if (page_has_buffers(page)) {
517 bh = head = page_buffers(page);
518 do {
519 set_buffer_da_mapped(bh);
520 bh = bh->b_this_page;
521 } while (bh != head);
522 }
523 index++;
524 }
525 pagevec_release(&pvec);
526 }
527}
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551int ext4_map_blocks(handle_t *handle, struct inode *inode,
552 struct ext4_map_blocks *map, int flags)
553{
554 int retval;
555
556 map->m_flags = 0;
557 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
558 "logical block %lu\n", inode->i_ino, flags, map->m_len,
559 (unsigned long) map->m_lblk);
560
561
562
563
564 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
565 down_read((&EXT4_I(inode)->i_data_sem));
566 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
567 retval = ext4_ext_map_blocks(handle, inode, map, flags &
568 EXT4_GET_BLOCKS_KEEP_SIZE);
569 } else {
570 retval = ext4_ind_map_blocks(handle, inode, map, flags &
571 EXT4_GET_BLOCKS_KEEP_SIZE);
572 }
573 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
574 up_read((&EXT4_I(inode)->i_data_sem));
575
576 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
577 int ret = check_block_validity(inode, map);
578 if (ret != 0)
579 return ret;
580 }
581
582
583 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
584 return retval;
585
586
587
588
589
590
591
592
593 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
594 return retval;
595
596
597
598
599
600
601
602
603
604
605
606 map->m_flags &= ~EXT4_MAP_UNWRITTEN;
607
608
609
610
611
612
613
614 down_write((&EXT4_I(inode)->i_data_sem));
615
616
617
618
619
620
621
622 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
623 ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
624
625
626
627
628 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
629 retval = ext4_ext_map_blocks(handle, inode, map, flags);
630 } else {
631 retval = ext4_ind_map_blocks(handle, inode, map, flags);
632
633 if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
634
635
636
637
638
639 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
640 }
641
642
643
644
645
646
647
648 if ((retval > 0) &&
649 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
650 ext4_da_update_reserve_space(inode, retval, 1);
651 }
652 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
653 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
654
655
656
657
658
659 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
660 set_buffers_da_mapped(inode, map);
661 }
662
663 up_write((&EXT4_I(inode)->i_data_sem));
664 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
665 int ret = check_block_validity(inode, map);
666 if (ret != 0)
667 return ret;
668 }
669 return retval;
670}
671
672
673#define DIO_MAX_BLOCKS 4096
674
675static int _ext4_get_block(struct inode *inode, sector_t iblock,
676 struct buffer_head *bh, int flags)
677{
678 handle_t *handle = ext4_journal_current_handle();
679 struct ext4_map_blocks map;
680 int ret = 0, started = 0;
681 int dio_credits;
682
683 map.m_lblk = iblock;
684 map.m_len = bh->b_size >> inode->i_blkbits;
685
686 if (flags && !handle) {
687
688 if (map.m_len > DIO_MAX_BLOCKS)
689 map.m_len = DIO_MAX_BLOCKS;
690 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
691 handle = ext4_journal_start(inode, dio_credits);
692 if (IS_ERR(handle)) {
693 ret = PTR_ERR(handle);
694 return ret;
695 }
696 started = 1;
697 }
698
699 ret = ext4_map_blocks(handle, inode, &map, flags);
700 if (ret > 0) {
701 map_bh(bh, inode->i_sb, map.m_pblk);
702 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
703 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
704 ret = 0;
705 }
706 if (started)
707 ext4_journal_stop(handle);
708 return ret;
709}
710
711int ext4_get_block(struct inode *inode, sector_t iblock,
712 struct buffer_head *bh, int create)
713{
714 return _ext4_get_block(inode, iblock, bh,
715 create ? EXT4_GET_BLOCKS_CREATE : 0);
716}
717
718
719
720
721struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
722 ext4_lblk_t block, int create, int *errp)
723{
724 struct ext4_map_blocks map;
725 struct buffer_head *bh;
726 int fatal = 0, err;
727
728 J_ASSERT(handle != NULL || create == 0);
729
730 map.m_lblk = block;
731 map.m_len = 1;
732 err = ext4_map_blocks(handle, inode, &map,
733 create ? EXT4_GET_BLOCKS_CREATE : 0);
734
735 if (err < 0)
736 *errp = err;
737 if (err <= 0)
738 return NULL;
739 *errp = 0;
740
741 bh = sb_getblk(inode->i_sb, map.m_pblk);
742 if (!bh) {
743 *errp = -EIO;
744 return NULL;
745 }
746 if (map.m_flags & EXT4_MAP_NEW) {
747 J_ASSERT(create != 0);
748 J_ASSERT(handle != NULL);
749
750
751
752
753
754
755
756
757 lock_buffer(bh);
758 BUFFER_TRACE(bh, "call get_create_access");
759 fatal = ext4_journal_get_create_access(handle, bh);
760 if (!fatal && !buffer_uptodate(bh)) {
761 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
762 set_buffer_uptodate(bh);
763 }
764 unlock_buffer(bh);
765 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
766 err = ext4_handle_dirty_metadata(handle, inode, bh);
767 if (!fatal)
768 fatal = err;
769 } else {
770 BUFFER_TRACE(bh, "not a new buffer");
771 }
772 if (fatal) {
773 *errp = fatal;
774 brelse(bh);
775 bh = NULL;
776 }
777 return bh;
778}
779
780struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
781 ext4_lblk_t block, int create, int *err)
782{
783 struct buffer_head *bh;
784
785 bh = ext4_getblk(handle, inode, block, create, err);
786 if (!bh)
787 return bh;
788 if (buffer_uptodate(bh))
789 return bh;
790 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
791 wait_on_buffer(bh);
792 if (buffer_uptodate(bh))
793 return bh;
794 put_bh(bh);
795 *err = -EIO;
796 return NULL;
797}
798
799static int walk_page_buffers(handle_t *handle,
800 struct buffer_head *head,
801 unsigned from,
802 unsigned to,
803 int *partial,
804 int (*fn)(handle_t *handle,
805 struct buffer_head *bh))
806{
807 struct buffer_head *bh;
808 unsigned block_start, block_end;
809 unsigned blocksize = head->b_size;
810 int err, ret = 0;
811 struct buffer_head *next;
812
813 for (bh = head, block_start = 0;
814 ret == 0 && (bh != head || !block_start);
815 block_start = block_end, bh = next) {
816 next = bh->b_this_page;
817 block_end = block_start + blocksize;
818 if (block_end <= from || block_start >= to) {
819 if (partial && !buffer_uptodate(bh))
820 *partial = 1;
821 continue;
822 }
823 err = (*fn)(handle, bh);
824 if (!ret)
825 ret = err;
826 }
827 return ret;
828}
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855static int do_journal_get_write_access(handle_t *handle,
856 struct buffer_head *bh)
857{
858 int dirty = buffer_dirty(bh);
859 int ret;
860
861 if (!buffer_mapped(bh) || buffer_freed(bh))
862 return 0;
863
864
865
866
867
868
869
870
871 if (dirty)
872 clear_buffer_dirty(bh);
873 ret = ext4_journal_get_write_access(handle, bh);
874 if (!ret && dirty)
875 ret = ext4_handle_dirty_metadata(handle, NULL, bh);
876 return ret;
877}
878
879static int ext4_get_block_write(struct inode *inode, sector_t iblock,
880 struct buffer_head *bh_result, int create);
881static int ext4_write_begin(struct file *file, struct address_space *mapping,
882 loff_t pos, unsigned len, unsigned flags,
883 struct page **pagep, void **fsdata)
884{
885 struct inode *inode = mapping->host;
886 int ret, needed_blocks;
887 handle_t *handle;
888 int retries = 0;
889 struct page *page;
890 pgoff_t index;
891 unsigned from, to;
892
893 trace_ext4_write_begin(inode, pos, len, flags);
894
895
896
897
898 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
899 index = pos >> PAGE_CACHE_SHIFT;
900 from = pos & (PAGE_CACHE_SIZE - 1);
901 to = from + len;
902
903retry:
904 handle = ext4_journal_start(inode, needed_blocks);
905 if (IS_ERR(handle)) {
906 ret = PTR_ERR(handle);
907 goto out;
908 }
909
910
911
912 flags |= AOP_FLAG_NOFS;
913
914 page = grab_cache_page_write_begin(mapping, index, flags);
915 if (!page) {
916 ext4_journal_stop(handle);
917 ret = -ENOMEM;
918 goto out;
919 }
920 *pagep = page;
921
922 if (ext4_should_dioread_nolock(inode))
923 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
924 else
925 ret = __block_write_begin(page, pos, len, ext4_get_block);
926
927 if (!ret && ext4_should_journal_data(inode)) {
928 ret = walk_page_buffers(handle, page_buffers(page),
929 from, to, NULL, do_journal_get_write_access);
930 }
931
932 if (ret) {
933 unlock_page(page);
934 page_cache_release(page);
935
936
937
938
939
940
941
942
943 if (pos + len > inode->i_size && ext4_can_truncate(inode))
944 ext4_orphan_add(handle, inode);
945
946 ext4_journal_stop(handle);
947 if (pos + len > inode->i_size) {
948 ext4_truncate_failed_write(inode);
949
950
951
952
953
954
955 if (inode->i_nlink)
956 ext4_orphan_del(NULL, inode);
957 }
958 }
959
960 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
961 goto retry;
962out:
963 return ret;
964}
965
966
967static int write_end_fn(handle_t *handle, struct buffer_head *bh)
968{
969 if (!buffer_mapped(bh) || buffer_freed(bh))
970 return 0;
971 set_buffer_uptodate(bh);
972 return ext4_handle_dirty_metadata(handle, NULL, bh);
973}
974
975static int ext4_generic_write_end(struct file *file,
976 struct address_space *mapping,
977 loff_t pos, unsigned len, unsigned copied,
978 struct page *page, void *fsdata)
979{
980 int i_size_changed = 0;
981 struct inode *inode = mapping->host;
982 handle_t *handle = ext4_journal_current_handle();
983
984 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
985
986
987
988
989
990
991
992
993 if (pos + copied > inode->i_size) {
994 i_size_write(inode, pos + copied);
995 i_size_changed = 1;
996 }
997
998 if (pos + copied > EXT4_I(inode)->i_disksize) {
999
1000
1001
1002
1003 ext4_update_i_disksize(inode, (pos + copied));
1004 i_size_changed = 1;
1005 }
1006 unlock_page(page);
1007 page_cache_release(page);
1008
1009
1010
1011
1012
1013
1014
1015 if (i_size_changed)
1016 ext4_mark_inode_dirty(handle, inode);
1017
1018 return copied;
1019}
1020
1021
1022
1023
1024
1025
1026
1027
1028static int ext4_ordered_write_end(struct file *file,
1029 struct address_space *mapping,
1030 loff_t pos, unsigned len, unsigned copied,
1031 struct page *page, void *fsdata)
1032{
1033 handle_t *handle = ext4_journal_current_handle();
1034 struct inode *inode = mapping->host;
1035 int ret = 0, ret2;
1036
1037 trace_ext4_ordered_write_end(inode, pos, len, copied);
1038 ret = ext4_jbd2_file_inode(handle, inode);
1039
1040 if (ret == 0) {
1041 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1042 page, fsdata);
1043 copied = ret2;
1044 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1045
1046
1047
1048
1049 ext4_orphan_add(handle, inode);
1050 if (ret2 < 0)
1051 ret = ret2;
1052 } else {
1053 unlock_page(page);
1054 page_cache_release(page);
1055 }
1056
1057 ret2 = ext4_journal_stop(handle);
1058 if (!ret)
1059 ret = ret2;
1060
1061 if (pos + len > inode->i_size) {
1062 ext4_truncate_failed_write(inode);
1063
1064
1065
1066
1067
1068 if (inode->i_nlink)
1069 ext4_orphan_del(NULL, inode);
1070 }
1071
1072
1073 return ret ? ret : copied;
1074}
1075
1076static int ext4_writeback_write_end(struct file *file,
1077 struct address_space *mapping,
1078 loff_t pos, unsigned len, unsigned copied,
1079 struct page *page, void *fsdata)
1080{
1081 handle_t *handle = ext4_journal_current_handle();
1082 struct inode *inode = mapping->host;
1083 int ret = 0, ret2;
1084
1085 trace_ext4_writeback_write_end(inode, pos, len, copied);
1086 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1087 page, fsdata);
1088 copied = ret2;
1089 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1090
1091
1092
1093
1094 ext4_orphan_add(handle, inode);
1095
1096 if (ret2 < 0)
1097 ret = ret2;
1098
1099 ret2 = ext4_journal_stop(handle);
1100 if (!ret)
1101 ret = ret2;
1102
1103 if (pos + len > inode->i_size) {
1104 ext4_truncate_failed_write(inode);
1105
1106
1107
1108
1109
1110 if (inode->i_nlink)
1111 ext4_orphan_del(NULL, inode);
1112 }
1113
1114 return ret ? ret : copied;
1115}
1116
1117static int ext4_journalled_write_end(struct file *file,
1118 struct address_space *mapping,
1119 loff_t pos, unsigned len, unsigned copied,
1120 struct page *page, void *fsdata)
1121{
1122 handle_t *handle = ext4_journal_current_handle();
1123 struct inode *inode = mapping->host;
1124 int ret = 0, ret2;
1125 int partial = 0;
1126 unsigned from, to;
1127 loff_t new_i_size;
1128
1129 trace_ext4_journalled_write_end(inode, pos, len, copied);
1130 from = pos & (PAGE_CACHE_SIZE - 1);
1131 to = from + len;
1132
1133 BUG_ON(!ext4_handle_valid(handle));
1134
1135 if (copied < len) {
1136 if (!PageUptodate(page))
1137 copied = 0;
1138 page_zero_new_buffers(page, from+copied, to);
1139 }
1140
1141 ret = walk_page_buffers(handle, page_buffers(page), from,
1142 to, &partial, write_end_fn);
1143 if (!partial)
1144 SetPageUptodate(page);
1145 new_i_size = pos + copied;
1146 if (new_i_size > inode->i_size)
1147 i_size_write(inode, pos+copied);
1148 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1149 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1150 if (new_i_size > EXT4_I(inode)->i_disksize) {
1151 ext4_update_i_disksize(inode, new_i_size);
1152 ret2 = ext4_mark_inode_dirty(handle, inode);
1153 if (!ret)
1154 ret = ret2;
1155 }
1156
1157 unlock_page(page);
1158 page_cache_release(page);
1159 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1160
1161
1162
1163
1164 ext4_orphan_add(handle, inode);
1165
1166 ret2 = ext4_journal_stop(handle);
1167 if (!ret)
1168 ret = ret2;
1169 if (pos + len > inode->i_size) {
1170 ext4_truncate_failed_write(inode);
1171
1172
1173
1174
1175
1176 if (inode->i_nlink)
1177 ext4_orphan_del(NULL, inode);
1178 }
1179
1180 return ret ? ret : copied;
1181}
1182
1183
1184
1185
1186static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1187{
1188 int retries = 0;
1189 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1190 struct ext4_inode_info *ei = EXT4_I(inode);
1191 unsigned int md_needed;
1192 int ret;
1193 ext4_lblk_t save_last_lblock;
1194 int save_len;
1195
1196
1197
1198
1199
1200
1201 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1202 if (ret)
1203 return ret;
1204
1205
1206
1207
1208
1209
1210repeat:
1211 spin_lock(&ei->i_block_reservation_lock);
1212
1213
1214
1215
1216 save_len = ei->i_da_metadata_calc_len;
1217 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1218 md_needed = EXT4_NUM_B2C(sbi,
1219 ext4_calc_metadata_amount(inode, lblock));
1220 trace_ext4_da_reserve_space(inode, md_needed);
1221
1222
1223
1224
1225
1226 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1227 ei->i_da_metadata_calc_len = save_len;
1228 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1229 spin_unlock(&ei->i_block_reservation_lock);
1230 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1231 yield();
1232 goto repeat;
1233 }
1234 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1235 return -ENOSPC;
1236 }
1237 ei->i_reserved_data_blocks++;
1238 ei->i_reserved_meta_blocks += md_needed;
1239 spin_unlock(&ei->i_block_reservation_lock);
1240
1241 return 0;
1242}
1243
1244static void ext4_da_release_space(struct inode *inode, int to_free)
1245{
1246 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1247 struct ext4_inode_info *ei = EXT4_I(inode);
1248
1249 if (!to_free)
1250 return;
1251
1252 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1253
1254 trace_ext4_da_release_space(inode, to_free);
1255 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1256
1257
1258
1259
1260
1261
1262 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1263 "ino %lu, to_free %d with only %d reserved "
1264 "data blocks", inode->i_ino, to_free,
1265 ei->i_reserved_data_blocks);
1266 WARN_ON(1);
1267 to_free = ei->i_reserved_data_blocks;
1268 }
1269 ei->i_reserved_data_blocks -= to_free;
1270
1271 if (ei->i_reserved_data_blocks == 0) {
1272
1273
1274
1275
1276
1277
1278
1279 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1280 ei->i_reserved_meta_blocks);
1281 ei->i_reserved_meta_blocks = 0;
1282 ei->i_da_metadata_calc_len = 0;
1283 }
1284
1285
1286 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1287
1288 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1289
1290 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1291}
1292
1293static void ext4_da_page_release_reservation(struct page *page,
1294 unsigned long offset)
1295{
1296 int to_release = 0;
1297 struct buffer_head *head, *bh;
1298 unsigned int curr_off = 0;
1299 struct inode *inode = page->mapping->host;
1300 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1301 int num_clusters;
1302
1303 head = page_buffers(page);
1304 bh = head;
1305 do {
1306 unsigned int next_off = curr_off + bh->b_size;
1307
1308 if ((offset <= curr_off) && (buffer_delay(bh))) {
1309 to_release++;
1310 clear_buffer_delay(bh);
1311 clear_buffer_da_mapped(bh);
1312 }
1313 curr_off = next_off;
1314 } while ((bh = bh->b_this_page) != head);
1315
1316
1317
1318 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1319 while (num_clusters > 0) {
1320 ext4_fsblk_t lblk;
1321 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1322 ((num_clusters - 1) << sbi->s_cluster_bits);
1323 if (sbi->s_cluster_ratio == 1 ||
1324 !ext4_find_delalloc_cluster(inode, lblk, 1))
1325 ext4_da_release_space(inode, 1);
1326
1327 num_clusters--;
1328 }
1329}
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348static int mpage_da_submit_io(struct mpage_da_data *mpd,
1349 struct ext4_map_blocks *map)
1350{
1351 struct pagevec pvec;
1352 unsigned long index, end;
1353 int ret = 0, err, nr_pages, i;
1354 struct inode *inode = mpd->inode;
1355 struct address_space *mapping = inode->i_mapping;
1356 loff_t size = i_size_read(inode);
1357 unsigned int len, block_start;
1358 struct buffer_head *bh, *page_bufs = NULL;
1359 int journal_data = ext4_should_journal_data(inode);
1360 sector_t pblock = 0, cur_logical = 0;
1361 struct ext4_io_submit io_submit;
1362
1363 BUG_ON(mpd->next_page <= mpd->first_page);
1364 memset(&io_submit, 0, sizeof(io_submit));
1365
1366
1367
1368
1369
1370
1371 index = mpd->first_page;
1372 end = mpd->next_page - 1;
1373
1374 pagevec_init(&pvec, 0);
1375 while (index <= end) {
1376 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1377 if (nr_pages == 0)
1378 break;
1379 for (i = 0; i < nr_pages; i++) {
1380 int commit_write = 0, skip_page = 0;
1381 struct page *page = pvec.pages[i];
1382
1383 index = page->index;
1384 if (index > end)
1385 break;
1386
1387 if (index == size >> PAGE_CACHE_SHIFT)
1388 len = size & ~PAGE_CACHE_MASK;
1389 else
1390 len = PAGE_CACHE_SIZE;
1391 if (map) {
1392 cur_logical = index << (PAGE_CACHE_SHIFT -
1393 inode->i_blkbits);
1394 pblock = map->m_pblk + (cur_logical -
1395 map->m_lblk);
1396 }
1397 index++;
1398
1399 BUG_ON(!PageLocked(page));
1400 BUG_ON(PageWriteback(page));
1401
1402
1403
1404
1405
1406
1407
1408 if (!page_has_buffers(page)) {
1409 if (__block_write_begin(page, 0, len,
1410 noalloc_get_block_write)) {
1411 skip_page:
1412 unlock_page(page);
1413 continue;
1414 }
1415 commit_write = 1;
1416 }
1417
1418 bh = page_bufs = page_buffers(page);
1419 block_start = 0;
1420 do {
1421 if (!bh)
1422 goto skip_page;
1423 if (map && (cur_logical >= map->m_lblk) &&
1424 (cur_logical <= (map->m_lblk +
1425 (map->m_len - 1)))) {
1426 if (buffer_delay(bh)) {
1427 clear_buffer_delay(bh);
1428 bh->b_blocknr = pblock;
1429 }
1430 if (buffer_da_mapped(bh))
1431 clear_buffer_da_mapped(bh);
1432 if (buffer_unwritten(bh) ||
1433 buffer_mapped(bh))
1434 BUG_ON(bh->b_blocknr != pblock);
1435 if (map->m_flags & EXT4_MAP_UNINIT)
1436 set_buffer_uninit(bh);
1437 clear_buffer_unwritten(bh);
1438 }
1439
1440
1441
1442
1443
1444 if (ext4_bh_delay_or_unwritten(NULL, bh))
1445 skip_page = 1;
1446 bh = bh->b_this_page;
1447 block_start += bh->b_size;
1448 cur_logical++;
1449 pblock++;
1450 } while (bh != page_bufs);
1451
1452 if (skip_page)
1453 goto skip_page;
1454
1455 if (commit_write)
1456
1457 block_commit_write(page, 0, len);
1458
1459 clear_page_dirty_for_io(page);
1460
1461
1462
1463
1464
1465 if (unlikely(journal_data && PageChecked(page)))
1466 err = __ext4_journalled_writepage(page, len);
1467 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
1468 err = ext4_bio_write_page(&io_submit, page,
1469 len, mpd->wbc);
1470 else if (buffer_uninit(page_bufs)) {
1471 ext4_set_bh_endio(page_bufs, inode);
1472 err = block_write_full_page_endio(page,
1473 noalloc_get_block_write,
1474 mpd->wbc, ext4_end_io_buffer_write);
1475 } else
1476 err = block_write_full_page(page,
1477 noalloc_get_block_write, mpd->wbc);
1478
1479 if (!err)
1480 mpd->pages_written++;
1481
1482
1483
1484
1485 if (ret == 0)
1486 ret = err;
1487 }
1488 pagevec_release(&pvec);
1489 }
1490 ext4_io_submit(&io_submit);
1491 return ret;
1492}
1493
1494static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
1495{
1496 int nr_pages, i;
1497 pgoff_t index, end;
1498 struct pagevec pvec;
1499 struct inode *inode = mpd->inode;
1500 struct address_space *mapping = inode->i_mapping;
1501
1502 index = mpd->first_page;
1503 end = mpd->next_page - 1;
1504 while (index <= end) {
1505 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1506 if (nr_pages == 0)
1507 break;
1508 for (i = 0; i < nr_pages; i++) {
1509 struct page *page = pvec.pages[i];
1510 if (page->index > end)
1511 break;
1512 BUG_ON(!PageLocked(page));
1513 BUG_ON(PageWriteback(page));
1514 block_invalidatepage(page, 0);
1515 ClearPageUptodate(page);
1516 unlock_page(page);
1517 }
1518 index = pvec.pages[nr_pages - 1]->index + 1;
1519 pagevec_release(&pvec);
1520 }
1521 return;
1522}
1523
1524static void ext4_print_free_blocks(struct inode *inode)
1525{
1526 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1527 struct super_block *sb = inode->i_sb;
1528
1529 ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
1530 EXT4_C2B(EXT4_SB(inode->i_sb),
1531 ext4_count_free_clusters(inode->i_sb)));
1532 ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
1533 ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
1534 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1535 percpu_counter_sum(&sbi->s_freeclusters_counter)));
1536 ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
1537 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1538 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1539 ext4_msg(sb, KERN_CRIT, "Block reservation details");
1540 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
1541 EXT4_I(inode)->i_reserved_data_blocks);
1542 ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
1543 EXT4_I(inode)->i_reserved_meta_blocks);
1544 return;
1545}
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1557{
1558 int err, blks, get_blocks_flags;
1559 struct ext4_map_blocks map, *mapp = NULL;
1560 sector_t next = mpd->b_blocknr;
1561 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
1562 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
1563 handle_t *handle = NULL;
1564
1565
1566
1567
1568
1569 if ((mpd->b_size == 0) ||
1570 ((mpd->b_state & (1 << BH_Mapped)) &&
1571 !(mpd->b_state & (1 << BH_Delay)) &&
1572 !(mpd->b_state & (1 << BH_Unwritten))))
1573 goto submit_io;
1574
1575 handle = ext4_journal_current_handle();
1576 BUG_ON(!handle);
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596 map.m_lblk = next;
1597 map.m_len = max_blocks;
1598 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
1599 if (ext4_should_dioread_nolock(mpd->inode))
1600 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
1601 if (mpd->b_state & (1 << BH_Delay))
1602 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
1603
1604 blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
1605 if (blks < 0) {
1606 struct super_block *sb = mpd->inode->i_sb;
1607
1608 err = blks;
1609
1610
1611
1612
1613
1614 if (err == -EAGAIN)
1615 goto submit_io;
1616
1617 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1618 mpd->retval = err;
1619 goto submit_io;
1620 }
1621
1622
1623
1624
1625
1626
1627
1628
1629 if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
1630 ext4_msg(sb, KERN_CRIT,
1631 "delayed block allocation failed for inode %lu "
1632 "at logical offset %llu with max blocks %zd "
1633 "with error %d", mpd->inode->i_ino,
1634 (unsigned long long) next,
1635 mpd->b_size >> mpd->inode->i_blkbits, err);
1636 ext4_msg(sb, KERN_CRIT,
1637 "This should not happen!! Data will be lost\n");
1638 if (err == -ENOSPC)
1639 ext4_print_free_blocks(mpd->inode);
1640 }
1641
1642 ext4_da_block_invalidatepages(mpd);
1643
1644
1645 mpd->io_done = 1;
1646 return;
1647 }
1648 BUG_ON(blks == 0);
1649
1650 mapp = ↦
1651 if (map.m_flags & EXT4_MAP_NEW) {
1652 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
1653 int i;
1654
1655 for (i = 0; i < map.m_len; i++)
1656 unmap_underlying_metadata(bdev, map.m_pblk + i);
1657
1658 if (ext4_should_order_data(mpd->inode)) {
1659 err = ext4_jbd2_file_inode(handle, mpd->inode);
1660 if (err) {
1661
1662 mpd->retval = err;
1663 goto submit_io;
1664 }
1665 }
1666 }
1667
1668
1669
1670
1671 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
1672 if (disksize > i_size_read(mpd->inode))
1673 disksize = i_size_read(mpd->inode);
1674 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
1675 ext4_update_i_disksize(mpd->inode, disksize);
1676 err = ext4_mark_inode_dirty(handle, mpd->inode);
1677 if (err)
1678 ext4_error(mpd->inode->i_sb,
1679 "Failed to mark inode %lu dirty",
1680 mpd->inode->i_ino);
1681 }
1682
1683submit_io:
1684 mpage_da_submit_io(mpd, mapp);
1685 mpd->io_done = 1;
1686}
1687
/*
 * Buffer state bits kept in mpd->b_state; two buffers are only merged
 * into the same extent when these bits agree.
 */
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
		  (1 << BH_Delay) | (1 << BH_Unwritten))
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1701 sector_t logical, size_t b_size,
1702 unsigned long b_state)
1703{
1704 sector_t next;
1705 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
1706
1707
1708
1709
1710
1711
1712
1713 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
1714 goto flush_it;
1715
1716
1717 if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
1718 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1719
1720
1721
1722
1723
1724
1725 goto flush_it;
1726 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1727 EXT4_MAX_TRANS_DATA) {
1728
1729
1730
1731
1732
1733 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1734 mpd->inode->i_blkbits;
1735
1736 }
1737 }
1738
1739
1740
1741 if (mpd->b_size == 0) {
1742 mpd->b_blocknr = logical;
1743 mpd->b_size = b_size;
1744 mpd->b_state = b_state & BH_FLAGS;
1745 return;
1746 }
1747
1748 next = mpd->b_blocknr + nrblocks;
1749
1750
1751
1752 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
1753 mpd->b_size += b_size;
1754 return;
1755 }
1756
1757flush_it:
1758
1759
1760
1761
1762 mpage_da_map_and_submit(mpd);
1763 return;
1764}
1765
1766static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1767{
1768 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
1769}
1770
1771
1772
1773
1774
1775
1776
1777static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1778 struct ext4_map_blocks *map,
1779 struct buffer_head *bh)
1780{
1781 int retval;
1782 sector_t invalid_block = ~((sector_t) 0xffff);
1783
1784 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1785 invalid_block = ~0;
1786
1787 map->m_flags = 0;
1788 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1789 "logical block %lu\n", inode->i_ino, map->m_len,
1790 (unsigned long) map->m_lblk);
1791
1792
1793
1794
1795 down_read((&EXT4_I(inode)->i_data_sem));
1796 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1797 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1798 else
1799 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
1800
1801 if (retval == 0) {
1802
1803
1804
1805
1806
1807
1808 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1809 retval = ext4_da_reserve_space(inode, iblock);
1810 if (retval)
1811
1812 goto out_unlock;
1813 }
1814
1815
1816
1817
1818 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1819
1820 map_bh(bh, inode->i_sb, invalid_block);
1821 set_buffer_new(bh);
1822 set_buffer_delay(bh);
1823 }
1824
1825out_unlock:
1826 up_read((&EXT4_I(inode)->i_data_sem));
1827
1828 return retval;
1829}
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1844 struct buffer_head *bh, int create)
1845{
1846 struct ext4_map_blocks map;
1847 int ret = 0;
1848
1849 BUG_ON(create == 0);
1850 BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
1851
1852 map.m_lblk = iblock;
1853 map.m_len = 1;
1854
1855
1856
1857
1858
1859
1860 ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1861 if (ret <= 0)
1862 return ret;
1863
1864 map_bh(bh, inode->i_sb, map.m_pblk);
1865 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
1866
1867 if (buffer_unwritten(bh)) {
1868
1869
1870
1871
1872
1873
1874 set_buffer_new(bh);
1875 set_buffer_mapped(bh);
1876 }
1877 return 0;
1878}
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
1895 struct buffer_head *bh_result, int create)
1896{
1897 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
1898 return _ext4_get_block(inode, iblock, bh_result, 0);
1899}
1900
/* Buffer-walk callback: take a reference on @bh; @handle is unused. */
static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	get_bh(bh);
	return 0;
}
1906
/* Buffer-walk callback: drop a reference on @bh; @handle is unused. */
static int bput_one(handle_t *handle, struct buffer_head *bh)
{
	put_bh(bh);
	return 0;
}
1912
1913static int __ext4_journalled_writepage(struct page *page,
1914 unsigned int len)
1915{
1916 struct address_space *mapping = page->mapping;
1917 struct inode *inode = mapping->host;
1918 struct buffer_head *page_bufs;
1919 handle_t *handle = NULL;
1920 int ret = 0;
1921 int err;
1922
1923 ClearPageChecked(page);
1924 page_bufs = page_buffers(page);
1925 BUG_ON(!page_bufs);
1926 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
1927
1928
1929 unlock_page(page);
1930
1931 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
1932 if (IS_ERR(handle)) {
1933 ret = PTR_ERR(handle);
1934 goto out;
1935 }
1936
1937 BUG_ON(!ext4_handle_valid(handle));
1938
1939 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
1940 do_journal_get_write_access);
1941
1942 err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
1943 write_end_fn);
1944 if (ret == 0)
1945 ret = err;
1946 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1947 err = ext4_journal_stop(handle);
1948 if (!ret)
1949 ret = err;
1950
1951 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
1952 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1953out:
1954 return ret;
1955}
1956
1957static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
1958static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001static int ext4_writepage(struct page *page,
2002 struct writeback_control *wbc)
2003{
2004 int ret = 0, commit_write = 0;
2005 loff_t size;
2006 unsigned int len;
2007 struct buffer_head *page_bufs = NULL;
2008 struct inode *inode = page->mapping->host;
2009
2010 trace_ext4_writepage(page);
2011 size = i_size_read(inode);
2012 if (page->index == size >> PAGE_CACHE_SHIFT)
2013 len = size & ~PAGE_CACHE_MASK;
2014 else
2015 len = PAGE_CACHE_SIZE;
2016
2017
2018
2019
2020
2021
2022 if (!page_has_buffers(page)) {
2023 if (__block_write_begin(page, 0, len,
2024 noalloc_get_block_write)) {
2025 redirty_page:
2026 redirty_page_for_writepage(wbc, page);
2027 unlock_page(page);
2028 return 0;
2029 }
2030 commit_write = 1;
2031 }
2032 page_bufs = page_buffers(page);
2033 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2034 ext4_bh_delay_or_unwritten)) {
2035
2036
2037
2038
2039
2040
2041
2042
2043 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
2044 PF_MEMALLOC);
2045 goto redirty_page;
2046 }
2047 if (commit_write)
2048
2049 block_commit_write(page, 0, len);
2050
2051 if (PageChecked(page) && ext4_should_journal_data(inode))
2052
2053
2054
2055
2056 return __ext4_journalled_writepage(page, len);
2057
2058 if (buffer_uninit(page_bufs)) {
2059 ext4_set_bh_endio(page_bufs, inode);
2060 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2061 wbc, ext4_end_io_buffer_write);
2062 } else
2063 ret = block_write_full_page(page, noalloc_get_block_write,
2064 wbc);
2065
2066 return ret;
2067}
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077static int ext4_da_writepages_trans_blocks(struct inode *inode)
2078{
2079 int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
2080
2081
2082
2083
2084
2085
2086
2087 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
2088 (max_blocks > EXT4_MAX_TRANS_DATA))
2089 max_blocks = EXT4_MAX_TRANS_DATA;
2090
2091 return ext4_chunk_trans_blocks(inode, max_blocks);
2092}
2093
2094
2095
2096
2097
2098
2099
2100static int write_cache_pages_da(struct address_space *mapping,
2101 struct writeback_control *wbc,
2102 struct mpage_da_data *mpd,
2103 pgoff_t *done_index)
2104{
2105 struct buffer_head *bh, *head;
2106 struct inode *inode = mapping->host;
2107 struct pagevec pvec;
2108 unsigned int nr_pages;
2109 sector_t logical;
2110 pgoff_t index, end;
2111 long nr_to_write = wbc->nr_to_write;
2112 int i, tag, ret = 0;
2113
2114 memset(mpd, 0, sizeof(struct mpage_da_data));
2115 mpd->wbc = wbc;
2116 mpd->inode = inode;
2117 pagevec_init(&pvec, 0);
2118 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2119 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2120
2121 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2122 tag = PAGECACHE_TAG_TOWRITE;
2123 else
2124 tag = PAGECACHE_TAG_DIRTY;
2125
2126 *done_index = index;
2127 while (index <= end) {
2128 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2129 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2130 if (nr_pages == 0)
2131 return 0;
2132
2133 for (i = 0; i < nr_pages; i++) {
2134 struct page *page = pvec.pages[i];
2135
2136
2137
2138
2139
2140
2141
2142
2143 if (page->index > end)
2144 goto out;
2145
2146 *done_index = page->index + 1;
2147
2148
2149
2150
2151
2152 if ((mpd->next_page != page->index) &&
2153 (mpd->next_page != mpd->first_page)) {
2154 mpage_da_map_and_submit(mpd);
2155 goto ret_extent_tail;
2156 }
2157
2158 lock_page(page);
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168 if (!PageDirty(page) ||
2169 (PageWriteback(page) &&
2170 (wbc->sync_mode == WB_SYNC_NONE)) ||
2171 unlikely(page->mapping != mapping)) {
2172 unlock_page(page);
2173 continue;
2174 }
2175
2176 wait_on_page_writeback(page);
2177 BUG_ON(PageWriteback(page));
2178
2179 if (mpd->next_page != page->index)
2180 mpd->first_page = page->index;
2181 mpd->next_page = page->index + 1;
2182 logical = (sector_t) page->index <<
2183 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2184
2185 if (!page_has_buffers(page)) {
2186 mpage_add_bh_to_extent(mpd, logical,
2187 PAGE_CACHE_SIZE,
2188 (1 << BH_Dirty) | (1 << BH_Uptodate));
2189 if (mpd->io_done)
2190 goto ret_extent_tail;
2191 } else {
2192
2193
2194
2195
2196 head = page_buffers(page);
2197 bh = head;
2198 do {
2199 BUG_ON(buffer_locked(bh));
2200
2201
2202
2203
2204
2205
2206 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2207 mpage_add_bh_to_extent(mpd, logical,
2208 bh->b_size,
2209 bh->b_state);
2210 if (mpd->io_done)
2211 goto ret_extent_tail;
2212 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224 if (mpd->b_size == 0)
2225 mpd->b_state = bh->b_state & BH_FLAGS;
2226 }
2227 logical++;
2228 } while ((bh = bh->b_this_page) != head);
2229 }
2230
2231 if (nr_to_write > 0) {
2232 nr_to_write--;
2233 if (nr_to_write == 0 &&
2234 wbc->sync_mode == WB_SYNC_NONE)
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245 goto out;
2246 }
2247 }
2248 pagevec_release(&pvec);
2249 cond_resched();
2250 }
2251 return 0;
2252ret_extent_tail:
2253 ret = MPAGE_DA_EXTENT_TAIL;
2254out:
2255 pagevec_release(&pvec);
2256 cond_resched();
2257 return ret;
2258}
2259
2260
2261static int ext4_da_writepages(struct address_space *mapping,
2262 struct writeback_control *wbc)
2263{
2264 pgoff_t index;
2265 int range_whole = 0;
2266 handle_t *handle = NULL;
2267 struct mpage_da_data mpd;
2268 struct inode *inode = mapping->host;
2269 int pages_written = 0;
2270 unsigned int max_pages;
2271 int range_cyclic, cycled = 1, io_done = 0;
2272 int needed_blocks, ret = 0;
2273 long desired_nr_to_write, nr_to_writebump = 0;
2274 loff_t range_start = wbc->range_start;
2275 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2276 pgoff_t done_index = 0;
2277 pgoff_t end;
2278 struct blk_plug plug;
2279
2280 trace_ext4_da_writepages(inode, wbc);
2281
2282
2283
2284
2285
2286
2287 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2288 return 0;
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2301 return -EROFS;
2302
2303 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2304 range_whole = 1;
2305
2306 range_cyclic = wbc->range_cyclic;
2307 if (wbc->range_cyclic) {
2308 index = mapping->writeback_index;
2309 if (index)
2310 cycled = 0;
2311 wbc->range_start = index << PAGE_CACHE_SHIFT;
2312 wbc->range_end = LLONG_MAX;
2313 wbc->range_cyclic = 0;
2314 end = -1;
2315 } else {
2316 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2317 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2318 }
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
2337 if (!range_cyclic && range_whole) {
2338 if (wbc->nr_to_write == LONG_MAX)
2339 desired_nr_to_write = wbc->nr_to_write;
2340 else
2341 desired_nr_to_write = wbc->nr_to_write * 8;
2342 } else
2343 desired_nr_to_write = ext4_num_dirty_pages(inode, index,
2344 max_pages);
2345 if (desired_nr_to_write > max_pages)
2346 desired_nr_to_write = max_pages;
2347
2348 if (wbc->nr_to_write < desired_nr_to_write) {
2349 nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
2350 wbc->nr_to_write = desired_nr_to_write;
2351 }
2352
2353retry:
2354 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2355 tag_pages_for_writeback(mapping, index, end);
2356
2357 blk_start_plug(&plug);
2358 while (!ret && wbc->nr_to_write > 0) {
2359
2360
2361
2362
2363
2364
2365
2366 BUG_ON(ext4_should_journal_data(inode));
2367 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2368
2369
2370 handle = ext4_journal_start(inode, needed_blocks);
2371 if (IS_ERR(handle)) {
2372 ret = PTR_ERR(handle);
2373 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
2374 "%ld pages, ino %lu; err %d", __func__,
2375 wbc->nr_to_write, inode->i_ino, ret);
2376 blk_finish_plug(&plug);
2377 goto out_writepages;
2378 }
2379
2380
2381
2382
2383
2384
2385 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
2386
2387
2388
2389
2390
2391 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
2392 mpage_da_map_and_submit(&mpd);
2393 ret = MPAGE_DA_EXTENT_TAIL;
2394 }
2395 trace_ext4_da_write_pages(inode, &mpd);
2396 wbc->nr_to_write -= mpd.pages_written;
2397
2398 ext4_journal_stop(handle);
2399
2400 if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
2401
2402
2403
2404
2405 jbd2_journal_force_commit_nested(sbi->s_journal);
2406 ret = 0;
2407 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2408
2409
2410
2411
2412
2413 pages_written += mpd.pages_written;
2414 ret = mpd.retval;
2415 io_done = 1;
2416 } else if (wbc->nr_to_write)
2417
2418
2419
2420
2421
2422 break;
2423 }
2424 blk_finish_plug(&plug);
2425 if (!io_done && !cycled) {
2426 cycled = 1;
2427 index = 0;
2428 wbc->range_start = index << PAGE_CACHE_SHIFT;
2429 wbc->range_end = mapping->writeback_index - 1;
2430 goto retry;
2431 }
2432
2433
2434 wbc->range_cyclic = range_cyclic;
2435 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2436
2437
2438
2439
2440 mapping->writeback_index = done_index;
2441
2442out_writepages:
2443 wbc->nr_to_write -= nr_to_writebump;
2444 wbc->range_start = range_start;
2445 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2446 return ret;
2447}
2448
/*
 * Value stored in *fsdata by ext4_da_write_begin() when the write falls
 * back to ext4_write_begin(); ext4_da_write_end() checks for it.
 */
#define FALL_BACK_TO_NONDELALLOC 1
2450static int ext4_nonda_switch(struct super_block *sb)
2451{
2452 s64 free_blocks, dirty_blocks;
2453 struct ext4_sb_info *sbi = EXT4_SB(sb);
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463 free_blocks = EXT4_C2B(sbi,
2464 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2465 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2466
2467
2468
2469 if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
2470 !writeback_in_progress(sb->s_bdi) &&
2471 down_read_trylock(&sb->s_umount)) {
2472 writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2473 up_read(&sb->s_umount);
2474 }
2475
2476 if (2 * free_blocks < 3 * dirty_blocks ||
2477 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2478
2479
2480
2481
2482 return 1;
2483 }
2484 return 0;
2485}
2486
2487static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2488 loff_t pos, unsigned len, unsigned flags,
2489 struct page **pagep, void **fsdata)
2490{
2491 int ret, retries = 0;
2492 struct page *page;
2493 pgoff_t index;
2494 struct inode *inode = mapping->host;
2495 handle_t *handle;
2496
2497 index = pos >> PAGE_CACHE_SHIFT;
2498
2499 if (ext4_nonda_switch(inode->i_sb)) {
2500 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2501 return ext4_write_begin(file, mapping, pos,
2502 len, flags, pagep, fsdata);
2503 }
2504 *fsdata = (void *)0;
2505 trace_ext4_da_write_begin(inode, pos, len, flags);
2506retry:
2507
2508
2509
2510
2511
2512
2513 handle = ext4_journal_start(inode, 1);
2514 if (IS_ERR(handle)) {
2515 ret = PTR_ERR(handle);
2516 goto out;
2517 }
2518
2519
2520 flags |= AOP_FLAG_NOFS;
2521
2522 page = grab_cache_page_write_begin(mapping, index, flags);
2523 if (!page) {
2524 ext4_journal_stop(handle);
2525 ret = -ENOMEM;
2526 goto out;
2527 }
2528 *pagep = page;
2529
2530 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2531 if (ret < 0) {
2532 unlock_page(page);
2533 ext4_journal_stop(handle);
2534 page_cache_release(page);
2535
2536
2537
2538
2539
2540 if (pos + len > inode->i_size)
2541 ext4_truncate_failed_write(inode);
2542 }
2543
2544 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2545 goto retry;
2546out:
2547 return ret;
2548}
2549
2550
2551
2552
2553
2554static int ext4_da_should_update_i_disksize(struct page *page,
2555 unsigned long offset)
2556{
2557 struct buffer_head *bh;
2558 struct inode *inode = page->mapping->host;
2559 unsigned int idx;
2560 int i;
2561
2562 bh = page_buffers(page);
2563 idx = offset >> inode->i_blkbits;
2564
2565 for (i = 0; i < idx; i++)
2566 bh = bh->b_this_page;
2567
2568 if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
2569 return 0;
2570 return 1;
2571}
2572
2573static int ext4_da_write_end(struct file *file,
2574 struct address_space *mapping,
2575 loff_t pos, unsigned len, unsigned copied,
2576 struct page *page, void *fsdata)
2577{
2578 struct inode *inode = mapping->host;
2579 int ret = 0, ret2;
2580 handle_t *handle = ext4_journal_current_handle();
2581 loff_t new_i_size;
2582 unsigned long start, end;
2583 int write_mode = (int)(unsigned long)fsdata;
2584
2585 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2586 switch (ext4_inode_journal_mode(inode)) {
2587 case EXT4_INODE_ORDERED_DATA_MODE:
2588 return ext4_ordered_write_end(file, mapping, pos,
2589 len, copied, page, fsdata);
2590 case EXT4_INODE_WRITEBACK_DATA_MODE:
2591 return ext4_writeback_write_end(file, mapping, pos,
2592 len, copied, page, fsdata);
2593 default:
2594 BUG();
2595 }
2596 }
2597
2598 trace_ext4_da_write_end(inode, pos, len, copied);
2599 start = pos & (PAGE_CACHE_SIZE - 1);
2600 end = start + copied - 1;
2601
2602
2603
2604
2605
2606
2607
2608 new_i_size = pos + copied;
2609 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2610 if (ext4_da_should_update_i_disksize(page, end)) {
2611 down_write(&EXT4_I(inode)->i_data_sem);
2612 if (new_i_size > EXT4_I(inode)->i_disksize) {
2613
2614
2615
2616
2617 if (ext4_should_order_data(inode))
2618 ret = ext4_jbd2_file_inode(handle,
2619 inode);
2620
2621 EXT4_I(inode)->i_disksize = new_i_size;
2622 }
2623 up_write(&EXT4_I(inode)->i_data_sem);
2624
2625
2626
2627
2628 ext4_mark_inode_dirty(handle, inode);
2629 }
2630 }
2631 ret2 = generic_write_end(file, mapping, pos, len, copied,
2632 page, fsdata);
2633 copied = ret2;
2634 if (ret2 < 0)
2635 ret = ret2;
2636 ret2 = ext4_journal_stop(handle);
2637 if (!ret)
2638 ret = ret2;
2639
2640 return ret ? ret : copied;
2641}
2642
/*
 * invalidatepage for delayed allocation: release the reservation held
 * for the page's buffers (if it has any) from @offset on, then do the
 * regular ext4 invalidation.  The page must be locked.
 */
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page))
		ext4_da_page_release_reservation(page, offset);

	ext4_invalidatepage(page, offset);
}
2659
2660
2661
2662
2663int ext4_alloc_da_blocks(struct inode *inode)
2664{
2665 trace_ext4_alloc_da_blocks(inode);
2666
2667 if (!EXT4_I(inode)->i_reserved_data_blocks &&
2668 !EXT4_I(inode)->i_reserved_meta_blocks)
2669 return 0;
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702 return filemap_flush(inode->i_mapping);
2703}
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2720{
2721 struct inode *inode = mapping->host;
2722 journal_t *journal;
2723 int err;
2724
2725 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
2726 test_opt(inode->i_sb, DELALLOC)) {
2727
2728
2729
2730
2731
2732 filemap_write_and_wait(mapping);
2733 }
2734
2735 if (EXT4_JOURNAL(inode) &&
2736 ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755 ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
2756 journal = EXT4_JOURNAL(inode);
2757 jbd2_journal_lock_updates(journal);
2758 err = jbd2_journal_flush(journal);
2759 jbd2_journal_unlock_updates(journal);
2760
2761 if (err)
2762 return 0;
2763 }
2764
2765 return generic_block_bmap(mapping, block, ext4_get_block);
2766}
2767
2768static int ext4_readpage(struct file *file, struct page *page)
2769{
2770 trace_ext4_readpage(page);
2771 return mpage_readpage(page, ext4_get_block);
2772}
2773
2774static int
2775ext4_readpages(struct file *file, struct address_space *mapping,
2776 struct list_head *pages, unsigned nr_pages)
2777{
2778 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2779}
2780
2781static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
2782{
2783 struct buffer_head *head, *bh;
2784 unsigned int curr_off = 0;
2785
2786 if (!page_has_buffers(page))
2787 return;
2788 head = bh = page_buffers(page);
2789 do {
2790 if (offset <= curr_off && test_clear_buffer_uninit(bh)
2791 && bh->b_private) {
2792 ext4_free_io_end(bh->b_private);
2793 bh->b_private = NULL;
2794 bh->b_end_io = NULL;
2795 }
2796 curr_off = curr_off + bh->b_size;
2797 bh = bh->b_this_page;
2798 } while (bh != head);
2799}
2800
2801static void ext4_invalidatepage(struct page *page, unsigned long offset)
2802{
2803 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2804
2805 trace_ext4_invalidatepage(page, offset);
2806
2807
2808
2809
2810 if (ext4_should_dioread_nolock(page->mapping->host))
2811 ext4_invalidatepage_free_endio(page, offset);
2812
2813
2814
2815 if (offset == 0)
2816 ClearPageChecked(page);
2817
2818 if (journal)
2819 jbd2_journal_invalidatepage(journal, page, offset);
2820 else
2821 block_invalidatepage(page, offset);
2822}
2823
2824static int ext4_releasepage(struct page *page, gfp_t wait)
2825{
2826 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2827
2828 trace_ext4_releasepage(page);
2829
2830 WARN_ON(PageChecked(page));
2831 if (!page_has_buffers(page))
2832 return 0;
2833 if (journal)
2834 return jbd2_journal_try_to_free_buffers(journal, page, wait);
2835 else
2836 return try_to_free_buffers(page);
2837}
2838
2839
2840
2841
2842
2843
2844static int ext4_get_block_write(struct inode *inode, sector_t iblock,
2845 struct buffer_head *bh_result, int create)
2846{
2847 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
2848 inode->i_ino, create);
2849 return _ext4_get_block(inode, iblock, bh_result,
2850 EXT4_GET_BLOCKS_IO_CREATE_EXT);
2851}
2852
2853static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
2854 struct buffer_head *bh_result, int flags)
2855{
2856 handle_t *handle = ext4_journal_current_handle();
2857 struct ext4_map_blocks map;
2858 int ret = 0;
2859
2860 ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
2861 inode->i_ino, flags);
2862
2863 flags = EXT4_GET_BLOCKS_NO_LOCK;
2864
2865 map.m_lblk = iblock;
2866 map.m_len = bh_result->b_size >> inode->i_blkbits;
2867
2868 ret = ext4_map_blocks(handle, inode, &map, flags);
2869 if (ret > 0) {
2870 map_bh(bh_result, inode->i_sb, map.m_pblk);
2871 bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
2872 map.m_flags;
2873 bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
2874 ret = 0;
2875 }
2876 return ret;
2877}
2878
2879static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2880 ssize_t size, void *private, int ret,
2881 bool is_async)
2882{
2883 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2884 ext4_io_end_t *io_end = iocb->private;
2885 struct workqueue_struct *wq;
2886 unsigned long flags;
2887 struct ext4_inode_info *ei;
2888
2889
2890 if (!io_end || !size)
2891 goto out;
2892
2893 ext_debug("ext4_end_io_dio(): io_end 0x%p "
2894 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
2895 iocb->private, io_end->inode->i_ino, iocb, offset,
2896 size);
2897
2898 iocb->private = NULL;
2899
2900
2901 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2902 ext4_free_io_end(io_end);
2903out:
2904 if (is_async)
2905 aio_complete(iocb, ret, 0);
2906 inode_dio_done(inode);
2907 return;
2908 }
2909
2910 io_end->offset = offset;
2911 io_end->size = size;
2912 if (is_async) {
2913 io_end->iocb = iocb;
2914 io_end->result = ret;
2915 }
2916 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
2917
2918
2919 ei = EXT4_I(io_end->inode);
2920 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
2921 list_add_tail(&io_end->list, &ei->i_completed_io_list);
2922 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
2923
2924
2925 queue_work(wq, &io_end->work);
2926}
2927
2928static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2929{
2930 ext4_io_end_t *io_end = bh->b_private;
2931 struct workqueue_struct *wq;
2932 struct inode *inode;
2933 unsigned long flags;
2934
2935 if (!test_clear_buffer_uninit(bh) || !io_end)
2936 goto out;
2937
2938 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
2939 ext4_msg(io_end->inode->i_sb, KERN_INFO,
2940 "sb umounted, discard end_io request for inode %lu",
2941 io_end->inode->i_ino);
2942 ext4_free_io_end(io_end);
2943 goto out;
2944 }
2945
2946
2947
2948
2949
2950 inode = io_end->inode;
2951 ext4_set_io_unwritten_flag(inode, io_end);
2952
2953
2954 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
2955 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
2956 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
2957
2958 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
2959
2960 queue_work(wq, &io_end->work);
2961out:
2962 bh->b_private = NULL;
2963 bh->b_end_io = NULL;
2964 clear_buffer_uninit(bh);
2965 end_buffer_async_write(bh, uptodate);
2966}
2967
2968static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
2969{
2970 ext4_io_end_t *io_end;
2971 struct page *page = bh->b_page;
2972 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
2973 size_t size = bh->b_size;
2974
2975retry:
2976 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
2977 if (!io_end) {
2978 pr_warn_ratelimited("%s: allocation fail\n", __func__);
2979 schedule();
2980 goto retry;
2981 }
2982 io_end->offset = offset;
2983 io_end->size = size;
2984
2985
2986
2987
2988
2989 io_end->page = page;
2990 get_page(io_end->page);
2991
2992 bh->b_private = io_end;
2993 bh->b_end_io = ext4_end_io_buffer_write;
2994 return 0;
2995}
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3017 const struct iovec *iov, loff_t offset,
3018 unsigned long nr_segs)
3019{
3020 struct file *file = iocb->ki_filp;
3021 struct inode *inode = file->f_mapping->host;
3022 ssize_t ret;
3023 size_t count = iov_length(iov, nr_segs);
3024
3025 loff_t final_size = offset + count;
3026 if (rw == WRITE && final_size <= inode->i_size) {
3027 int overwrite = 0;
3028
3029 BUG_ON(iocb->private == NULL);
3030
3031
3032 overwrite = *((int *)iocb->private);
3033
3034 if (overwrite) {
3035 down_read(&EXT4_I(inode)->i_data_sem);
3036 mutex_unlock(&inode->i_mutex);
3037 }
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059 iocb->private = NULL;
3060 EXT4_I(inode)->cur_aio_dio = NULL;
3061 if (!is_sync_kiocb(iocb)) {
3062 ext4_io_end_t *io_end =
3063 ext4_init_io_end(inode, GFP_NOFS);
3064 if (!io_end) {
3065 ret = -ENOMEM;
3066 goto retake_lock;
3067 }
3068 io_end->flag |= EXT4_IO_END_DIRECT;
3069 iocb->private = io_end;
3070
3071
3072
3073
3074
3075
3076
3077 EXT4_I(inode)->cur_aio_dio = iocb->private;
3078 }
3079
3080 if (overwrite)
3081 ret = __blockdev_direct_IO(rw, iocb, inode,
3082 inode->i_sb->s_bdev, iov,
3083 offset, nr_segs,
3084 ext4_get_block_write_nolock,
3085 ext4_end_io_dio,
3086 NULL,
3087 0);
3088 else
3089 ret = __blockdev_direct_IO(rw, iocb, inode,
3090 inode->i_sb->s_bdev, iov,
3091 offset, nr_segs,
3092 ext4_get_block_write,
3093 ext4_end_io_dio,
3094 NULL,
3095 DIO_LOCKING);
3096 if (iocb->private)
3097 EXT4_I(inode)->cur_aio_dio = NULL;
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3113 ext4_free_io_end(iocb->private);
3114 iocb->private = NULL;
3115 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3116 EXT4_STATE_DIO_UNWRITTEN)) {
3117 int err;
3118
3119
3120
3121
3122 err = ext4_convert_unwritten_extents(inode,
3123 offset, ret);
3124 if (err < 0)
3125 ret = err;
3126 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3127 }
3128
3129 retake_lock:
3130
3131 if (overwrite) {
3132 up_read(&EXT4_I(inode)->i_data_sem);
3133 mutex_lock(&inode->i_mutex);
3134 }
3135
3136 return ret;
3137 }
3138
3139
3140 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3141}
3142
3143static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3144 const struct iovec *iov, loff_t offset,
3145 unsigned long nr_segs)
3146{
3147 struct file *file = iocb->ki_filp;
3148 struct inode *inode = file->f_mapping->host;
3149 ssize_t ret;
3150
3151
3152
3153
3154 if (ext4_should_journal_data(inode))
3155 return 0;
3156
3157 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
3158 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3159 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
3160 else
3161 ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3162 trace_ext4_direct_IO_exit(inode, offset,
3163 iov_length(iov, nr_segs), rw, ret);
3164 return ret;
3165}
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
/*
 * ->set_page_dirty for the journalled-data aops: set the page's Checked flag
 * first, then dirty it via __set_page_dirty_nobuffers() and return that
 * function's result.
 */
static int ext4_journalled_set_page_dirty(struct page *page)
{
	int ret;

	SetPageChecked(page);
	ret = __set_page_dirty_nobuffers(page);

	return ret;
}
3185
3186static const struct address_space_operations ext4_ordered_aops = {
3187 .readpage = ext4_readpage,
3188 .readpages = ext4_readpages,
3189 .writepage = ext4_writepage,
3190 .write_begin = ext4_write_begin,
3191 .write_end = ext4_ordered_write_end,
3192 .bmap = ext4_bmap,
3193 .invalidatepage = ext4_invalidatepage,
3194 .releasepage = ext4_releasepage,
3195 .direct_IO = ext4_direct_IO,
3196 .migratepage = buffer_migrate_page,
3197 .is_partially_uptodate = block_is_partially_uptodate,
3198 .error_remove_page = generic_error_remove_page,
3199};
3200
3201static const struct address_space_operations ext4_writeback_aops = {
3202 .readpage = ext4_readpage,
3203 .readpages = ext4_readpages,
3204 .writepage = ext4_writepage,
3205 .write_begin = ext4_write_begin,
3206 .write_end = ext4_writeback_write_end,
3207 .bmap = ext4_bmap,
3208 .invalidatepage = ext4_invalidatepage,
3209 .releasepage = ext4_releasepage,
3210 .direct_IO = ext4_direct_IO,
3211 .migratepage = buffer_migrate_page,
3212 .is_partially_uptodate = block_is_partially_uptodate,
3213 .error_remove_page = generic_error_remove_page,
3214};
3215
3216static const struct address_space_operations ext4_journalled_aops = {
3217 .readpage = ext4_readpage,
3218 .readpages = ext4_readpages,
3219 .writepage = ext4_writepage,
3220 .write_begin = ext4_write_begin,
3221 .write_end = ext4_journalled_write_end,
3222 .set_page_dirty = ext4_journalled_set_page_dirty,
3223 .bmap = ext4_bmap,
3224 .invalidatepage = ext4_invalidatepage,
3225 .releasepage = ext4_releasepage,
3226 .direct_IO = ext4_direct_IO,
3227 .is_partially_uptodate = block_is_partially_uptodate,
3228 .error_remove_page = generic_error_remove_page,
3229};
3230
3231static const struct address_space_operations ext4_da_aops = {
3232 .readpage = ext4_readpage,
3233 .readpages = ext4_readpages,
3234 .writepage = ext4_writepage,
3235 .writepages = ext4_da_writepages,
3236 .write_begin = ext4_da_write_begin,
3237 .write_end = ext4_da_write_end,
3238 .bmap = ext4_bmap,
3239 .invalidatepage = ext4_da_invalidatepage,
3240 .releasepage = ext4_releasepage,
3241 .direct_IO = ext4_direct_IO,
3242 .migratepage = buffer_migrate_page,
3243 .is_partially_uptodate = block_is_partially_uptodate,
3244 .error_remove_page = generic_error_remove_page,
3245};
3246
3247void ext4_set_aops(struct inode *inode)
3248{
3249 switch (ext4_inode_journal_mode(inode)) {
3250 case EXT4_INODE_ORDERED_DATA_MODE:
3251 if (test_opt(inode->i_sb, DELALLOC))
3252 inode->i_mapping->a_ops = &ext4_da_aops;
3253 else
3254 inode->i_mapping->a_ops = &ext4_ordered_aops;
3255 break;
3256 case EXT4_INODE_WRITEBACK_DATA_MODE:
3257 if (test_opt(inode->i_sb, DELALLOC))
3258 inode->i_mapping->a_ops = &ext4_da_aops;
3259 else
3260 inode->i_mapping->a_ops = &ext4_writeback_aops;
3261 break;
3262 case EXT4_INODE_JOURNAL_DATA_MODE:
3263 inode->i_mapping->a_ops = &ext4_journalled_aops;
3264 break;
3265 default:
3266 BUG();
3267 }
3268}
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279int ext4_discard_partial_page_buffers(handle_t *handle,
3280 struct address_space *mapping, loff_t from,
3281 loff_t length, int flags)
3282{
3283 struct inode *inode = mapping->host;
3284 struct page *page;
3285 int err = 0;
3286
3287 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3288 mapping_gfp_mask(mapping) & ~__GFP_FS);
3289 if (!page)
3290 return -ENOMEM;
3291
3292 err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
3293 from, length, flags);
3294
3295 unlock_page(page);
3296 page_cache_release(page);
3297 return err;
3298}
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3334 struct inode *inode, struct page *page, loff_t from,
3335 loff_t length, int flags)
3336{
3337 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3338 unsigned int offset = from & (PAGE_CACHE_SIZE-1);
3339 unsigned int blocksize, max, pos;
3340 ext4_lblk_t iblock;
3341 struct buffer_head *bh;
3342 int err = 0;
3343
3344 blocksize = inode->i_sb->s_blocksize;
3345 max = PAGE_CACHE_SIZE - offset;
3346
3347 if (index != page->index)
3348 return -EINVAL;
3349
3350
3351
3352
3353
3354 if (length > max || length < 0)
3355 length = max;
3356
3357 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3358
3359 if (!page_has_buffers(page))
3360 create_empty_buffers(page, blocksize, 0);
3361
3362
3363 bh = page_buffers(page);
3364 pos = blocksize;
3365 while (offset >= pos) {
3366 bh = bh->b_this_page;
3367 iblock++;
3368 pos += blocksize;
3369 }
3370
3371 pos = offset;
3372 while (pos < offset + length) {
3373 unsigned int end_of_block, range_to_discard;
3374
3375 err = 0;
3376
3377
3378 range_to_discard = offset + length - pos;
3379
3380
3381 end_of_block = blocksize - (pos & (blocksize-1));
3382
3383
3384
3385
3386
3387 if (range_to_discard > end_of_block)
3388 range_to_discard = end_of_block;
3389
3390
3391
3392
3393
3394
3395 if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
3396 buffer_mapped(bh))
3397 goto next;
3398
3399
3400 if (range_to_discard == blocksize) {
3401 clear_buffer_dirty(bh);
3402 bh->b_bdev = NULL;
3403 clear_buffer_mapped(bh);
3404 clear_buffer_req(bh);
3405 clear_buffer_new(bh);
3406 clear_buffer_delay(bh);
3407 clear_buffer_unwritten(bh);
3408 clear_buffer_uptodate(bh);
3409 zero_user(page, pos, range_to_discard);
3410 BUFFER_TRACE(bh, "Buffer discarded");
3411 goto next;
3412 }
3413
3414
3415
3416
3417
3418
3419
3420
3421 if (!buffer_mapped(bh)) {
3422
3423
3424
3425
3426 BUFFER_TRACE(bh, "unmapped");
3427 ext4_get_block(inode, iblock, bh, 0);
3428
3429 if (!buffer_mapped(bh)) {
3430 BUFFER_TRACE(bh, "still unmapped");
3431 goto next;
3432 }
3433 }
3434
3435
3436 if (PageUptodate(page))
3437 set_buffer_uptodate(bh);
3438
3439 if (!buffer_uptodate(bh)) {
3440 err = -EIO;
3441 ll_rw_block(READ, 1, &bh);
3442 wait_on_buffer(bh);
3443
3444 if (!buffer_uptodate(bh))
3445 goto next;
3446 }
3447
3448 if (ext4_should_journal_data(inode)) {
3449 BUFFER_TRACE(bh, "get write access");
3450 err = ext4_journal_get_write_access(handle, bh);
3451 if (err)
3452 goto next;
3453 }
3454
3455 zero_user(page, pos, range_to_discard);
3456
3457 err = 0;
3458 if (ext4_should_journal_data(inode)) {
3459 err = ext4_handle_dirty_metadata(handle, inode, bh);
3460 } else
3461 mark_buffer_dirty(bh);
3462
3463 BUFFER_TRACE(bh, "Partial buffer zeroed");
3464next:
3465 bh = bh->b_this_page;
3466 iblock++;
3467 pos += range_to_discard;
3468 }
3469
3470 return err;
3471}
3472
3473int ext4_can_truncate(struct inode *inode)
3474{
3475 if (S_ISREG(inode->i_mode))
3476 return 1;
3477 if (S_ISDIR(inode->i_mode))
3478 return 1;
3479 if (S_ISLNK(inode->i_mode))
3480 return !ext4_inode_is_fast_symlink(inode);
3481 return 0;
3482}
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3496{
3497 struct inode *inode = file->f_path.dentry->d_inode;
3498 if (!S_ISREG(inode->i_mode))
3499 return -EOPNOTSUPP;
3500
3501 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
3502
3503 return -EOPNOTSUPP;
3504 }
3505
3506 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3507
3508 return -EOPNOTSUPP;
3509 }
3510
3511 return ext4_ext_punch_hole(file, offset, length);
3512}
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542void ext4_truncate(struct inode *inode)
3543{
3544 trace_ext4_truncate_enter(inode);
3545
3546 if (!ext4_can_truncate(inode))
3547 return;
3548
3549 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3550
3551 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3552 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
3553
3554 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3555 ext4_ext_truncate(inode);
3556 else
3557 ext4_ind_truncate(inode);
3558
3559 trace_ext4_truncate_exit(inode);
3560}
3561
3562
3563
3564
3565
3566
3567
3568static int __ext4_get_inode_loc(struct inode *inode,
3569 struct ext4_iloc *iloc, int in_mem)
3570{
3571 struct ext4_group_desc *gdp;
3572 struct buffer_head *bh;
3573 struct super_block *sb = inode->i_sb;
3574 ext4_fsblk_t block;
3575 int inodes_per_block, inode_offset;
3576
3577 iloc->bh = NULL;
3578 if (!ext4_valid_inum(sb, inode->i_ino))
3579 return -EIO;
3580
3581 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3582 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3583 if (!gdp)
3584 return -EIO;
3585
3586
3587
3588
3589 inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
3590 inode_offset = ((inode->i_ino - 1) %
3591 EXT4_INODES_PER_GROUP(sb));
3592 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3593 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3594
3595 bh = sb_getblk(sb, block);
3596 if (!bh) {
3597 EXT4_ERROR_INODE_BLOCK(inode, block,
3598 "unable to read itable block");
3599 return -EIO;
3600 }
3601 if (!buffer_uptodate(bh)) {
3602 lock_buffer(bh);
3603
3604
3605
3606
3607
3608
3609
3610 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3611 set_buffer_uptodate(bh);
3612
3613 if (buffer_uptodate(bh)) {
3614
3615 unlock_buffer(bh);
3616 goto has_buffer;
3617 }
3618
3619
3620
3621
3622
3623
3624 if (in_mem) {
3625 struct buffer_head *bitmap_bh;
3626 int i, start;
3627
3628 start = inode_offset & ~(inodes_per_block - 1);
3629
3630
3631 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3632 if (!bitmap_bh)
3633 goto make_io;
3634
3635
3636
3637
3638
3639
3640 if (!buffer_uptodate(bitmap_bh)) {
3641 brelse(bitmap_bh);
3642 goto make_io;
3643 }
3644 for (i = start; i < start + inodes_per_block; i++) {
3645 if (i == inode_offset)
3646 continue;
3647 if (ext4_test_bit(i, bitmap_bh->b_data))
3648 break;
3649 }
3650 brelse(bitmap_bh);
3651 if (i == start + inodes_per_block) {
3652
3653 memset(bh->b_data, 0, bh->b_size);
3654 set_buffer_uptodate(bh);
3655 unlock_buffer(bh);
3656 goto has_buffer;
3657 }
3658 }
3659
3660make_io:
3661
3662
3663
3664
3665 if (EXT4_SB(sb)->s_inode_readahead_blks) {
3666 ext4_fsblk_t b, end, table;
3667 unsigned num;
3668
3669 table = ext4_inode_table(sb, gdp);
3670
3671 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
3672 if (table > b)
3673 b = table;
3674 end = b + EXT4_SB(sb)->s_inode_readahead_blks;
3675 num = EXT4_INODES_PER_GROUP(sb);
3676 if (ext4_has_group_desc_csum(sb))
3677 num -= ext4_itable_unused_count(sb, gdp);
3678 table += num / inodes_per_block;
3679 if (end > table)
3680 end = table;
3681 while (b <= end)
3682 sb_breadahead(sb, b++);
3683 }
3684
3685
3686
3687
3688
3689
3690 trace_ext4_load_inode(inode);
3691 get_bh(bh);
3692 bh->b_end_io = end_buffer_read_sync;
3693 submit_bh(READ | REQ_META | REQ_PRIO, bh);
3694 wait_on_buffer(bh);
3695 if (!buffer_uptodate(bh)) {
3696 EXT4_ERROR_INODE_BLOCK(inode, block,
3697 "unable to read itable block");
3698 brelse(bh);
3699 return -EIO;
3700 }
3701 }
3702has_buffer:
3703 iloc->bh = bh;
3704 return 0;
3705}
3706
3707int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
3708{
3709
3710 return __ext4_get_inode_loc(inode, iloc,
3711 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
3712}
3713
3714void ext4_set_inode_flags(struct inode *inode)
3715{
3716 unsigned int flags = EXT4_I(inode)->i_flags;
3717
3718 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3719 if (flags & EXT4_SYNC_FL)
3720 inode->i_flags |= S_SYNC;
3721 if (flags & EXT4_APPEND_FL)
3722 inode->i_flags |= S_APPEND;
3723 if (flags & EXT4_IMMUTABLE_FL)
3724 inode->i_flags |= S_IMMUTABLE;
3725 if (flags & EXT4_NOATIME_FL)
3726 inode->i_flags |= S_NOATIME;
3727 if (flags & EXT4_DIRSYNC_FL)
3728 inode->i_flags |= S_DIRSYNC;
3729}
3730
3731
3732void ext4_get_inode_flags(struct ext4_inode_info *ei)
3733{
3734 unsigned int vfs_fl;
3735 unsigned long old_fl, new_fl;
3736
3737 do {
3738 vfs_fl = ei->vfs_inode.i_flags;
3739 old_fl = ei->i_flags;
3740 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
3741 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
3742 EXT4_DIRSYNC_FL);
3743 if (vfs_fl & S_SYNC)
3744 new_fl |= EXT4_SYNC_FL;
3745 if (vfs_fl & S_APPEND)
3746 new_fl |= EXT4_APPEND_FL;
3747 if (vfs_fl & S_IMMUTABLE)
3748 new_fl |= EXT4_IMMUTABLE_FL;
3749 if (vfs_fl & S_NOATIME)
3750 new_fl |= EXT4_NOATIME_FL;
3751 if (vfs_fl & S_DIRSYNC)
3752 new_fl |= EXT4_DIRSYNC_FL;
3753 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
3754}
3755
3756static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
3757 struct ext4_inode_info *ei)
3758{
3759 blkcnt_t i_blocks ;
3760 struct inode *inode = &(ei->vfs_inode);
3761 struct super_block *sb = inode->i_sb;
3762
3763 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3764 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
3765
3766 i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
3767 le32_to_cpu(raw_inode->i_blocks_lo);
3768 if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
3769
3770 return i_blocks << (inode->i_blkbits - 9);
3771 } else {
3772 return i_blocks;
3773 }
3774 } else {
3775 return le32_to_cpu(raw_inode->i_blocks_lo);
3776 }
3777}
3778
3779struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3780{
3781 struct ext4_iloc iloc;
3782 struct ext4_inode *raw_inode;
3783 struct ext4_inode_info *ei;
3784 struct inode *inode;
3785 journal_t *journal = EXT4_SB(sb)->s_journal;
3786 long ret;
3787 int block;
3788 uid_t i_uid;
3789 gid_t i_gid;
3790
3791 inode = iget_locked(sb, ino);
3792 if (!inode)
3793 return ERR_PTR(-ENOMEM);
3794 if (!(inode->i_state & I_NEW))
3795 return inode;
3796
3797 ei = EXT4_I(inode);
3798 iloc.bh = NULL;
3799
3800 ret = __ext4_get_inode_loc(inode, &iloc, 0);
3801 if (ret < 0)
3802 goto bad_inode;
3803 raw_inode = ext4_raw_inode(&iloc);
3804
3805 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
3806 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
3807 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
3808 EXT4_INODE_SIZE(inode->i_sb)) {
3809 EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
3810 EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
3811 EXT4_INODE_SIZE(inode->i_sb));
3812 ret = -EIO;
3813 goto bad_inode;
3814 }
3815 } else
3816 ei->i_extra_isize = 0;
3817
3818
3819 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3820 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3821 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3822 __u32 csum;
3823 __le32 inum = cpu_to_le32(inode->i_ino);
3824 __le32 gen = raw_inode->i_generation;
3825 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
3826 sizeof(inum));
3827 ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
3828 sizeof(gen));
3829 }
3830
3831 if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
3832 EXT4_ERROR_INODE(inode, "checksum invalid");
3833 ret = -EIO;
3834 goto bad_inode;
3835 }
3836
3837 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3838 i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3839 i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3840 if (!(test_opt(inode->i_sb, NO_UID32))) {
3841 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3842 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3843 }
3844 i_uid_write(inode, i_uid);
3845 i_gid_write(inode, i_gid);
3846 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
3847
3848 ext4_clear_state_flags(ei);
3849 ei->i_dir_start_lookup = 0;
3850 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
3851
3852
3853
3854
3855
3856 if (inode->i_nlink == 0) {
3857 if (inode->i_mode == 0 ||
3858 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
3859
3860 ret = -ESTALE;
3861 goto bad_inode;
3862 }
3863
3864
3865
3866
3867 }
3868 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
3869 inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
3870 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
3871 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
3872 ei->i_file_acl |=
3873 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
3874 inode->i_size = ext4_isize(raw_inode);
3875 ei->i_disksize = inode->i_size;
3876#ifdef CONFIG_QUOTA
3877 ei->i_reserved_quota = 0;
3878#endif
3879 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
3880 ei->i_block_group = iloc.block_group;
3881 ei->i_last_alloc_group = ~0;
3882
3883
3884
3885
3886 for (block = 0; block < EXT4_N_BLOCKS; block++)
3887 ei->i_data[block] = raw_inode->i_block[block];
3888 INIT_LIST_HEAD(&ei->i_orphan);
3889
3890
3891
3892
3893
3894
3895
3896
3897 if (journal) {
3898 transaction_t *transaction;
3899 tid_t tid;
3900
3901 read_lock(&journal->j_state_lock);
3902 if (journal->j_running_transaction)
3903 transaction = journal->j_running_transaction;
3904 else
3905 transaction = journal->j_committing_transaction;
3906 if (transaction)
3907 tid = transaction->t_tid;
3908 else
3909 tid = journal->j_commit_sequence;
3910 read_unlock(&journal->j_state_lock);
3911 ei->i_sync_tid = tid;
3912 ei->i_datasync_tid = tid;
3913 }
3914
3915 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
3916 if (ei->i_extra_isize == 0) {
3917
3918 ei->i_extra_isize = sizeof(struct ext4_inode) -
3919 EXT4_GOOD_OLD_INODE_SIZE;
3920 } else {
3921 __le32 *magic = (void *)raw_inode +
3922 EXT4_GOOD_OLD_INODE_SIZE +
3923 ei->i_extra_isize;
3924 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
3925 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
3926 }
3927 }
3928
3929 EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
3930 EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
3931 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
3932 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
3933
3934 inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
3935 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
3936 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
3937 inode->i_version |=
3938 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
3939 }
3940
3941 ret = 0;
3942 if (ei->i_file_acl &&
3943 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
3944 EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
3945 ei->i_file_acl);
3946 ret = -EIO;
3947 goto bad_inode;
3948 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
3949 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
3950 (S_ISLNK(inode->i_mode) &&
3951 !ext4_inode_is_fast_symlink(inode)))
3952
3953 ret = ext4_ext_check_inode(inode);
3954 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
3955 (S_ISLNK(inode->i_mode) &&
3956 !ext4_inode_is_fast_symlink(inode))) {
3957
3958 ret = ext4_ind_check_inode(inode);
3959 }
3960 if (ret)
3961 goto bad_inode;
3962
3963 if (S_ISREG(inode->i_mode)) {
3964 inode->i_op = &ext4_file_inode_operations;
3965 inode->i_fop = &ext4_file_operations;
3966 ext4_set_aops(inode);
3967 } else if (S_ISDIR(inode->i_mode)) {
3968 inode->i_op = &ext4_dir_inode_operations;
3969 inode->i_fop = &ext4_dir_operations;
3970 } else if (S_ISLNK(inode->i_mode)) {
3971 if (ext4_inode_is_fast_symlink(inode)) {
3972 inode->i_op = &ext4_fast_symlink_inode_operations;
3973 nd_terminate_link(ei->i_data, inode->i_size,
3974 sizeof(ei->i_data) - 1);
3975 } else {
3976 inode->i_op = &ext4_symlink_inode_operations;
3977 ext4_set_aops(inode);
3978 }
3979 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
3980 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
3981 inode->i_op = &ext4_special_inode_operations;
3982 if (raw_inode->i_block[0])
3983 init_special_inode(inode, inode->i_mode,
3984 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
3985 else
3986 init_special_inode(inode, inode->i_mode,
3987 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
3988 } else {
3989 ret = -EIO;
3990 EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
3991 goto bad_inode;
3992 }
3993 brelse(iloc.bh);
3994 ext4_set_inode_flags(inode);
3995 unlock_new_inode(inode);
3996 return inode;
3997
3998bad_inode:
3999 brelse(iloc.bh);
4000 iget_failed(inode);
4001 return ERR_PTR(ret);
4002}
4003
4004static int ext4_inode_blocks_set(handle_t *handle,
4005 struct ext4_inode *raw_inode,
4006 struct ext4_inode_info *ei)
4007{
4008 struct inode *inode = &(ei->vfs_inode);
4009 u64 i_blocks = inode->i_blocks;
4010 struct super_block *sb = inode->i_sb;
4011
4012 if (i_blocks <= ~0U) {
4013
4014
4015
4016
4017 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4018 raw_inode->i_blocks_high = 0;
4019 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4020 return 0;
4021 }
4022 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
4023 return -EFBIG;
4024
4025 if (i_blocks <= 0xffffffffffffULL) {
4026
4027
4028
4029
4030 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4031 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4032 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4033 } else {
4034 ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4035
4036 i_blocks = i_blocks >> (inode->i_blkbits - 9);
4037 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4038 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4039 }
4040 return 0;
4041}
4042
4043
4044
4045
4046
4047
4048
4049
4050static int ext4_do_update_inode(handle_t *handle,
4051 struct inode *inode,
4052 struct ext4_iloc *iloc)
4053{
4054 struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
4055 struct ext4_inode_info *ei = EXT4_I(inode);
4056 struct buffer_head *bh = iloc->bh;
4057 int err = 0, rc, block;
4058 int need_datasync = 0;
4059 uid_t i_uid;
4060 gid_t i_gid;
4061
4062
4063
4064 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
4065 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
4066
4067 ext4_get_inode_flags(ei);
4068 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
4069 i_uid = i_uid_read(inode);
4070 i_gid = i_gid_read(inode);
4071 if (!(test_opt(inode->i_sb, NO_UID32))) {
4072 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
4073 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
4074
4075
4076
4077
4078 if (!ei->i_dtime) {
4079 raw_inode->i_uid_high =
4080 cpu_to_le16(high_16_bits(i_uid));
4081 raw_inode->i_gid_high =
4082 cpu_to_le16(high_16_bits(i_gid));
4083 } else {
4084 raw_inode->i_uid_high = 0;
4085 raw_inode->i_gid_high = 0;
4086 }
4087 } else {
4088 raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
4089 raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
4090 raw_inode->i_uid_high = 0;
4091 raw_inode->i_gid_high = 0;
4092 }
4093 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
4094
4095 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
4096 EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
4097 EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
4098 EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
4099
4100 if (ext4_inode_blocks_set(handle, raw_inode, ei))
4101 goto out_brelse;
4102 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4103 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
4104 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
4105 cpu_to_le32(EXT4_OS_HURD))
4106 raw_inode->i_file_acl_high =
4107 cpu_to_le16(ei->i_file_acl >> 32);
4108 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
4109 if (ei->i_disksize != ext4_isize(raw_inode)) {
4110 ext4_isize_set(raw_inode, ei->i_disksize);
4111 need_datasync = 1;
4112 }
4113 if (ei->i_disksize > 0x7fffffffULL) {
4114 struct super_block *sb = inode->i_sb;
4115 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
4116 EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
4117 EXT4_SB(sb)->s_es->s_rev_level ==
4118 cpu_to_le32(EXT4_GOOD_OLD_REV)) {
4119
4120
4121
4122 err = ext4_journal_get_write_access(handle,
4123 EXT4_SB(sb)->s_sbh);
4124 if (err)
4125 goto out_brelse;
4126 ext4_update_dynamic_rev(sb);
4127 EXT4_SET_RO_COMPAT_FEATURE(sb,
4128 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4129 ext4_handle_sync(handle);
4130 err = ext4_handle_dirty_super(handle, sb);
4131 }
4132 }
4133 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
4134 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
4135 if (old_valid_dev(inode->i_rdev)) {
4136 raw_inode->i_block[0] =
4137 cpu_to_le32(old_encode_dev(inode->i_rdev));
4138 raw_inode->i_block[1] = 0;
4139 } else {
4140 raw_inode->i_block[0] = 0;
4141 raw_inode->i_block[1] =
4142 cpu_to_le32(new_encode_dev(inode->i_rdev));
4143 raw_inode->i_block[2] = 0;
4144 }
4145 } else
4146 for (block = 0; block < EXT4_N_BLOCKS; block++)
4147 raw_inode->i_block[block] = ei->i_data[block];
4148
4149 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4150 if (ei->i_extra_isize) {
4151 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4152 raw_inode->i_version_hi =
4153 cpu_to_le32(inode->i_version >> 32);
4154 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4155 }
4156
4157 ext4_inode_csum_set(inode, raw_inode, ei);
4158
4159 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4160 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
4161 if (!err)
4162 err = rc;
4163 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
4164
4165 ext4_update_inode_fsync_trans(handle, inode, need_datasync);
4166out_brelse:
4167 brelse(bh);
4168 ext4_std_error(inode->i_sb, err);
4169 return err;
4170}
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4208{
4209 int err;
4210
4211 if (current->flags & PF_MEMALLOC)
4212 return 0;
4213
4214 if (EXT4_SB(inode->i_sb)->s_journal) {
4215 if (ext4_journal_current_handle()) {
4216 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
4217 dump_stack();
4218 return -EIO;
4219 }
4220
4221 if (wbc->sync_mode != WB_SYNC_ALL)
4222 return 0;
4223
4224 err = ext4_force_commit(inode->i_sb);
4225 } else {
4226 struct ext4_iloc iloc;
4227
4228 err = __ext4_get_inode_loc(inode, &iloc, 0);
4229 if (err)
4230 return err;
4231 if (wbc->sync_mode == WB_SYNC_ALL)
4232 sync_dirty_buffer(iloc.bh);
4233 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
4234 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
4235 "IO error syncing inode");
4236 err = -EIO;
4237 }
4238 brelse(iloc.bh);
4239 }
4240 return err;
4241}
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
/*
 * ext4_setattr() - apply the attribute changes in @attr to the inode
 * behind @dentry.
 *
 * Steps, in the order the code performs them:
 *  1. inode_change_ok() validates the request.
 *  2. If the owner or group actually changes, quota is transferred with
 *     dquot_transfer() and i_uid/i_gid are updated under one journal handle.
 *  3. For a size change on an inode without EXT4_INODE_EXTENTS, sizes above
 *     s_bitmap_maxbytes are rejected with -EFBIG.
 *  4. For a shrinking regular file, ext4_orphan_add() is called (when the
 *     handle is valid) and i_disksize is set to the new size before the
 *     in-memory truncate happens.
 *  5. truncate_setsize()/ext4_truncate() perform the size change.
 *  6. setattr_copy() copies the remaining attributes; a mode change is
 *     followed by ext4_acl_chmod().
 *
 * Returns `error` if it is non-zero, otherwise `rc`.
 */
int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error, rc = 0;
	int orphan = 0;	/* set once ext4_orphan_add() has been attempted */
	const unsigned int ia_valid = attr->ia_valid;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (is_quota_modification(inode, attr))
		dquot_initialize(inode);
	/* Only take the quota-transfer path when the uid or gid really differs. */
	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
		handle_t *handle;

		/*
		 * Credits requested: quota init blocks + quota delete blocks + 3.
		 * NOTE(review): the purpose of the extra 3 is not visible here.
		 */
		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
					EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		error = dquot_transfer(inode, attr);
		if (error) {
			/* Returns directly, so ext4_std_error() is not invoked here. */
			ext4_journal_stop(handle);
			return error;
		}
		/*
		 * Update the ownership fields under the same handle as the
		 * quota transfer.
		 */
		if (attr->ia_valid & ATTR_UID)
			inode->i_uid = attr->ia_uid;
		if (attr->ia_valid & ATTR_GID)
			inode->i_gid = attr->ia_gid;
		/*
		 * NOTE(review): a failure here is kept in `error` but the
		 * function carries on; it is only reported at err_out unless a
		 * later assignment overwrites it.
		 */
		error = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	}

	if (attr->ia_valid & ATTR_SIZE) {
		/* Wait via inode_dio_wait() before touching the size. */
		inode_dio_wait(inode);

		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

			/* Inodes without the EXTENTS flag have a lower size cap. */
			if (attr->ia_size > sbi->s_bitmap_maxbytes)
				return -EFBIG;
		}
	}

	/* Shrinking a regular file: prepare on-disk state before truncating. */
	if (S_ISREG(inode->i_mode) &&
	    attr->ia_valid & ATTR_SIZE &&
	    (attr->ia_size < inode->i_size)) {
		handle_t *handle;

		handle = ext4_journal_start(inode, 3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		if (ext4_handle_valid(handle)) {
			error = ext4_orphan_add(handle, inode);
			orphan = 1;
		}
		/* Record the new size on disk ahead of the truncate itself. */
		EXT4_I(inode)->i_disksize = attr->ia_size;
		rc = ext4_mark_inode_dirty(handle, inode);
		if (!error)
			error = rc;
		ext4_journal_stop(handle);

		if (ext4_should_order_data(inode)) {
			/*
			 * NOTE(review): this assignment replaces any error
			 * saved from ext4_orphan_add()/ext4_mark_inode_dirty()
			 * above.
			 */
			error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
			if (error) {
				/* Undo the orphan registration as far as possible. */
				handle = ext4_journal_start(inode, 3);
				if (IS_ERR(handle)) {
					/* No handle available: call with NULL. */
					ext4_orphan_del(NULL, inode);
					goto err_out;
				}
				ext4_orphan_del(handle, inode);
				orphan = 0;
				ext4_journal_stop(handle);
				goto err_out;
			}
		}
	}

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size != i_size_read(inode))
			truncate_setsize(inode, attr->ia_size);
		/* Called for every ATTR_SIZE request, even if i_size is unchanged. */
		ext4_truncate(inode);
	}

	/* Copy the remaining attributes only if dirtying the inode succeeded. */
	if (!rc) {
		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
	}

	/*
	 * The inode may still be registered as an orphan at this point; drop
	 * it without a handle if the inode is still linked.
	 * NOTE(review): presumably covers ext4_truncate() failing to start a
	 * transaction -- confirm against ext4_truncate().
	 */
	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	if (!rc && (ia_valid & ATTR_MODE))
		rc = ext4_acl_chmod(inode);

err_out:
	ext4_std_error(inode->i_sb, error);
	/* `error` takes precedence; fall back to `rc` when it is zero. */
	if (!error)
		error = rc;
	return error;
}
4383
4384int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4385 struct kstat *stat)
4386{
4387 struct inode *inode;
4388 unsigned long delalloc_blocks;
4389
4390 inode = dentry->d_inode;
4391 generic_fillattr(inode, stat);
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403 delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
4404 EXT4_I(inode)->i_reserved_data_blocks);
4405
4406 stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
4407 return 0;
4408}
4409
4410static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4411{
4412 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4413 return ext4_ind_trans_blocks(inode, nrblocks, chunk);
4414 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
4415}
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4429{
4430 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
4431 int gdpblocks;
4432 int idxblocks;
4433 int ret = 0;
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443 idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
4444
4445 ret = idxblocks;
4446
4447
4448
4449
4450
4451 groups = idxblocks;
4452 if (chunk)
4453 groups += 1;
4454 else
4455 groups += nrblocks;
4456
4457 gdpblocks = groups;
4458 if (groups > ngroups)
4459 groups = ngroups;
4460 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4461 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4462
4463
4464 ret += groups + gdpblocks;
4465
4466
4467 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4468
4469 return ret;
4470}
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
/*
 * Estimate the journal credits for modifying one page worth of blocks.
 *
 * The metadata estimate comes from ext4_meta_trans_blocks() with
 * chunk == 0.  When ext4_should_journal_data() is true for the inode, the
 * page's own blocks are added on top.
 */
int ext4_writepage_trans_blocks(struct inode *inode)
{
	int blocks_per_page = ext4_journal_blocks_per_page(inode);
	int credits = ext4_meta_trans_blocks(inode, blocks_per_page, 0);

	if (!ext4_should_journal_data(inode))
		return credits;

	/* The data blocks themselves are counted as well. */
	return credits + blocks_per_page;
}
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
/*
 * Estimate the metadata blocks needed for @nrblocks data blocks, asking
 * ext4_meta_trans_blocks() to treat them as one chunk (chunk == 1).
 */
int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
{
	const int chunk = 1;

	return ext4_meta_trans_blocks(inode, nrblocks, chunk);
}
4508
4509
4510
4511
4512
4513int ext4_mark_iloc_dirty(handle_t *handle,
4514 struct inode *inode, struct ext4_iloc *iloc)
4515{
4516 int err = 0;
4517
4518 if (IS_I_VERSION(inode))
4519 inode_inc_iversion(inode);
4520
4521
4522 get_bh(iloc->bh);
4523
4524
4525 err = ext4_do_update_inode(handle, inode, iloc);
4526 put_bh(iloc->bh);
4527 return err;
4528}
4529
4530
4531
4532
4533
4534
4535int
4536ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
4537 struct ext4_iloc *iloc)
4538{
4539 int err;
4540
4541 err = ext4_get_inode_loc(inode, iloc);
4542 if (!err) {
4543 BUFFER_TRACE(iloc->bh, "get_write_access");
4544 err = ext4_journal_get_write_access(handle, iloc->bh);
4545 if (err) {
4546 brelse(iloc->bh);
4547 iloc->bh = NULL;
4548 }
4549 }
4550 ext4_std_error(inode->i_sb, err);
4551 return err;
4552}
4553
4554
4555
4556
4557
4558static int ext4_expand_extra_isize(struct inode *inode,
4559 unsigned int new_extra_isize,
4560 struct ext4_iloc iloc,
4561 handle_t *handle)
4562{
4563 struct ext4_inode *raw_inode;
4564 struct ext4_xattr_ibody_header *header;
4565
4566 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
4567 return 0;
4568
4569 raw_inode = ext4_raw_inode(&iloc);
4570
4571 header = IHDR(inode, raw_inode);
4572
4573
4574 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
4575 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
4576 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
4577 new_extra_isize);
4578 EXT4_I(inode)->i_extra_isize = new_extra_isize;
4579 return 0;
4580 }
4581
4582
4583 return ext4_expand_extra_isize_ea(inode, new_extra_isize,
4584 raw_inode, handle);
4585}
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4601{
4602 struct ext4_iloc iloc;
4603 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4604 static unsigned int mnt_count;
4605 int err, ret;
4606
4607 might_sleep();
4608 trace_ext4_mark_inode_dirty(inode, _RET_IP_);
4609 err = ext4_reserve_inode_write(handle, inode, &iloc);
4610 if (ext4_handle_valid(handle) &&
4611 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
4612 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
4613
4614
4615
4616
4617
4618
4619
4620 if ((jbd2_journal_extend(handle,
4621 EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
4622 ret = ext4_expand_extra_isize(inode,
4623 sbi->s_want_extra_isize,
4624 iloc, handle);
4625 if (ret) {
4626 ext4_set_inode_state(inode,
4627 EXT4_STATE_NO_EXPAND);
4628 if (mnt_count !=
4629 le16_to_cpu(sbi->s_es->s_mnt_count)) {
4630 ext4_warning(inode->i_sb,
4631 "Unable to expand inode %lu. Delete"
4632 " some EAs or run e2fsck.",
4633 inode->i_ino);
4634 mnt_count =
4635 le16_to_cpu(sbi->s_es->s_mnt_count);
4636 }
4637 }
4638 }
4639 }
4640 if (!err)
4641 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
4642 return err;
4643}
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659void ext4_dirty_inode(struct inode *inode, int flags)
4660{
4661 handle_t *handle;
4662
4663 handle = ext4_journal_start(inode, 2);
4664 if (IS_ERR(handle))
4665 goto out;
4666
4667 ext4_mark_inode_dirty(handle, inode);
4668
4669 ext4_journal_stop(handle);
4670out:
4671 return;
4672}
4673
#if 0
/*
 * Compiled out.  With a non-NULL @handle, look up the inode's buffer, get
 * journal write access to it and mark it as dirty metadata, then release
 * the buffer.  With a NULL @handle nothing is done and 0 is returned.
 * Any error is passed to ext4_std_error() before being returned.
 */
static int ext4_pin_inode(handle_t *handle, struct inode *inode)
{
	struct ext4_iloc iloc;
	int rc = 0;

	if (!handle)
		goto report;

	rc = ext4_get_inode_loc(inode, &iloc);
	if (rc)
		goto report;

	BUFFER_TRACE(iloc.bh, "get_write_access");
	rc = jbd2_journal_get_write_access(handle, iloc.bh);
	if (!rc)
		rc = ext4_handle_dirty_metadata(handle,
						NULL,
						iloc.bh);
	brelse(iloc.bh);

report:
	ext4_std_error(inode->i_sb, rc);
	return rc;
}
#endif
4703
4704int ext4_change_inode_journal_flag(struct inode *inode, int val)
4705{
4706 journal_t *journal;
4707 handle_t *handle;
4708 int err;
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720 journal = EXT4_JOURNAL(inode);
4721 if (!journal)
4722 return 0;
4723 if (is_journal_aborted(journal))
4724 return -EROFS;
4725
4726
4727
4728
4729
4730
4731 if (val && test_opt(inode->i_sb, DELALLOC)) {
4732 err = ext4_alloc_da_blocks(inode);
4733 if (err < 0)
4734 return err;
4735 }
4736
4737 jbd2_journal_lock_updates(journal);
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747 if (val)
4748 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4749 else {
4750 jbd2_journal_flush(journal);
4751 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4752 }
4753 ext4_set_aops(inode);
4754
4755 jbd2_journal_unlock_updates(journal);
4756
4757
4758
4759 handle = ext4_journal_start(inode, 1);
4760 if (IS_ERR(handle))
4761 return PTR_ERR(handle);
4762
4763 err = ext4_mark_inode_dirty(handle, inode);
4764 ext4_handle_sync(handle);
4765 ext4_journal_stop(handle);
4766 ext4_std_error(inode->i_sb, err);
4767
4768 return err;
4769}
4770
4771static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
4772{
4773 return !buffer_mapped(bh);
4774}
4775
/*
 * ext4_page_mkwrite() - handle a write fault on a page of a mapped file.
 *
 * Three paths are visible:
 *  - Delayed allocation (DELALLOC mount option, data not journalled,
 *    ext4_nonda_switch() false): __block_page_mkwrite() is called with
 *    ext4_da_get_block_prep, retrying on -ENOSPC while
 *    ext4_should_retry_alloc() allows.
 *  - All buffers in the relevant part of the page are already mapped:
 *    wait for writeback and return with the page locked.
 *  - Otherwise: start a journal handle and let __block_page_mkwrite() map
 *    the blocks; when data is journalled, also get journal write access to
 *    the page's buffers.
 *
 * Returns a VM_FAULT_* code; errno-style results are converted with
 * block_page_mkwrite_return().  The whole function runs between
 * sb_start_pagefault() and sb_end_pagefault().
 */
int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	loff_t size;
	unsigned long len;
	int ret;
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle;
	get_block_t *get_block;
	int retries = 0;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vma->vm_file);
	/* Delayed-allocation path: no journal handle is started here. */
	if (test_opt(inode->i_sb, DELALLOC) &&
	    !ext4_should_journal_data(inode) &&
	    !ext4_nonda_switch(inode->i_sb)) {
		do {
			ret = __block_page_mkwrite(vma, vmf,
						   ext4_da_get_block_prep);
		} while (ret == -ENOSPC &&
		       ext4_should_retry_alloc(inode->i_sb, &retries));
		goto out_ret;
	}

	lock_page(page);
	size = i_size_read(inode);
	/* The page no longer belongs to this file or lies beyond i_size. */
	if (page->mapping != mapping || page_offset(page) > size) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	/* For the page containing i_size only the part inside the file counts. */
	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
	else
		len = PAGE_CACHE_SIZE;
	/*
	 * If every buffer in [0, len) is already mapped there is nothing to
	 * allocate, so the journal handle below is not needed.
	 */
	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
					ext4_bh_unmapped)) {
			/* Wait for any writeback of this page to finish first. */
			wait_on_page_writeback(page);
			ret = VM_FAULT_LOCKED;
			goto out;
		}
	}
	unlock_page(page);
	/* At least one buffer is unmapped (or absent): blocks must be mapped. */
	if (ext4_should_dioread_nolock(inode))
		get_block = ext4_get_block_write;
	else
		get_block = ext4_get_block;
retry_alloc:
	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}
	ret = __block_page_mkwrite(vma, vmf, get_block);
	if (!ret && ext4_should_journal_data(inode)) {
		/* Journalled data: the page's buffers need journal write access. */
		if (walk_page_buffers(handle, page_buffers(page), 0,
			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
			unlock_page(page);
			ret = VM_FAULT_SIGBUS;
			ext4_journal_stop(handle);
			goto out;
		}
		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	}
	ext4_journal_stop(handle);
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry_alloc;
out_ret:
	/* Translate the errno-style result into a VM_FAULT_* code. */
	ret = block_page_mkwrite_return(ret);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
4861