1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/syscalls.h>
24#include <linux/fs.h>
25#include <linux/mm.h>
26#include <linux/percpu.h>
27#include <linux/slab.h>
28#include <linux/smp_lock.h>
29#include <linux/capability.h>
30#include <linux/blkdev.h>
31#include <linux/file.h>
32#include <linux/quotaops.h>
33#include <linux/highmem.h>
34#include <linux/module.h>
35#include <linux/writeback.h>
36#include <linux/hash.h>
37#include <linux/suspend.h>
38#include <linux/buffer_head.h>
39#include <linux/bio.h>
40#include <linux/notifier.h>
41#include <linux/cpu.h>
42#include <linux/bitops.h>
43#include <linux/mpage.h>
44#include <linux/bit_spinlock.h>
45
/* Forward declarations for helpers that are defined further down this file. */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void invalidate_bh_lrus(void);

/* Convert a list node embedded as b_assoc_buffers back into its buffer_head. */
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
50
51inline void
52init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
53{
54 bh->b_end_io = handler;
55 bh->b_private = private;
56}
57
58static int sync_buffer(void *word)
59{
60 struct block_device *bd;
61 struct buffer_head *bh
62 = container_of(word, struct buffer_head, b_state);
63
64 smp_mb();
65 bd = bh->b_bdev;
66 if (bd)
67 blk_run_address_space(bd->bd_inode->i_mapping);
68 io_schedule();
69 return 0;
70}
71
72void fastcall __lock_buffer(struct buffer_head *bh)
73{
74 wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
75 TASK_UNINTERRUPTIBLE);
76}
77EXPORT_SYMBOL(__lock_buffer);
78
79void fastcall unlock_buffer(struct buffer_head *bh)
80{
81 clear_buffer_locked(bh);
82 smp_mb__after_clear_bit();
83 wake_up_bit(&bh->b_state, BH_Lock);
84}
85
86
87
88
89
90
91void __wait_on_buffer(struct buffer_head * bh)
92{
93 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
94}
95
/*
 * Detach buffer bookkeeping from @page: clear the Private flag, zero the
 * page's private word, then drop one page-cache reference.
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	/* Drops a reference; presumably the one taken when buffers were attached. */
	page_cache_release(page);
}
103
104static void buffer_io_error(struct buffer_head *bh)
105{
106 char b[BDEVNAME_SIZE];
107
108 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
109 bdevname(bh->b_bdev, b),
110 (unsigned long long)bh->b_blocknr);
111}
112
113
114
115
116
/*
 * Completion handler for a synchronous read: reflect the I/O result in the
 * buffer's uptodate bit, unlock the buffer and drop one reference.
 * A failed read is not logged here.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
128
129void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
130{
131 char b[BDEVNAME_SIZE];
132
133 if (uptodate) {
134 set_buffer_uptodate(bh);
135 } else {
136 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
137 buffer_io_error(bh);
138 printk(KERN_WARNING "lost page write due to "
139 "I/O error on %s\n",
140 bdevname(bh->b_bdev, b));
141 }
142 set_buffer_write_io_error(bh);
143 clear_buffer_uptodate(bh);
144 }
145 unlock_buffer(bh);
146 put_bh(bh);
147}
148
149
150
151
152
153int sync_blockdev(struct block_device *bdev)
154{
155 int ret = 0;
156
157 if (bdev)
158 ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
159 return ret;
160}
161EXPORT_SYMBOL(sync_blockdev);
162
/*
 * Flush everything that belongs to @sb, in this order: inodes, quota,
 * the superblock itself, the filesystem's own sync_fs hook, the backing
 * block device, and the inodes once more.  No status is returned.
 */
static void __fsync_super(struct super_block *sb)
{
	/* First inode pass; second argument 0 (presumably "don't wait" - confirm). */
	sync_inodes_sb(sb, 0);
	DQUOT_SYNC(sb);
	lock_super(sb);
	/* Only call the hook when the superblock is dirty and the fs provides one. */
	if (sb->s_dirt && sb->s_op->write_super)
		sb->s_op->write_super(sb);
	unlock_super(sb);
	if (sb->s_op->sync_fs)
		sb->s_op->sync_fs(sb, 1);
	sync_blockdev(sb->s_bdev);
	/* Second inode pass; second argument 1 (presumably "wait" - confirm). */
	sync_inodes_sb(sb, 1);
}
176
177
178
179
180
181
182int fsync_super(struct super_block *sb)
183{
184 __fsync_super(sb);
185 return sync_blockdev(sb->s_bdev);
186}
187
188
189
190
191
192
/*
 * Sync @bdev.  If get_super() finds a superblock for the device, the whole
 * filesystem is flushed with fsync_super() and the superblock reference is
 * dropped again; otherwise only the block device itself is synced.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = fsync_super(sb);
	drop_super(sb);
	return res;
}
203
204
205
206
207
208
209
210
211
212
/*
 * freeze_bdev - flush the filesystem on @bdev and mark it frozen
 *
 * Takes bdev->bd_mount_mutex and returns with it STILL HELD; thaw_bdev()
 * in this file is what releases it.  The superblock obtained from
 * get_super() is likewise returned without a drop_super() here (thaw_bdev()
 * performs it).
 *
 * Returns the superblock found for @bdev, or whatever get_super() returned
 * if none was found (the freeze steps are skipped for a missing or
 * read-only superblock).
 */
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;

	mutex_lock(&bdev->bd_mount_mutex);
	sb = get_super(bdev);
	if (sb && !(sb->s_flags & MS_RDONLY)) {
		/* Stage 1: publish SB_FREEZE_WRITE before flushing. */
		sb->s_frozen = SB_FREEZE_WRITE;
		smp_wmb();

		__fsync_super(sb);

		/* Stage 2: publish SB_FREEZE_TRANS before the final device sync. */
		sb->s_frozen = SB_FREEZE_TRANS;
		smp_wmb();

		sync_blockdev(sb->s_bdev);

		/* Optional filesystem-specific lock hook. */
		if (sb->s_op->write_super_lockfs)
			sb->s_op->write_super_lockfs(sb);
	}

	/* Always sync the device, whether or not a superblock was frozen. */
	sync_blockdev(bdev);
	return sb;	/* bd_mount_mutex remains held */
}
EXPORT_SYMBOL(freeze_bdev);
238
239
240
241
242
243
244
245
246void thaw_bdev(struct block_device *bdev, struct super_block *sb)
247{
248 if (sb) {
249 BUG_ON(sb->s_bdev != bdev);
250
251 if (sb->s_op->unlockfs)
252 sb->s_op->unlockfs(sb);
253 sb->s_frozen = SB_UNFROZEN;
254 smp_wmb();
255 wake_up(&sb->s_wait_unfrozen);
256 drop_super(sb);
257 }
258
259 mutex_unlock(&bdev->bd_mount_mutex);
260}
261EXPORT_SYMBOL(thaw_bdev);
262
263
264
265
266
/*
 * Sync everything.  @wait is passed through to the second
 * sync_filesystems() call and the second sync_inodes() call; the first
 * pass of each is always issued with 0.
 */
static void do_sync(unsigned long wait)
{
	wakeup_pdflush(0);
	sync_inodes(0);		/* first inode pass, argument 0 */
	DQUOT_SYNC(NULL);
	sync_supers();
	sync_filesystems(0);	/* first filesystem pass, argument 0 */
	sync_filesystems(wait);
	sync_inodes(wait);
	/* wait == 0 is what emergency_sync() in this file passes. */
	if (!wait)
		printk("Emergency Sync complete\n");
	if (unlikely(laptop_mode))
		laptop_sync_completion();
}
281
/* sync system call entry: run do_sync(1) and always report success (0). */
asmlinkage long sys_sync(void)
{
	do_sync(1);
	return 0;
}
287
/*
 * Hand do_sync() with argument 0 to pdflush_operation().  Any result of
 * pdflush_operation() is not checked here.
 */
void emergency_sync(void)
{
	pdflush_operation(do_sync, 0);
}
292
293
294
295
296
297
298
299int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
300{
301 struct inode * inode = dentry->d_inode;
302 struct super_block * sb;
303 int ret, err;
304
305
306 ret = write_inode_now(inode, 0);
307
308
309 sb = inode->i_sb;
310 lock_super(sb);
311 if (sb->s_op->write_super)
312 sb->s_op->write_super(sb);
313 unlock_super(sb);
314
315
316 err = sync_blockdev(sb->s_bdev);
317 if (!ret)
318 ret = err;
319 return ret;
320}
321
322long do_fsync(struct file *file, int datasync)
323{
324 int ret;
325 int err;
326 struct address_space *mapping = file->f_mapping;
327
328 if (!file->f_op || !file->f_op->fsync) {
329
330 ret = -EINVAL;
331 goto out;
332 }
333
334 current->flags |= PF_SYNCWRITE;
335 ret = filemap_fdatawrite(mapping);
336
337
338
339
340
341 mutex_lock(&mapping->host->i_mutex);
342 err = file->f_op->fsync(file, file->f_dentry, datasync);
343 if (!ret)
344 ret = err;
345 mutex_unlock(&mapping->host->i_mutex);
346 err = filemap_fdatawait(mapping);
347 if (!ret)
348 ret = err;
349 current->flags &= ~PF_SYNCWRITE;
350out:
351 return ret;
352}
353
354static long __do_fsync(unsigned int fd, int datasync)
355{
356 struct file *file;
357 int ret = -EBADF;
358
359 file = fget(fd);
360 if (file) {
361 ret = do_fsync(file, datasync);
362 fput(file);
363 }
364 return ret;
365}
366
/* fsync system call entry: __do_fsync() with datasync == 0. */
asmlinkage long sys_fsync(unsigned int fd)
{
	return __do_fsync(fd, 0);
}
371
/* fdatasync system call entry: __do_fsync() with datasync == 1. */
asmlinkage long sys_fdatasync(unsigned int fd)
{
	return __do_fsync(fd, 1);
}
376
377
378
379
380
381
382
383
384
385
386
387
/*
 * Look up the buffer_head for @block in the page cache of @bdev's inode.
 *
 * The page that would hold the block is looked up with find_get_page();
 * its buffer ring is then walked under the mapping's private_lock.
 *
 * Returns the matching buffer with its reference count raised (get_bh()),
 * or NULL if the page is absent, has no buffers, or no buffer on it has
 * the requested block number.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;

	/* Page index = block number scaled down by blocks-per-page. */
	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page(bd_mapping, index);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	do {
		if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		if (!buffer_mapped(bh))
			all_mapped = 0;
		bh = bh->b_this_page;
	} while (bh != head);

	/*
	 * No match.  If every buffer on the page was mapped, emit diagnostics.
	 * At this point bh has wrapped back to head, so the fields printed
	 * below are those of the first buffer on the page.
	 */
	if (all_mapped) {
		printk("__find_get_block_slow() failed. "
			"block=%llu, b_blocknr=%llu\n",
			(unsigned long long)block,
			(unsigned long long)bh->b_blocknr);
		printk("b_state=0x%08lx, b_size=%zu\n",
			bh->b_state, bh->b_size);
		printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	/* Drop the page reference taken by find_get_page(). */
	page_cache_release(page);
out:
	return ret;
}
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
475{
476 struct address_space *mapping = bdev->bd_inode->i_mapping;
477
478 if (mapping->nrpages == 0)
479 return;
480
481 invalidate_bh_lrus();
482
483
484
485
486
487 invalidate_inode_pages(mapping);
488}
489
490
491
492
493static void free_more_memory(void)
494{
495 struct zone **zones;
496 pg_data_t *pgdat;
497
498 wakeup_pdflush(1024);
499 yield();
500
501 for_each_online_pgdat(pgdat) {
502 zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
503 if (*zones)
504 try_to_free_pages(zones, GFP_NOFS);
505 }
506}
507
508
509
510
511
/*
 * I/O completion handler for buffers submitted with
 * mark_buffer_async_read().
 *
 * Records the result on @bh, then - under the BH_Uptodate_Lock bit of the
 * page's first buffer, with local interrupts disabled - clears the
 * async_read flag, unlocks @bh and walks the page's buffer ring.  If any
 * other buffer still has async_read set, the page is left alone; otherwise
 * the page is unlocked, and marked uptodate if every buffer was uptodate
 * and the page carries no error.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		if (printk_ratelimit())
			buffer_io_error(bh);
		SetPageError(page);
	}

	/*
	 * The lock bit lives in the FIRST buffer of the page, so all
	 * completions for this page serialise on the same bit.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	/* Walk the whole ring starting at (and including) bh itself. */
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			/* A buffer with a read still pending must be locked. */
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/*
	 * No async reads remain on this page: finish it.  A page flagged
	 * with an error is never marked uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	/* Another buffer's read is outstanding; its completion finishes the page. */
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
569
570
571
572
573
/*
 * I/O completion handler for buffers submitted with
 * mark_buffer_async_write().
 *
 * Records the result on @bh (on failure: rate-limited log message, AS_EIO
 * on the page's mapping, buffer not uptodate, page error flag).  Then -
 * under the BH_Uptodate_Lock bit of the page's first buffer, with local
 * interrupts disabled - clears async_write, unlocks @bh and checks the
 * OTHER buffers of the page.  If none of them still has async_write set,
 * page writeback is ended.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (printk_ratelimit()) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		}
		/* Record the error on the mapping as well as on buffer and page. */
		set_bit(AS_EIO, &page->mapping->flags);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	/* Same per-page serialisation scheme as end_buffer_async_read(). */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	/* Start at the buffer AFTER bh: bh's own flag was just cleared. */
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			/* A buffer with a write still pending must be locked. */
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	/* Last outstanding write for this page: end writeback. */
	end_page_writeback(page);
	return;

still_busy:
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
/*
 * Prepare @bh for an asynchronous read: install end_buffer_async_read()
 * as its completion handler and set its async_read flag.
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}
650
/*
 * Prepare @bh for an asynchronous write: install end_buffer_async_write()
 * as its completion handler and set its async_write flag.
 */
void mark_buffer_async_write(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_write;
	set_buffer_async_write(bh);
}
EXPORT_SYMBOL(mark_buffer_async_write);
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
/*
 * Unlink @bh from whatever b_assoc_buffers list it is on and leave the
 * node re-initialised (empty).  No locking is done here; callers in this
 * file invoke it with a private_lock held.
 */
static inline void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
}
715
716int inode_has_buffers(struct inode *inode)
717{
718 return !list_empty(&inode->i_data.private_list);
719}
720
721
722
723
724
725
726
727
728
729
730
/*
 * Wait for every locked buffer on @list to become unlocked.
 *
 * The list is scanned tail-to-head under @lock.  On finding a locked
 * buffer the lock is dropped, the buffer is waited on, and the scan is
 * restarted from the beginning (the list may have changed meanwhile).
 *
 * Returns -EIO if any waited-on buffer was not uptodate afterwards,
 * otherwise 0.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			/* Pin the buffer so it survives while the lock is dropped. */
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			/* The iterator is stale after dropping the lock. */
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}
755
756
757
758
759
760
761
762
763
764
765
766
767
768int sync_mapping_buffers(struct address_space *mapping)
769{
770 struct address_space *buffer_mapping = mapping->assoc_mapping;
771
772 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
773 return 0;
774
775 return fsync_buffers_list(&buffer_mapping->private_lock,
776 &mapping->private_list);
777}
778EXPORT_SYMBOL(sync_mapping_buffers);
779
780
781
782
783
784
785
786void write_boundary_block(struct block_device *bdev,
787 sector_t bblock, unsigned blocksize)
788{
789 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
790 if (bh) {
791 if (buffer_dirty(bh))
792 ll_rw_block(WRITE, 1, &bh);
793 put_bh(bh);
794 }
795}
796
797void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
798{
799 struct address_space *mapping = inode->i_mapping;
800 struct address_space *buffer_mapping = bh->b_page->mapping;
801
802 mark_buffer_dirty(bh);
803 if (!mapping->assoc_mapping) {
804 mapping->assoc_mapping = buffer_mapping;
805 } else {
806 BUG_ON(mapping->assoc_mapping != buffer_mapping);
807 }
808 if (list_empty(&bh->b_assoc_buffers)) {
809 spin_lock(&buffer_mapping->private_lock);
810 list_move_tail(&bh->b_assoc_buffers,
811 &mapping->private_list);
812 spin_unlock(&buffer_mapping->private_lock);
813 }
814}
815EXPORT_SYMBOL(mark_buffer_dirty_inode);
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842int __set_page_dirty_buffers(struct page *page)
843{
844 struct address_space * const mapping = page->mapping;
845
846 spin_lock(&mapping->private_lock);
847 if (page_has_buffers(page)) {
848 struct buffer_head *head = page_buffers(page);
849 struct buffer_head *bh = head;
850
851 do {
852 set_buffer_dirty(bh);
853 bh = bh->b_this_page;
854 } while (bh != head);
855 }
856 spin_unlock(&mapping->private_lock);
857
858 if (!TestSetPageDirty(page)) {
859 write_lock_irq(&mapping->tree_lock);
860 if (page->mapping) {
861 if (mapping_cap_account_dirty(mapping))
862 inc_page_state(nr_dirty);
863 radix_tree_tag_set(&mapping->page_tree,
864 page_index(page),
865 PAGECACHE_TAG_DIRTY);
866 }
867 write_unlock_irq(&mapping->tree_lock);
868 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
869 return 1;
870 }
871 return 0;
872}
873EXPORT_SYMBOL(__set_page_dirty_buffers);
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
/*
 * Write out and wait upon the buffers on @list, which is protected by
 * @lock.
 *
 * Phase 1: every buffer is removed from @list.  Buffers that are dirty or
 * locked are parked on a private temporary list; dirty ones additionally
 * have a write started with ll_rw_block(SWRITE).
 * Phase 2: each parked buffer is taken off the temporary list and waited
 * on.
 * Phase 3: osync_buffers_list() waits on anything that has appeared on
 * @list in the meantime.
 *
 * Returns -EIO if a buffer waited on in phase 2 was not uptodate,
 * otherwise the result of osync_buffers_list().
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	int err = 0, err2;

	INIT_LIST_HEAD(&tmp);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		list_del_init(&bh->b_assoc_buffers);
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			if (buffer_dirty(bh)) {
				/* Pin the buffer across the unlocked I/O submission. */
				get_bh(bh);
				spin_unlock(lock);
				/*
				 * NOTE(review): SWRITE presumably makes
				 * ll_rw_block() wait for the buffer lock instead
				 * of skipping locked buffers - confirm.
				 */
				ll_rw_block(SWRITE, 1, &bh);
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	/* Drain the temporary list from its tail, waiting on each buffer. */
	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		__remove_assoc_queue(bh);
		get_bh(bh);
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	/* An error from our own waits takes precedence. */
	if (err)
		return err;
	else
		return err2;
}
943
944
945
946
947
948
949
950
951
952
953void invalidate_inode_buffers(struct inode *inode)
954{
955 if (inode_has_buffers(inode)) {
956 struct address_space *mapping = &inode->i_data;
957 struct list_head *list = &mapping->private_list;
958 struct address_space *buffer_mapping = mapping->assoc_mapping;
959
960 spin_lock(&buffer_mapping->private_lock);
961 while (!list_empty(list))
962 __remove_assoc_queue(BH_ENTRY(list->next));
963 spin_unlock(&buffer_mapping->private_lock);
964 }
965}
966
967
968
969
970
971
972
973int remove_inode_buffers(struct inode *inode)
974{
975 int ret = 1;
976
977 if (inode_has_buffers(inode)) {
978 struct address_space *mapping = &inode->i_data;
979 struct list_head *list = &mapping->private_list;
980 struct address_space *buffer_mapping = mapping->assoc_mapping;
981
982 spin_lock(&buffer_mapping->private_lock);
983 while (!list_empty(list)) {
984 struct buffer_head *bh = BH_ENTRY(list->next);
985 if (buffer_dirty(bh)) {
986 ret = 0;
987 break;
988 }
989 __remove_assoc_queue(bh);
990 }
991 spin_unlock(&buffer_mapping->private_lock);
992 }
993 return ret;
994}
995
996
997
998
999
1000
1001
1002
1003
1004
/*
 * Allocate a chain of buffer_heads covering @page, each of @size bytes.
 *
 * The buffers are allocated from the highest page offset downwards and
 * pushed onto the front of a NULL-terminated, singly linked chain through
 * b_this_page, so the returned head is the buffer at the lowest offset.
 * The chain is NOT made circular and is NOT attached to the page here.
 *
 * If an allocation fails, the partial chain is freed.  With @retry == 0
 * NULL is returned; otherwise free_more_memory() is called and the whole
 * allocation is retried, looping until it succeeds.
 */
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
		int retry)
{
	struct buffer_head *bh, *head;
	long offset;

try_again:
	head = NULL;
	offset = PAGE_SIZE;
	/* offset is signed so the loop ends once it would go below zero. */
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);
		if (!bh)
			goto no_grow;

		bh->b_bdev = NULL;
		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_state = 0;
		atomic_set(&bh->b_count, 0);
		bh->b_private = NULL;
		bh->b_size = size;

		/* Link the buffer up to the page at this offset. */
		set_bh_page(bh, page, offset);

		init_buffer(bh, NULL, NULL);
	}
	return head;

/* Allocation failed part-way: release whatever was built so far. */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	/* Caller does not want us to loop: report failure. */
	if (!retry)
		return NULL;

	/* Caller insists on success: try to free memory and start over. */
	free_more_memory();
	goto try_again;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
1066
1067static inline void
1068link_dev_buffers(struct page *page, struct buffer_head *head)
1069{
1070 struct buffer_head *bh, *tail;
1071
1072 bh = head;
1073 do {
1074 tail = bh;
1075 bh = bh->b_this_page;
1076 } while (bh);
1077 tail->b_this_page = head;
1078 attach_page_buffers(page, head);
1079}
1080
1081
1082
1083
1084static void
1085init_page_buffers(struct page *page, struct block_device *bdev,
1086 sector_t block, int size)
1087{
1088 struct buffer_head *head = page_buffers(page);
1089 struct buffer_head *bh = head;
1090 int uptodate = PageUptodate(page);
1091
1092 do {
1093 if (!buffer_mapped(bh)) {
1094 init_buffer(bh, NULL, NULL);
1095 bh->b_bdev = bdev;
1096 bh->b_blocknr = block;
1097 if (uptodate)
1098 set_buffer_uptodate(bh);
1099 set_buffer_mapped(bh);
1100 }
1101 block++;
1102 bh = bh->b_this_page;
1103 } while (bh != head);
1104}
1105
1106
1107
1108
1109
1110
/*
 * Find or create the page at @index in @bdev's page cache and make sure it
 * carries buffers of @size bytes, mapped from @block onwards.
 *
 * Returns the page, locked and with a reference held, or NULL if the page
 * itself could not be obtained.  Any failure after the page was obtained
 * ends in BUG().
 */
static struct page *
grow_dev_page(struct block_device *bdev, sector_t block,
		pgoff_t index, int size)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (!page)
		return NULL;

	BUG_ON(!PageLocked(page));

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		/* Existing buffers of the right size: just (re)initialise them. */
		if (bh->b_size == size) {
			init_page_buffers(page, bdev, block, size);
			return page;
		}
		/* Wrong size: the old buffers have to go first. */
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/* Allocate fresh buffers; retry == 0, so this may return NULL. */
	bh = alloc_page_buffers(page, size, 0);
	if (!bh)
		goto failed;

	/*
	 * Attach and initialise the buffers under private_lock, so that a
	 * lookup taking the same lock (see __find_get_block_slow()) never
	 * observes half-initialised buffers.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	init_page_buffers(page, bdev, block, size);
	spin_unlock(&inode->i_mapping->private_lock);
	return page;

failed:
	BUG();
	/* Cleanup below only runs if BUG() returns. */
	unlock_page(page);
	page_cache_release(page);
	return NULL;
}
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169static int
1170grow_buffers(struct block_device *bdev, sector_t block, int size)
1171{
1172 struct page *page;
1173 pgoff_t index;
1174 int sizebits;
1175
1176 sizebits = -1;
1177 do {
1178 sizebits++;
1179 } while ((size << sizebits) < PAGE_SIZE);
1180
1181 index = block >> sizebits;
1182 block = index << sizebits;
1183
1184
1185 page = grow_dev_page(bdev, block, index, size);
1186 if (!page)
1187 return 0;
1188 unlock_page(page);
1189 page_cache_release(page);
1190 return 1;
1191}
1192
1193static struct buffer_head *
1194__getblk_slow(struct block_device *bdev, sector_t block, int size)
1195{
1196
1197 if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
1198 (size < 512 || size > PAGE_SIZE))) {
1199 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1200 size);
1201 printk(KERN_ERR "hardsect size: %d\n",
1202 bdev_hardsect_size(bdev));
1203
1204 dump_stack();
1205 return NULL;
1206 }
1207
1208 for (;;) {
1209 struct buffer_head * bh;
1210
1211 bh = __find_get_block(bdev, block, size);
1212 if (bh)
1213 return bh;
1214
1215 if (!grow_buffers(bdev, block, size))
1216 free_more_memory();
1217 }
1218}
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255void fastcall mark_buffer_dirty(struct buffer_head *bh)
1256{
1257 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
1258 __set_page_dirty_nobuffers(bh->b_page);
1259}
1260
1261
1262
1263
1264
1265
1266
1267
1268void __brelse(struct buffer_head * buf)
1269{
1270 if (atomic_read(&buf->b_count)) {
1271 put_bh(buf);
1272 return;
1273 }
1274 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1275 WARN_ON(1);
1276}
1277
1278
1279
1280
1281
1282void __bforget(struct buffer_head *bh)
1283{
1284 clear_buffer_dirty(bh);
1285 if (!list_empty(&bh->b_assoc_buffers)) {
1286 struct address_space *buffer_mapping = bh->b_page->mapping;
1287
1288 spin_lock(&buffer_mapping->private_lock);
1289 list_del_init(&bh->b_assoc_buffers);
1290 spin_unlock(&buffer_mapping->private_lock);
1291 }
1292 __brelse(bh);
1293}
1294
1295static struct buffer_head *__bread_slow(struct buffer_head *bh)
1296{
1297 lock_buffer(bh);
1298 if (buffer_uptodate(bh)) {
1299 unlock_buffer(bh);
1300 return bh;
1301 } else {
1302 get_bh(bh);
1303 bh->b_end_io = end_buffer_read_sync;
1304 submit_bh(READ, bh);
1305 wait_on_buffer(bh);
1306 if (buffer_uptodate(bh))
1307 return bh;
1308 }
1309 brelse(bh);
1310 return NULL;
1311}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
/* Number of buffer_head pointers cached per CPU. */
#define BH_LRU_SIZE	8

/* Per-CPU array of recently used buffers; index 0 is the most recent. */
struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

/*
 * Protection for the per-CPU LRU.  With CONFIG_SMP local interrupts are
 * disabled (presumably because invalidate_bh_lru() is run on every CPU
 * through on_each_cpu() - confirm); otherwise disabling preemption
 * is used.
 */
#ifdef CONFIG_SMP
#define bh_lru_lock()	local_irq_disable()
#define bh_lru_unlock()	local_irq_enable()
#else
#define bh_lru_lock()	preempt_disable()
#define bh_lru_unlock()	preempt_enable()
#endif
1342
/*
 * Sanity check used before taking bh_lru_lock(): BUG if interrupts are
 * currently disabled.  Expands to nothing when irqs_disabled is not
 * defined as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1349
1350
1351
1352
/*
 * Put @bh at the front of this CPU's LRU.
 *
 * If @bh is already in slot 0 nothing happens.  Otherwise a new array is
 * built with @bh first (taking a new reference on it) followed by the old
 * entries in order; an old occurrence of @bh further down has its
 * reference dropped, and if the array overflows the last old entry is
 * evicted and released after the LRU lock has been dropped.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;
	struct bh_lru *lru;

	check_irqs_on();
	bh_lru_lock();
	lru = &__get_cpu_var(bh_lrus);
	if (lru->bhs[0] != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		/* The LRU slot owns its own reference. */
		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 = lru->bhs[in];

			if (bh2 == bh) {
				/* Duplicate further down: drop its old reference. */
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					/* At most one entry can fall off the end. */
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		/* Pad any remaining slots with NULL. */
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(lru->bhs, bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	/* Release the evicted buffer outside the locked region. */
	if (evictee)
		__brelse(evictee);
}
1391
1392
1393
1394
/*
 * Search this CPU's LRU for a buffer matching (@bdev, @block, @size).
 *
 * On a hit the entry is moved to slot 0 (entries in front of it shift down
 * by one) and the buffer is returned with an additional reference taken.
 * Returns NULL on a miss.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
{
	struct buffer_head *ret = NULL;
	struct bh_lru *lru;
	int i;

	check_irqs_on();
	bh_lru_lock();
	lru = &__get_cpu_var(bh_lrus);
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = lru->bhs[i];

		if (bh && bh->b_bdev == bdev &&
				bh->b_blocknr == block && bh->b_size == size) {
			if (i) {
				/* Slide slots 0..i-1 down one place, reusing i as cursor. */
				while (i) {
					lru->bhs[i] = lru->bhs[i - 1];
					i--;
				}
				lru->bhs[0] = bh;
			}
			/* Reference for the caller; the LRU keeps its own. */
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}
1425
1426
1427
1428
1429
1430
1431struct buffer_head *
1432__find_get_block(struct block_device *bdev, sector_t block, int size)
1433{
1434 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1435
1436 if (bh == NULL) {
1437 bh = __find_get_block_slow(bdev, block);
1438 if (bh)
1439 bh_lru_install(bh);
1440 }
1441 if (bh)
1442 touch_buffer(bh);
1443 return bh;
1444}
1445EXPORT_SYMBOL(__find_get_block);
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459struct buffer_head *
1460__getblk(struct block_device *bdev, sector_t block, int size)
1461{
1462 struct buffer_head *bh = __find_get_block(bdev, block, size);
1463
1464 might_sleep();
1465 if (bh == NULL)
1466 bh = __getblk_slow(bdev, block, size);
1467 return bh;
1468}
1469EXPORT_SYMBOL(__getblk);
1470
1471
1472
1473
1474void __breadahead(struct block_device *bdev, sector_t block, int size)
1475{
1476 struct buffer_head *bh = __getblk(bdev, block, size);
1477 if (likely(bh)) {
1478 ll_rw_block(READA, 1, &bh);
1479 brelse(bh);
1480 }
1481}
1482EXPORT_SYMBOL(__breadahead);
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493struct buffer_head *
1494__bread(struct block_device *bdev, sector_t block, int size)
1495{
1496 struct buffer_head *bh = __getblk(bdev, block, size);
1497
1498 if (likely(bh) && !buffer_uptodate(bh))
1499 bh = __bread_slow(bh);
1500 return bh;
1501}
1502EXPORT_SYMBOL(__bread);
1503
1504
1505
1506
1507
1508
1509static void invalidate_bh_lru(void *arg)
1510{
1511 struct bh_lru *b = &get_cpu_var(bh_lrus);
1512 int i;
1513
1514 for (i = 0; i < BH_LRU_SIZE; i++) {
1515 brelse(b->bhs[i]);
1516 b->bhs[i] = NULL;
1517 }
1518 put_cpu_var(bh_lrus);
1519}
1520
/* Run invalidate_bh_lru() on every CPU through on_each_cpu(). */
static void invalidate_bh_lrus(void)
{
	on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
}
1525
1526void set_bh_page(struct buffer_head *bh,
1527 struct page *page, unsigned long offset)
1528{
1529 bh->b_page = page;
1530 BUG_ON(offset >= PAGE_SIZE);
1531 if (PageHighMem(page))
1532
1533
1534
1535 bh->b_data = (char *)(0 + offset);
1536 else
1537 bh->b_data = page_address(page) + offset;
1538}
1539EXPORT_SYMBOL(set_bh_page);
1540
1541
1542
1543
/*
 * Throw away the state of @bh: under the buffer lock, clear its dirty,
 * mapped, req, new and delay flags and forget its block device.
 * May sleep waiting for the buffer lock.
 */
static void discard_buffer(struct buffer_head * bh)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	unlock_buffer(bh);
}
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571int try_to_release_page(struct page *page, gfp_t gfp_mask)
1572{
1573 struct address_space * const mapping = page->mapping;
1574
1575 BUG_ON(!PageLocked(page));
1576 if (PageWriteback(page))
1577 return 0;
1578
1579 if (mapping && mapping->a_ops->releasepage)
1580 return mapping->a_ops->releasepage(page, gfp_mask);
1581 return try_to_free_buffers(page);
1582}
1583EXPORT_SYMBOL(try_to_release_page);
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600void block_invalidatepage(struct page *page, unsigned long offset)
1601{
1602 struct buffer_head *head, *bh, *next;
1603 unsigned int curr_off = 0;
1604
1605 BUG_ON(!PageLocked(page));
1606 if (!page_has_buffers(page))
1607 goto out;
1608
1609 head = page_buffers(page);
1610 bh = head;
1611 do {
1612 unsigned int next_off = curr_off + bh->b_size;
1613 next = bh->b_this_page;
1614
1615
1616
1617
1618 if (offset <= curr_off)
1619 discard_buffer(bh);
1620 curr_off = next_off;
1621 bh = next;
1622 } while (bh != head);
1623
1624
1625
1626
1627
1628
1629 if (offset == 0)
1630 try_to_release_page(page, 0);
1631out:
1632 return;
1633}
1634EXPORT_SYMBOL(block_invalidatepage);
1635
1636void do_invalidatepage(struct page *page, unsigned long offset)
1637{
1638 void (*invalidatepage)(struct page *, unsigned long);
1639 invalidatepage = page->mapping->a_ops->invalidatepage ? :
1640 block_invalidatepage;
1641 (*invalidatepage)(page, offset);
1642}
1643
1644
1645
1646
1647
1648
/*
 * Allocate a ring of buffers of @blocksize bytes for @page, OR @b_state
 * into each buffer's state word, and attach the ring to the page.
 *
 * Under the mapping's private_lock, the page's Dirty and Uptodate flags
 * are propagated to every buffer before the ring is attached.
 *
 * The allocation is made with retry enabled, so (per alloc_page_buffers()
 * in this file) it loops until it succeeds and head is not NULL here.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = alloc_page_buffers(page, blocksize, 1);
	bh = head;
	/* Apply the initial state bits and find the tail of the chain. */
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	/* Close the chain into a ring. */
	tail->b_this_page = head;

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			/* Page flags are re-read for every buffer. */
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1696{
1697 struct buffer_head *old_bh;
1698
1699 might_sleep();
1700
1701 old_bh = __find_get_block_slow(bdev, block);
1702 if (old_bh) {
1703 clear_buffer_dirty(old_bh);
1704 wait_on_buffer(old_bh);
1705 clear_buffer_req(old_bh);
1706 __brelse(old_bh);
1707 }
1708}
1709EXPORT_SYMBOL(unmap_underlying_metadata);
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
/*
 * Write out all dirty buffers of a locked @page.
 *
 * Phase 1: map every dirty, unmapped buffer through @get_block; buffers
 *          beyond the last block of the file are cleaned instead.
 * Phase 2: lock each mapped buffer and flag the dirty ones for async
 *          write.  In non-blocking, WB_SYNC_NONE mode a buffer that is
 *          already locked is skipped and the page is redirtied.
 * Phase 3: mark the page as under writeback, submit the flagged buffers
 *          and unlock the page.
 *
 * If nothing was submitted, writeback is ended right here and
 * wbc->pages_skipped is incremented.
 *
 * Returns 0, or the error from @get_block.  On such an error the buffers
 * that are mapped and dirty at that point are still written out (the
 * "recover" path) and the page is flagged with an error.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	const unsigned blocksize = 1 << inode->i_blkbits;
	int nr_underway = 0;

	BUG_ON(!PageLocked(page));

	/* Index of the block that holds the last byte of the file. */
	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;

	/* A page without buffers gets a fresh set, created dirty and uptodate. */
	if (!page_has_buffers(page)) {
		create_empty_buffers(page, blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}

	/* File-relative number of the first block on this page. */
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	head = page_buffers(page);
	bh = head;

	/*
	 * Phase 1: get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying block device's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * Buffer lies entirely beyond the end of the file:
			 * nothing to write, so make it clean and uptodate.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			if (buffer_new(bh)) {
				/* Freshly allocated block: kill any stale alias. */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	/*
	 * Phase 2.  Note that "continue" inside a do-while jumps to the loop
	 * condition, which is where bh is advanced.
	 */
	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * Block on the buffer lock unless this is non-blocking
		 * WB_SYNC_NONE writeback; in that case only try-lock, and if
		 * the buffer is busy redirty the page and move on.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
			lock_buffer(bh);
		} else if (test_set_buffer_locked(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write(bh);
		} else {
			/* Locked but clean: nothing to write. */
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * Phase 3: the page stays locked until it has been flagged as under
	 * writeback.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	do {
		/*
		 * Successor is read before submission (presumably because the
		 * buffer may complete as soon as it is submitted - confirm).
		 */
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * No buffer was submitted, so no completion handler will end
		 * writeback for this page: do it here.  The page is marked
		 * uptodate if every buffer is.
		 */
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (uptodate)
			SetPageUptodate(page);
		end_page_writeback(page);
		/* Account for the page that was not actually written. */
		wbc->pages_skipped++;
	}
	return err;

recover:
	/*
	 * get_block() failed (err is set).  Buffers that are mapped and
	 * dirty at this point are still written; every other buffer has
	 * its dirty bit cleared.
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write(bh);
		} else {
			/*
			 * This buffer will not be written: drop its dirty bit
			 * (it may be the one get_block() refused to map).
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	/* Shares the nr_underway == 0 handling; err keeps the get_block() error. */
	goto done;
}
1904
1905static int __block_prepare_write(struct inode *inode, struct page *page,
1906 unsigned from, unsigned to, get_block_t *get_block)
1907{
1908 unsigned block_start, block_end;
1909 sector_t block;
1910 int err = 0;
1911 unsigned blocksize, bbits;
1912 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1913
1914 BUG_ON(!PageLocked(page));
1915 BUG_ON(from > PAGE_CACHE_SIZE);
1916 BUG_ON(to > PAGE_CACHE_SIZE);
1917 BUG_ON(from > to);
1918
1919 blocksize = 1 << inode->i_blkbits;
1920 if (!page_has_buffers(page))
1921 create_empty_buffers(page, blocksize, 0);
1922 head = page_buffers(page);
1923
1924 bbits = inode->i_blkbits;
1925 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1926
1927 for(bh = head, block_start = 0; bh != head || !block_start;
1928 block++, block_start=block_end, bh = bh->b_this_page) {
1929 block_end = block_start + blocksize;
1930 if (block_end <= from || block_start >= to) {
1931 if (PageUptodate(page)) {
1932 if (!buffer_uptodate(bh))
1933 set_buffer_uptodate(bh);
1934 }
1935 continue;
1936 }
1937 if (buffer_new(bh))
1938 clear_buffer_new(bh);
1939 if (!buffer_mapped(bh)) {
1940 WARN_ON(bh->b_size != blocksize);
1941 err = get_block(inode, block, bh, 1);
1942 if (err)
1943 break;
1944 if (buffer_new(bh)) {
1945 unmap_underlying_metadata(bh->b_bdev,
1946 bh->b_blocknr);
1947 if (PageUptodate(page)) {
1948 set_buffer_uptodate(bh);
1949 continue;
1950 }
1951 if (block_end > to || block_start < from) {
1952 void *kaddr;
1953
1954 kaddr = kmap_atomic(page, KM_USER0);
1955 if (block_end > to)
1956 memset(kaddr+to, 0,
1957 block_end-to);
1958 if (block_start < from)
1959 memset(kaddr+block_start,
1960 0, from-block_start);
1961 flush_dcache_page(page);
1962 kunmap_atomic(kaddr, KM_USER0);
1963 }
1964 continue;
1965 }
1966 }
1967 if (PageUptodate(page)) {
1968 if (!buffer_uptodate(bh))
1969 set_buffer_uptodate(bh);
1970 continue;
1971 }
1972 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1973 (block_start < from || block_end > to)) {
1974 ll_rw_block(READ, 1, &bh);
1975 *wait_bh++=bh;
1976 }
1977 }
1978
1979
1980
1981 while(wait_bh > wait) {
1982 wait_on_buffer(*--wait_bh);
1983 if (!buffer_uptodate(*wait_bh))
1984 err = -EIO;
1985 }
1986 if (!err) {
1987 bh = head;
1988 do {
1989 if (buffer_new(bh))
1990 clear_buffer_new(bh);
1991 } while ((bh = bh->b_this_page) != head);
1992 return 0;
1993 }
1994
1995
1996
1997
1998
1999
2000 bh = head;
2001 block_start = 0;
2002 do {
2003 block_end = block_start+blocksize;
2004 if (block_end <= from)
2005 goto next_bh;
2006 if (block_start >= to)
2007 break;
2008 if (buffer_new(bh)) {
2009 void *kaddr;
2010
2011 clear_buffer_new(bh);
2012 kaddr = kmap_atomic(page, KM_USER0);
2013 memset(kaddr+block_start, 0, bh->b_size);
2014 kunmap_atomic(kaddr, KM_USER0);
2015 set_buffer_uptodate(bh);
2016 mark_buffer_dirty(bh);
2017 }
2018next_bh:
2019 block_start = block_end;
2020 bh = bh->b_this_page;
2021 } while (bh != head);
2022 return err;
2023}
2024
2025static int __block_commit_write(struct inode *inode, struct page *page,
2026 unsigned from, unsigned to)
2027{
2028 unsigned block_start, block_end;
2029 int partial = 0;
2030 unsigned blocksize;
2031 struct buffer_head *bh, *head;
2032
2033 blocksize = 1 << inode->i_blkbits;
2034
2035 for(bh = head = page_buffers(page), block_start = 0;
2036 bh != head || !block_start;
2037 block_start=block_end, bh = bh->b_this_page) {
2038 block_end = block_start + blocksize;
2039 if (block_end <= from || block_start >= to) {
2040 if (!buffer_uptodate(bh))
2041 partial = 1;
2042 } else {
2043 set_buffer_uptodate(bh);
2044 mark_buffer_dirty(bh);
2045 }
2046 }
2047
2048
2049
2050
2051
2052
2053
2054 if (!partial)
2055 SetPageUptodate(page);
2056 return 0;
2057}
2058
2059
2060
2061
2062
2063
2064
2065
2066int block_read_full_page(struct page *page, get_block_t *get_block)
2067{
2068 struct inode *inode = page->mapping->host;
2069 sector_t iblock, lblock;
2070 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2071 unsigned int blocksize;
2072 int nr, i;
2073 int fully_mapped = 1;
2074
2075 BUG_ON(!PageLocked(page));
2076 blocksize = 1 << inode->i_blkbits;
2077 if (!page_has_buffers(page))
2078 create_empty_buffers(page, blocksize, 0);
2079 head = page_buffers(page);
2080
2081 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2082 lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
2083 bh = head;
2084 nr = 0;
2085 i = 0;
2086
2087 do {
2088 if (buffer_uptodate(bh))
2089 continue;
2090
2091 if (!buffer_mapped(bh)) {
2092 int err = 0;
2093
2094 fully_mapped = 0;
2095 if (iblock < lblock) {
2096 WARN_ON(bh->b_size != blocksize);
2097 err = get_block(inode, iblock, bh, 0);
2098 if (err)
2099 SetPageError(page);
2100 }
2101 if (!buffer_mapped(bh)) {
2102 void *kaddr = kmap_atomic(page, KM_USER0);
2103 memset(kaddr + i * blocksize, 0, blocksize);
2104 flush_dcache_page(page);
2105 kunmap_atomic(kaddr, KM_USER0);
2106 if (!err)
2107 set_buffer_uptodate(bh);
2108 continue;
2109 }
2110
2111
2112
2113
2114 if (buffer_uptodate(bh))
2115 continue;
2116 }
2117 arr[nr++] = bh;
2118 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2119
2120 if (fully_mapped)
2121 SetPageMappedToDisk(page);
2122
2123 if (!nr) {
2124
2125
2126
2127
2128 if (!PageError(page))
2129 SetPageUptodate(page);
2130 unlock_page(page);
2131 return 0;
2132 }
2133
2134
2135 for (i = 0; i < nr; i++) {
2136 bh = arr[i];
2137 lock_buffer(bh);
2138 mark_buffer_async_read(bh);
2139 }
2140
2141
2142
2143
2144
2145
2146 for (i = 0; i < nr; i++) {
2147 bh = arr[i];
2148 if (buffer_uptodate(bh))
2149 end_buffer_async_read(bh, 1);
2150 else
2151 submit_bh(READ, bh);
2152 }
2153 return 0;
2154}
2155
2156
2157
2158
2159
2160static int __generic_cont_expand(struct inode *inode, loff_t size,
2161 pgoff_t index, unsigned int offset)
2162{
2163 struct address_space *mapping = inode->i_mapping;
2164 struct page *page;
2165 unsigned long limit;
2166 int err;
2167
2168 err = -EFBIG;
2169 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2170 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2171 send_sig(SIGXFSZ, current, 0);
2172 goto out;
2173 }
2174 if (size > inode->i_sb->s_maxbytes)
2175 goto out;
2176
2177 err = -ENOMEM;
2178 page = grab_cache_page(mapping, index);
2179 if (!page)
2180 goto out;
2181 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2182 if (err) {
2183
2184
2185
2186
2187 unlock_page(page);
2188 page_cache_release(page);
2189 vmtruncate(inode, inode->i_size);
2190 goto out;
2191 }
2192
2193 err = mapping->a_ops->commit_write(NULL, page, offset, offset);
2194
2195 unlock_page(page);
2196 page_cache_release(page);
2197 if (err > 0)
2198 err = 0;
2199out:
2200 return err;
2201}
2202
2203int generic_cont_expand(struct inode *inode, loff_t size)
2204{
2205 pgoff_t index;
2206 unsigned int offset;
2207
2208 offset = (size & (PAGE_CACHE_SIZE - 1));
2209
2210
2211
2212
2213
2214 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2215
2216 offset++;
2217 }
2218 index = size >> PAGE_CACHE_SHIFT;
2219
2220 return __generic_cont_expand(inode, size, index, offset);
2221}
2222
2223int generic_cont_expand_simple(struct inode *inode, loff_t size)
2224{
2225 loff_t pos = size - 1;
2226 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2227 unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
2228
2229
2230 return __generic_cont_expand(inode, size, index, offset);
2231}
2232
2233
2234
2235
2236
2237
2238int cont_prepare_write(struct page *page, unsigned offset,
2239 unsigned to, get_block_t *get_block, loff_t *bytes)
2240{
2241 struct address_space *mapping = page->mapping;
2242 struct inode *inode = mapping->host;
2243 struct page *new_page;
2244 pgoff_t pgpos;
2245 long status;
2246 unsigned zerofrom;
2247 unsigned blocksize = 1 << inode->i_blkbits;
2248 void *kaddr;
2249
2250 while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
2251 status = -ENOMEM;
2252 new_page = grab_cache_page(mapping, pgpos);
2253 if (!new_page)
2254 goto out;
2255
2256 if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
2257 unlock_page(new_page);
2258 page_cache_release(new_page);
2259 continue;
2260 }
2261 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2262 if (zerofrom & (blocksize-1)) {
2263 *bytes |= (blocksize-1);
2264 (*bytes)++;
2265 }
2266 status = __block_prepare_write(inode, new_page, zerofrom,
2267 PAGE_CACHE_SIZE, get_block);
2268 if (status)
2269 goto out_unmap;
2270 kaddr = kmap_atomic(new_page, KM_USER0);
2271 memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
2272 flush_dcache_page(new_page);
2273 kunmap_atomic(kaddr, KM_USER0);
2274 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
2275 unlock_page(new_page);
2276 page_cache_release(new_page);
2277 }
2278
2279 if (page->index < pgpos) {
2280
2281 zerofrom = offset;
2282 } else {
2283
2284 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2285
2286
2287 if (to > zerofrom && (zerofrom & (blocksize-1))) {
2288 *bytes |= (blocksize-1);
2289 (*bytes)++;
2290 }
2291
2292
2293 if (offset <= zerofrom)
2294 zerofrom = offset;
2295 }
2296 status = __block_prepare_write(inode, page, zerofrom, to, get_block);
2297 if (status)
2298 goto out1;
2299 if (zerofrom < offset) {
2300 kaddr = kmap_atomic(page, KM_USER0);
2301 memset(kaddr+zerofrom, 0, offset-zerofrom);
2302 flush_dcache_page(page);
2303 kunmap_atomic(kaddr, KM_USER0);
2304 __block_commit_write(inode, page, zerofrom, offset);
2305 }
2306 return 0;
2307out1:
2308 ClearPageUptodate(page);
2309 return status;
2310
2311out_unmap:
2312 ClearPageUptodate(new_page);
2313 unlock_page(new_page);
2314 page_cache_release(new_page);
2315out:
2316 return status;
2317}
2318
2319int block_prepare_write(struct page *page, unsigned from, unsigned to,
2320 get_block_t *get_block)
2321{
2322 struct inode *inode = page->mapping->host;
2323 int err = __block_prepare_write(inode, page, from, to, get_block);
2324 if (err)
2325 ClearPageUptodate(page);
2326 return err;
2327}
2328
2329int block_commit_write(struct page *page, unsigned from, unsigned to)
2330{
2331 struct inode *inode = page->mapping->host;
2332 __block_commit_write(inode,page,from,to);
2333 return 0;
2334}
2335
2336int generic_commit_write(struct file *file, struct page *page,
2337 unsigned from, unsigned to)
2338{
2339 struct inode *inode = page->mapping->host;
2340 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2341 __block_commit_write(inode,page,from,to);
2342
2343
2344
2345
2346 if (pos > inode->i_size) {
2347 i_size_write(inode, pos);
2348 mark_inode_dirty(inode);
2349 }
2350 return 0;
2351}
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
/*
 * I/O completion handler installed by nobh_prepare_write() on its
 * temporary read buffers: record the outcome in the uptodate bit, then
 * unlock the buffer so that waiters are woken.
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);
	unlock_buffer(bh);
}
2374
2375
2376
2377
2378
2379int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2380 get_block_t *get_block)
2381{
2382 struct inode *inode = page->mapping->host;
2383 const unsigned blkbits = inode->i_blkbits;
2384 const unsigned blocksize = 1 << blkbits;
2385 struct buffer_head map_bh;
2386 struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
2387 unsigned block_in_page;
2388 unsigned block_start;
2389 sector_t block_in_file;
2390 char *kaddr;
2391 int nr_reads = 0;
2392 int i;
2393 int ret = 0;
2394 int is_mapped_to_disk = 1;
2395 int dirtied_it = 0;
2396
2397 if (PageMappedToDisk(page))
2398 return 0;
2399
2400 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2401 map_bh.b_page = page;
2402
2403
2404
2405
2406
2407
2408 for (block_start = 0, block_in_page = 0;
2409 block_start < PAGE_CACHE_SIZE;
2410 block_in_page++, block_start += blocksize) {
2411 unsigned block_end = block_start + blocksize;
2412 int create;
2413
2414 map_bh.b_state = 0;
2415 create = 1;
2416 if (block_start >= to)
2417 create = 0;
2418 map_bh.b_size = blocksize;
2419 ret = get_block(inode, block_in_file + block_in_page,
2420 &map_bh, create);
2421 if (ret)
2422 goto failed;
2423 if (!buffer_mapped(&map_bh))
2424 is_mapped_to_disk = 0;
2425 if (buffer_new(&map_bh))
2426 unmap_underlying_metadata(map_bh.b_bdev,
2427 map_bh.b_blocknr);
2428 if (PageUptodate(page))
2429 continue;
2430 if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
2431 kaddr = kmap_atomic(page, KM_USER0);
2432 if (block_start < from) {
2433 memset(kaddr+block_start, 0, from-block_start);
2434 dirtied_it = 1;
2435 }
2436 if (block_end > to) {
2437 memset(kaddr + to, 0, block_end - to);
2438 dirtied_it = 1;
2439 }
2440 flush_dcache_page(page);
2441 kunmap_atomic(kaddr, KM_USER0);
2442 continue;
2443 }
2444 if (buffer_uptodate(&map_bh))
2445 continue;
2446 if (block_start < from || block_end > to) {
2447 struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
2448
2449 if (!bh) {
2450 ret = -ENOMEM;
2451 goto failed;
2452 }
2453 bh->b_state = map_bh.b_state;
2454 atomic_set(&bh->b_count, 0);
2455 bh->b_this_page = NULL;
2456 bh->b_page = page;
2457 bh->b_blocknr = map_bh.b_blocknr;
2458 bh->b_size = blocksize;
2459 bh->b_data = (char *)(long)block_start;
2460 bh->b_bdev = map_bh.b_bdev;
2461 bh->b_private = NULL;
2462 read_bh[nr_reads++] = bh;
2463 }
2464 }
2465
2466 if (nr_reads) {
2467 struct buffer_head *bh;
2468
2469
2470
2471
2472
2473
2474 for (i = 0; i < nr_reads; i++) {
2475 bh = read_bh[i];
2476 lock_buffer(bh);
2477 bh->b_end_io = end_buffer_read_nobh;
2478 submit_bh(READ, bh);
2479 }
2480 for (i = 0; i < nr_reads; i++) {
2481 bh = read_bh[i];
2482 wait_on_buffer(bh);
2483 if (!buffer_uptodate(bh))
2484 ret = -EIO;
2485 free_buffer_head(bh);
2486 read_bh[i] = NULL;
2487 }
2488 if (ret)
2489 goto failed;
2490 }
2491
2492 if (is_mapped_to_disk)
2493 SetPageMappedToDisk(page);
2494 SetPageUptodate(page);
2495
2496
2497
2498
2499
2500
2501
2502
2503 if (dirtied_it)
2504 set_page_dirty(page);
2505
2506 return 0;
2507
2508failed:
2509 for (i = 0; i < nr_reads; i++) {
2510 if (read_bh[i])
2511 free_buffer_head(read_bh[i]);
2512 }
2513
2514
2515
2516
2517
2518 kaddr = kmap_atomic(page, KM_USER0);
2519 memset(kaddr, 0, PAGE_CACHE_SIZE);
2520 kunmap_atomic(kaddr, KM_USER0);
2521 SetPageUptodate(page);
2522 set_page_dirty(page);
2523 return ret;
2524}
2525EXPORT_SYMBOL(nobh_prepare_write);
2526
2527int nobh_commit_write(struct file *file, struct page *page,
2528 unsigned from, unsigned to)
2529{
2530 struct inode *inode = page->mapping->host;
2531 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2532
2533 set_page_dirty(page);
2534 if (pos > inode->i_size) {
2535 i_size_write(inode, pos);
2536 mark_inode_dirty(inode);
2537 }
2538 return 0;
2539}
2540EXPORT_SYMBOL(nobh_commit_write);
2541
2542
2543
2544
2545
2546
2547int nobh_writepage(struct page *page, get_block_t *get_block,
2548 struct writeback_control *wbc)
2549{
2550 struct inode * const inode = page->mapping->host;
2551 loff_t i_size = i_size_read(inode);
2552 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2553 unsigned offset;
2554 void *kaddr;
2555 int ret;
2556
2557
2558 if (page->index < end_index)
2559 goto out;
2560
2561
2562 offset = i_size & (PAGE_CACHE_SIZE-1);
2563 if (page->index >= end_index+1 || !offset) {
2564
2565
2566
2567
2568
2569#if 0
2570
2571 if (page->mapping->a_ops->invalidatepage)
2572 page->mapping->a_ops->invalidatepage(page, offset);
2573#endif
2574 unlock_page(page);
2575 return 0;
2576 }
2577
2578
2579
2580
2581
2582
2583
2584
2585 kaddr = kmap_atomic(page, KM_USER0);
2586 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2587 flush_dcache_page(page);
2588 kunmap_atomic(kaddr, KM_USER0);
2589out:
2590 ret = mpage_writepage(page, get_block, wbc);
2591 if (ret == -EAGAIN)
2592 ret = __block_write_full_page(inode, page, get_block, wbc);
2593 return ret;
2594}
2595EXPORT_SYMBOL(nobh_writepage);
2596
2597
2598
2599
2600int nobh_truncate_page(struct address_space *mapping, loff_t from)
2601{
2602 struct inode *inode = mapping->host;
2603 unsigned blocksize = 1 << inode->i_blkbits;
2604 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2605 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2606 unsigned to;
2607 struct page *page;
2608 struct address_space_operations *a_ops = mapping->a_ops;
2609 char *kaddr;
2610 int ret = 0;
2611
2612 if ((offset & (blocksize - 1)) == 0)
2613 goto out;
2614
2615 ret = -ENOMEM;
2616 page = grab_cache_page(mapping, index);
2617 if (!page)
2618 goto out;
2619
2620 to = (offset + blocksize) & ~(blocksize - 1);
2621 ret = a_ops->prepare_write(NULL, page, offset, to);
2622 if (ret == 0) {
2623 kaddr = kmap_atomic(page, KM_USER0);
2624 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2625 flush_dcache_page(page);
2626 kunmap_atomic(kaddr, KM_USER0);
2627 set_page_dirty(page);
2628 }
2629 unlock_page(page);
2630 page_cache_release(page);
2631out:
2632 return ret;
2633}
2634EXPORT_SYMBOL(nobh_truncate_page);
2635
2636int block_truncate_page(struct address_space *mapping,
2637 loff_t from, get_block_t *get_block)
2638{
2639 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2640 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2641 unsigned blocksize;
2642 sector_t iblock;
2643 unsigned length, pos;
2644 struct inode *inode = mapping->host;
2645 struct page *page;
2646 struct buffer_head *bh;
2647 void *kaddr;
2648 int err;
2649
2650 blocksize = 1 << inode->i_blkbits;
2651 length = offset & (blocksize - 1);
2652
2653
2654 if (!length)
2655 return 0;
2656
2657 length = blocksize - length;
2658 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2659
2660 page = grab_cache_page(mapping, index);
2661 err = -ENOMEM;
2662 if (!page)
2663 goto out;
2664
2665 if (!page_has_buffers(page))
2666 create_empty_buffers(page, blocksize, 0);
2667
2668
2669 bh = page_buffers(page);
2670 pos = blocksize;
2671 while (offset >= pos) {
2672 bh = bh->b_this_page;
2673 iblock++;
2674 pos += blocksize;
2675 }
2676
2677 err = 0;
2678 if (!buffer_mapped(bh)) {
2679 WARN_ON(bh->b_size != blocksize);
2680 err = get_block(inode, iblock, bh, 0);
2681 if (err)
2682 goto unlock;
2683
2684 if (!buffer_mapped(bh))
2685 goto unlock;
2686 }
2687
2688
2689 if (PageUptodate(page))
2690 set_buffer_uptodate(bh);
2691
2692 if (!buffer_uptodate(bh) && !buffer_delay(bh)) {
2693 err = -EIO;
2694 ll_rw_block(READ, 1, &bh);
2695 wait_on_buffer(bh);
2696
2697 if (!buffer_uptodate(bh))
2698 goto unlock;
2699 }
2700
2701 kaddr = kmap_atomic(page, KM_USER0);
2702 memset(kaddr + offset, 0, length);
2703 flush_dcache_page(page);
2704 kunmap_atomic(kaddr, KM_USER0);
2705
2706 mark_buffer_dirty(bh);
2707 err = 0;
2708
2709unlock:
2710 unlock_page(page);
2711 page_cache_release(page);
2712out:
2713 return err;
2714}
2715
2716
2717
2718
2719int block_write_full_page(struct page *page, get_block_t *get_block,
2720 struct writeback_control *wbc)
2721{
2722 struct inode * const inode = page->mapping->host;
2723 loff_t i_size = i_size_read(inode);
2724 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2725 unsigned offset;
2726 void *kaddr;
2727
2728
2729 if (page->index < end_index)
2730 return __block_write_full_page(inode, page, get_block, wbc);
2731
2732
2733 offset = i_size & (PAGE_CACHE_SIZE-1);
2734 if (page->index >= end_index+1 || !offset) {
2735
2736
2737
2738
2739
2740 do_invalidatepage(page, 0);
2741 unlock_page(page);
2742 return 0;
2743 }
2744
2745
2746
2747
2748
2749
2750
2751
2752 kaddr = kmap_atomic(page, KM_USER0);
2753 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2754 flush_dcache_page(page);
2755 kunmap_atomic(kaddr, KM_USER0);
2756 return __block_write_full_page(inode, page, get_block, wbc);
2757}
2758
2759sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2760 get_block_t *get_block)
2761{
2762 struct buffer_head tmp;
2763 struct inode *inode = mapping->host;
2764 tmp.b_state = 0;
2765 tmp.b_blocknr = 0;
2766 tmp.b_size = 1 << inode->i_blkbits;
2767 get_block(inode, block, &tmp, 0);
2768 return tmp.b_blocknr;
2769}
2770
2771static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
2772{
2773 struct buffer_head *bh = bio->bi_private;
2774
2775 if (bio->bi_size)
2776 return 1;
2777
2778 if (err == -EOPNOTSUPP) {
2779 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2780 set_bit(BH_Eopnotsupp, &bh->b_state);
2781 }
2782
2783 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2784 bio_put(bio);
2785 return 0;
2786}
2787
2788int submit_bh(int rw, struct buffer_head * bh)
2789{
2790 struct bio *bio;
2791 int ret = 0;
2792
2793 BUG_ON(!buffer_locked(bh));
2794 BUG_ON(!buffer_mapped(bh));
2795 BUG_ON(!bh->b_end_io);
2796
2797 if (buffer_ordered(bh) && (rw == WRITE))
2798 rw = WRITE_BARRIER;
2799
2800
2801
2802
2803
2804 if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
2805 clear_buffer_write_io_error(bh);
2806
2807
2808
2809
2810
2811 bio = bio_alloc(GFP_NOIO, 1);
2812
2813 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2814 bio->bi_bdev = bh->b_bdev;
2815 bio->bi_io_vec[0].bv_page = bh->b_page;
2816 bio->bi_io_vec[0].bv_len = bh->b_size;
2817 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2818
2819 bio->bi_vcnt = 1;
2820 bio->bi_idx = 0;
2821 bio->bi_size = bh->b_size;
2822
2823 bio->bi_end_io = end_bio_bh_io_sync;
2824 bio->bi_private = bh;
2825
2826 bio_get(bio);
2827 submit_bio(rw, bio);
2828
2829 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2830 ret = -EOPNOTSUPP;
2831
2832 bio_put(bio);
2833 return ret;
2834}
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2863{
2864 int i;
2865
2866 for (i = 0; i < nr; i++) {
2867 struct buffer_head *bh = bhs[i];
2868
2869 if (rw == SWRITE)
2870 lock_buffer(bh);
2871 else if (test_set_buffer_locked(bh))
2872 continue;
2873
2874 if (rw == WRITE || rw == SWRITE) {
2875 if (test_clear_buffer_dirty(bh)) {
2876 bh->b_end_io = end_buffer_write_sync;
2877 get_bh(bh);
2878 submit_bh(WRITE, bh);
2879 continue;
2880 }
2881 } else {
2882 if (!buffer_uptodate(bh)) {
2883 bh->b_end_io = end_buffer_read_sync;
2884 get_bh(bh);
2885 submit_bh(rw, bh);
2886 continue;
2887 }
2888 }
2889 unlock_buffer(bh);
2890 }
2891}
2892
2893
2894
2895
2896
2897
2898int sync_dirty_buffer(struct buffer_head *bh)
2899{
2900 int ret = 0;
2901
2902 WARN_ON(atomic_read(&bh->b_count) < 1);
2903 lock_buffer(bh);
2904 if (test_clear_buffer_dirty(bh)) {
2905 get_bh(bh);
2906 bh->b_end_io = end_buffer_write_sync;
2907 ret = submit_bh(WRITE, bh);
2908 wait_on_buffer(bh);
2909 if (buffer_eopnotsupp(bh)) {
2910 clear_buffer_eopnotsupp(bh);
2911 ret = -EOPNOTSUPP;
2912 }
2913 if (!ret && !buffer_uptodate(bh))
2914 ret = -EIO;
2915 } else {
2916 unlock_buffer(bh);
2917 }
2918 return ret;
2919}
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941static inline int buffer_busy(struct buffer_head *bh)
2942{
2943 return atomic_read(&bh->b_count) |
2944 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
2945}
2946
2947static int
2948drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
2949{
2950 struct buffer_head *head = page_buffers(page);
2951 struct buffer_head *bh;
2952
2953 bh = head;
2954 do {
2955 if (buffer_write_io_error(bh) && page->mapping)
2956 set_bit(AS_EIO, &page->mapping->flags);
2957 if (buffer_busy(bh))
2958 goto failed;
2959 bh = bh->b_this_page;
2960 } while (bh != head);
2961
2962 do {
2963 struct buffer_head *next = bh->b_this_page;
2964
2965 if (!list_empty(&bh->b_assoc_buffers))
2966 __remove_assoc_queue(bh);
2967 bh = next;
2968 } while (bh != head);
2969 *buffers_to_free = head;
2970 __clear_page_buffers(page);
2971 return 1;
2972failed:
2973 return 0;
2974}
2975
2976int try_to_free_buffers(struct page *page)
2977{
2978 struct address_space * const mapping = page->mapping;
2979 struct buffer_head *buffers_to_free = NULL;
2980 int ret = 0;
2981
2982 BUG_ON(!PageLocked(page));
2983 if (PageWriteback(page))
2984 return 0;
2985
2986 if (mapping == NULL) {
2987 ret = drop_buffers(page, &buffers_to_free);
2988 goto out;
2989 }
2990
2991 spin_lock(&mapping->private_lock);
2992 ret = drop_buffers(page, &buffers_to_free);
2993 if (ret) {
2994
2995
2996
2997
2998
2999
3000
3001
3002 clear_page_dirty(page);
3003 }
3004 spin_unlock(&mapping->private_lock);
3005out:
3006 if (buffers_to_free) {
3007 struct buffer_head *bh = buffers_to_free;
3008
3009 do {
3010 struct buffer_head *next = bh->b_this_page;
3011 free_buffer_head(bh);
3012 bh = next;
3013 } while (bh != buffers_to_free);
3014 }
3015 return ret;
3016}
3017EXPORT_SYMBOL(try_to_free_buffers);
3018
3019void block_sync_page(struct page *page)
3020{
3021 struct address_space *mapping;
3022
3023 smp_mb();
3024 mapping = page_mapping(page);
3025 if (mapping)
3026 blk_run_backing_dev(mapping->backing_dev_info, page);
3027}
3028
3029
3030
3031
3032
3033
3034
3035
3036asmlinkage long sys_bdflush(int func, long data)
3037{
3038 static int msg_count;
3039
3040 if (!capable(CAP_SYS_ADMIN))
3041 return -EPERM;
3042
3043 if (msg_count < 5) {
3044 msg_count++;
3045 printk(KERN_INFO
3046 "warning: process `%s' used the obsolete bdflush"
3047 " system call\n", current->comm);
3048 printk(KERN_INFO "Fix your initscripts?\n");
3049 }
3050
3051 if (func == 1)
3052 do_exit(0);
3053 return 0;
3054}
3055
3056
3057
3058
3059static kmem_cache_t *bh_cachep;
3060
3061
3062
3063
3064
3065static int max_buffer_heads;
3066
3067int buffer_heads_over_limit;
3068
3069struct bh_accounting {
3070 int nr;
3071 int ratelimit;
3072};
3073
3074static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3075
3076static void recalc_bh_state(void)
3077{
3078 int i;
3079 int tot = 0;
3080
3081 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
3082 return;
3083 __get_cpu_var(bh_accounting).ratelimit = 0;
3084 for_each_online_cpu(i)
3085 tot += per_cpu(bh_accounting, i).nr;
3086 buffer_heads_over_limit = (tot > max_buffer_heads);
3087}
3088
3089struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3090{
3091 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3092 if (ret) {
3093 get_cpu_var(bh_accounting).nr++;
3094 recalc_bh_state();
3095 put_cpu_var(bh_accounting);
3096 }
3097 return ret;
3098}
3099EXPORT_SYMBOL(alloc_buffer_head);
3100
3101void free_buffer_head(struct buffer_head *bh)
3102{
3103 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3104 kmem_cache_free(bh_cachep, bh);
3105 get_cpu_var(bh_accounting).nr--;
3106 recalc_bh_state();
3107 put_cpu_var(bh_accounting);
3108}
3109EXPORT_SYMBOL(free_buffer_head);
3110
3111static void
3112init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
3113{
3114 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
3115 SLAB_CTOR_CONSTRUCTOR) {
3116 struct buffer_head * bh = (struct buffer_head *)data;
3117
3118 memset(bh, 0, sizeof(*bh));
3119 INIT_LIST_HEAD(&bh->b_assoc_buffers);
3120 }
3121}
3122
3123#ifdef CONFIG_HOTPLUG_CPU
3124static void buffer_exit_cpu(int cpu)
3125{
3126 int i;
3127 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3128
3129 for (i = 0; i < BH_LRU_SIZE; i++) {
3130 brelse(b->bhs[i]);
3131 b->bhs[i] = NULL;
3132 }
3133 get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
3134 per_cpu(bh_accounting, cpu).nr = 0;
3135 put_cpu_var(bh_accounting);
3136}
3137
3138static int buffer_cpu_notify(struct notifier_block *self,
3139 unsigned long action, void *hcpu)
3140{
3141 if (action == CPU_DEAD)
3142 buffer_exit_cpu((unsigned long)hcpu);
3143 return NOTIFY_OK;
3144}
3145#endif
3146
3147void __init buffer_init(void)
3148{
3149 int nrpages;
3150
3151 bh_cachep = kmem_cache_create("buffer_head",
3152 sizeof(struct buffer_head), 0,
3153 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3154 SLAB_MEM_SPREAD),
3155 init_buffer_head,
3156 NULL);
3157
3158
3159
3160
3161 nrpages = (nr_free_buffer_pages() * 10) / 100;
3162 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3163 hotcpu_notifier(buffer_cpu_notify, 0);
3164}
3165
3166EXPORT_SYMBOL(__bforget);
3167EXPORT_SYMBOL(__brelse);
3168EXPORT_SYMBOL(__wait_on_buffer);
3169EXPORT_SYMBOL(block_commit_write);
3170EXPORT_SYMBOL(block_prepare_write);
3171EXPORT_SYMBOL(block_read_full_page);
3172EXPORT_SYMBOL(block_sync_page);
3173EXPORT_SYMBOL(block_truncate_page);
3174EXPORT_SYMBOL(block_write_full_page);
3175EXPORT_SYMBOL(cont_prepare_write);
3176EXPORT_SYMBOL(end_buffer_async_write);
3177EXPORT_SYMBOL(end_buffer_read_sync);
3178EXPORT_SYMBOL(end_buffer_write_sync);
3179EXPORT_SYMBOL(file_fsync);
3180EXPORT_SYMBOL(fsync_bdev);
3181EXPORT_SYMBOL(generic_block_bmap);
3182EXPORT_SYMBOL(generic_commit_write);
3183EXPORT_SYMBOL(generic_cont_expand);
3184EXPORT_SYMBOL(generic_cont_expand_simple);
3185EXPORT_SYMBOL(init_buffer);
3186EXPORT_SYMBOL(invalidate_bdev);
3187EXPORT_SYMBOL(ll_rw_block);
3188EXPORT_SYMBOL(mark_buffer_dirty);
3189EXPORT_SYMBOL(submit_bh);
3190EXPORT_SYMBOL(sync_dirty_buffer);
3191EXPORT_SYMBOL(unlock_buffer);
3192