1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/module.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46
47#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
48
49inline void
50init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55
56static int sync_buffer(void *word)
57{
58 struct block_device *bd;
59 struct buffer_head *bh
60 = container_of(word, struct buffer_head, b_state);
61
62 smp_mb();
63 bd = bh->b_bdev;
64 if (bd)
65 blk_run_address_space(bd->bd_inode->i_mapping);
66 io_schedule();
67 return 0;
68}
69
70void __lock_buffer(struct buffer_head *bh)
71{
72 wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
73 TASK_UNINTERRUPTIBLE);
74}
75EXPORT_SYMBOL(__lock_buffer);
76
77void unlock_buffer(struct buffer_head *bh)
78{
79 smp_mb__before_clear_bit();
80 clear_buffer_locked(bh);
81 smp_mb__after_clear_bit();
82 wake_up_bit(&bh->b_state, BH_Lock);
83}
84
85
86
87
88
89
90void __wait_on_buffer(struct buffer_head * bh)
91{
92 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
93}
94
/*
 * Detach the private (buffer) data from @page: clear PagePrivate, zero the
 * private word and drop one page reference.
 */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	/* Release the reference paired with the private data just removed. */
	page_cache_release(page);
}
102
103static void buffer_io_error(struct buffer_head *bh)
104{
105 char b[BDEVNAME_SIZE];
106
107 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
108 bdevname(bh->b_bdev, b),
109 (unsigned long long)bh->b_blocknr);
110}
111
112
113
114
115
116
117
118
119
/*
 * Common tail of read completion: update the uptodate bit from @uptodate and
 * unlock the buffer.  The buffer's reference count is not touched here.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
130
131
132
133
134
/*
 * Read completion handler: record the result, unlock the buffer, then drop
 * one reference on it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	/* Drop the reference only after the buffer has been unlocked. */
	put_bh(bh);
}
140
141void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
142{
143 char b[BDEVNAME_SIZE];
144
145 if (uptodate) {
146 set_buffer_uptodate(bh);
147 } else {
148 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
149 buffer_io_error(bh);
150 printk(KERN_WARNING "lost page write due to "
151 "I/O error on %s\n",
152 bdevname(bh->b_bdev, b));
153 }
154 set_buffer_write_io_error(bh);
155 clear_buffer_uptodate(bh);
156 }
157 unlock_buffer(bh);
158 put_bh(bh);
159}
160
161
162
163
164
165int sync_blockdev(struct block_device *bdev)
166{
167 int ret = 0;
168
169 if (bdev)
170 ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
171 return ret;
172}
173EXPORT_SYMBOL(sync_blockdev);
174
175
176
177
178
179
/*
 * Sync a block device.  If get_super() finds a superblock for it, the sync
 * goes through fsync_super(); otherwise only the device mapping is synced
 * via sync_blockdev().  Returns the result of whichever path ran.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = fsync_super(sb);
	drop_super(sb);
	return res;
}
190
191
192
193
194
195
196
197
198
199
200struct super_block *freeze_bdev(struct block_device *bdev)
201{
202 struct super_block *sb;
203
204 down(&bdev->bd_mount_sem);
205 sb = get_super(bdev);
206 if (sb && !(sb->s_flags & MS_RDONLY)) {
207 sb->s_frozen = SB_FREEZE_WRITE;
208 smp_wmb();
209
210 __fsync_super(sb);
211
212 sb->s_frozen = SB_FREEZE_TRANS;
213 smp_wmb();
214
215 sync_blockdev(sb->s_bdev);
216
217 if (sb->s_op->write_super_lockfs)
218 sb->s_op->write_super_lockfs(sb);
219 }
220
221 sync_blockdev(bdev);
222 return sb;
223}
224EXPORT_SYMBOL(freeze_bdev);
225
226
227
228
229
230
231
232
233void thaw_bdev(struct block_device *bdev, struct super_block *sb)
234{
235 if (sb) {
236 BUG_ON(sb->s_bdev != bdev);
237
238 if (sb->s_op->unlockfs)
239 sb->s_op->unlockfs(sb);
240 sb->s_frozen = SB_UNFROZEN;
241 smp_wmb();
242 wake_up(&sb->s_wait_unfrozen);
243 drop_super(sb);
244 }
245
246 up(&bdev->bd_mount_sem);
247}
248EXPORT_SYMBOL(thaw_bdev);
249
250
251
252
253
254
255
256
257
258
259
260
261static struct buffer_head *
262__find_get_block_slow(struct block_device *bdev, sector_t block)
263{
264 struct inode *bd_inode = bdev->bd_inode;
265 struct address_space *bd_mapping = bd_inode->i_mapping;
266 struct buffer_head *ret = NULL;
267 pgoff_t index;
268 struct buffer_head *bh;
269 struct buffer_head *head;
270 struct page *page;
271 int all_mapped = 1;
272
273 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
274 page = find_get_page(bd_mapping, index);
275 if (!page)
276 goto out;
277
278 spin_lock(&bd_mapping->private_lock);
279 if (!page_has_buffers(page))
280 goto out_unlock;
281 head = page_buffers(page);
282 bh = head;
283 do {
284 if (bh->b_blocknr == block) {
285 ret = bh;
286 get_bh(bh);
287 goto out_unlock;
288 }
289 if (!buffer_mapped(bh))
290 all_mapped = 0;
291 bh = bh->b_this_page;
292 } while (bh != head);
293
294
295
296
297
298
299 if (all_mapped) {
300 printk("__find_get_block_slow() failed. "
301 "block=%llu, b_blocknr=%llu\n",
302 (unsigned long long)block,
303 (unsigned long long)bh->b_blocknr);
304 printk("b_state=0x%08lx, b_size=%zu\n",
305 bh->b_state, bh->b_size);
306 printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
307 }
308out_unlock:
309 spin_unlock(&bd_mapping->private_lock);
310 page_cache_release(page);
311out:
312 return ret;
313}
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347void invalidate_bdev(struct block_device *bdev)
348{
349 struct address_space *mapping = bdev->bd_inode->i_mapping;
350
351 if (mapping->nrpages == 0)
352 return;
353
354 invalidate_bh_lrus();
355 invalidate_mapping_pages(mapping, 0, -1);
356}
357
358
359
360
361static void free_more_memory(void)
362{
363 struct zone *zone;
364 int nid;
365
366 wakeup_pdflush(1024);
367 yield();
368
369 for_each_online_node(nid) {
370 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
371 gfp_zone(GFP_NOFS), NULL,
372 &zone);
373 if (zone)
374 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
375 GFP_NOFS);
376 }
377}
378
379
380
381
382
383static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
384{
385 unsigned long flags;
386 struct buffer_head *first;
387 struct buffer_head *tmp;
388 struct page *page;
389 int page_uptodate = 1;
390
391 BUG_ON(!buffer_async_read(bh));
392
393 page = bh->b_page;
394 if (uptodate) {
395 set_buffer_uptodate(bh);
396 } else {
397 clear_buffer_uptodate(bh);
398 if (printk_ratelimit())
399 buffer_io_error(bh);
400 SetPageError(page);
401 }
402
403
404
405
406
407
408 first = page_buffers(page);
409 local_irq_save(flags);
410 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
411 clear_buffer_async_read(bh);
412 unlock_buffer(bh);
413 tmp = bh;
414 do {
415 if (!buffer_uptodate(tmp))
416 page_uptodate = 0;
417 if (buffer_async_read(tmp)) {
418 BUG_ON(!buffer_locked(tmp));
419 goto still_busy;
420 }
421 tmp = tmp->b_this_page;
422 } while (tmp != bh);
423 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
424 local_irq_restore(flags);
425
426
427
428
429
430 if (page_uptodate && !PageError(page))
431 SetPageUptodate(page);
432 unlock_page(page);
433 return;
434
435still_busy:
436 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
437 local_irq_restore(flags);
438 return;
439}
440
441
442
443
444
445static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
446{
447 char b[BDEVNAME_SIZE];
448 unsigned long flags;
449 struct buffer_head *first;
450 struct buffer_head *tmp;
451 struct page *page;
452
453 BUG_ON(!buffer_async_write(bh));
454
455 page = bh->b_page;
456 if (uptodate) {
457 set_buffer_uptodate(bh);
458 } else {
459 if (printk_ratelimit()) {
460 buffer_io_error(bh);
461 printk(KERN_WARNING "lost page write due to "
462 "I/O error on %s\n",
463 bdevname(bh->b_bdev, b));
464 }
465 set_bit(AS_EIO, &page->mapping->flags);
466 set_buffer_write_io_error(bh);
467 clear_buffer_uptodate(bh);
468 SetPageError(page);
469 }
470
471 first = page_buffers(page);
472 local_irq_save(flags);
473 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
474
475 clear_buffer_async_write(bh);
476 unlock_buffer(bh);
477 tmp = bh->b_this_page;
478 while (tmp != bh) {
479 if (buffer_async_write(tmp)) {
480 BUG_ON(!buffer_locked(tmp));
481 goto still_busy;
482 }
483 tmp = tmp->b_this_page;
484 }
485 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
486 local_irq_restore(flags);
487 end_page_writeback(page);
488 return;
489
490still_busy:
491 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
492 local_irq_restore(flags);
493 return;
494}
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517static void mark_buffer_async_read(struct buffer_head *bh)
518{
519 bh->b_end_io = end_buffer_async_read;
520 set_buffer_async_read(bh);
521}
522
523void mark_buffer_async_write(struct buffer_head *bh)
524{
525 bh->b_end_io = end_buffer_async_write;
526 set_buffer_async_write(bh);
527}
528EXPORT_SYMBOL(mark_buffer_async_write);
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583static inline void __remove_assoc_queue(struct buffer_head *bh)
584{
585 list_del_init(&bh->b_assoc_buffers);
586 WARN_ON(!bh->b_assoc_map);
587 if (buffer_write_io_error(bh))
588 set_bit(AS_EIO, &bh->b_assoc_map->flags);
589 bh->b_assoc_map = NULL;
590}
591
592int inode_has_buffers(struct inode *inode)
593{
594 return !list_empty(&inode->i_data.private_list);
595}
596
597
598
599
600
601
602
603
604
605
606
607static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
608{
609 struct buffer_head *bh;
610 struct list_head *p;
611 int err = 0;
612
613 spin_lock(lock);
614repeat:
615 list_for_each_prev(p, list) {
616 bh = BH_ENTRY(p);
617 if (buffer_locked(bh)) {
618 get_bh(bh);
619 spin_unlock(lock);
620 wait_on_buffer(bh);
621 if (!buffer_uptodate(bh))
622 err = -EIO;
623 brelse(bh);
624 spin_lock(lock);
625 goto repeat;
626 }
627 }
628 spin_unlock(lock);
629 return err;
630}
631
632
633
634
635
636
637
638
639
640
641
642
643int sync_mapping_buffers(struct address_space *mapping)
644{
645 struct address_space *buffer_mapping = mapping->assoc_mapping;
646
647 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
648 return 0;
649
650 return fsync_buffers_list(&buffer_mapping->private_lock,
651 &mapping->private_list);
652}
653EXPORT_SYMBOL(sync_mapping_buffers);
654
655
656
657
658
659
660
661void write_boundary_block(struct block_device *bdev,
662 sector_t bblock, unsigned blocksize)
663{
664 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
665 if (bh) {
666 if (buffer_dirty(bh))
667 ll_rw_block(WRITE, 1, &bh);
668 put_bh(bh);
669 }
670}
671
672void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
673{
674 struct address_space *mapping = inode->i_mapping;
675 struct address_space *buffer_mapping = bh->b_page->mapping;
676
677 mark_buffer_dirty(bh);
678 if (!mapping->assoc_mapping) {
679 mapping->assoc_mapping = buffer_mapping;
680 } else {
681 BUG_ON(mapping->assoc_mapping != buffer_mapping);
682 }
683 if (!bh->b_assoc_map) {
684 spin_lock(&buffer_mapping->private_lock);
685 list_move_tail(&bh->b_assoc_buffers,
686 &mapping->private_list);
687 bh->b_assoc_map = mapping;
688 spin_unlock(&buffer_mapping->private_lock);
689 }
690}
691EXPORT_SYMBOL(mark_buffer_dirty_inode);
692
693
694
695
696
697
698
699
700static int __set_page_dirty(struct page *page,
701 struct address_space *mapping, int warn)
702{
703 if (unlikely(!mapping))
704 return !TestSetPageDirty(page);
705
706 if (TestSetPageDirty(page))
707 return 0;
708
709 write_lock_irq(&mapping->tree_lock);
710 if (page->mapping) {
711 WARN_ON_ONCE(warn && !PageUptodate(page));
712
713 if (mapping_cap_account_dirty(mapping)) {
714 __inc_zone_page_state(page, NR_FILE_DIRTY);
715 __inc_bdi_stat(mapping->backing_dev_info,
716 BDI_RECLAIMABLE);
717 task_io_account_write(PAGE_CACHE_SIZE);
718 }
719 radix_tree_tag_set(&mapping->page_tree,
720 page_index(page), PAGECACHE_TAG_DIRTY);
721 }
722 write_unlock_irq(&mapping->tree_lock);
723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
724
725 return 1;
726}
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753int __set_page_dirty_buffers(struct page *page)
754{
755 struct address_space *mapping = page_mapping(page);
756
757 if (unlikely(!mapping))
758 return !TestSetPageDirty(page);
759
760 spin_lock(&mapping->private_lock);
761 if (page_has_buffers(page)) {
762 struct buffer_head *head = page_buffers(page);
763 struct buffer_head *bh = head;
764
765 do {
766 set_buffer_dirty(bh);
767 bh = bh->b_this_page;
768 } while (bh != head);
769 }
770 spin_unlock(&mapping->private_lock);
771
772 return __set_page_dirty(page, mapping, 1);
773}
774EXPORT_SYMBOL(__set_page_dirty_buffers);
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
796{
797 struct buffer_head *bh;
798 struct list_head tmp;
799 struct address_space *mapping;
800 int err = 0, err2;
801
802 INIT_LIST_HEAD(&tmp);
803
804 spin_lock(lock);
805 while (!list_empty(list)) {
806 bh = BH_ENTRY(list->next);
807 mapping = bh->b_assoc_map;
808 __remove_assoc_queue(bh);
809
810
811 smp_mb();
812 if (buffer_dirty(bh) || buffer_locked(bh)) {
813 list_add(&bh->b_assoc_buffers, &tmp);
814 bh->b_assoc_map = mapping;
815 if (buffer_dirty(bh)) {
816 get_bh(bh);
817 spin_unlock(lock);
818
819
820
821
822
823
824 ll_rw_block(SWRITE_SYNC, 1, &bh);
825 brelse(bh);
826 spin_lock(lock);
827 }
828 }
829 }
830
831 while (!list_empty(&tmp)) {
832 bh = BH_ENTRY(tmp.prev);
833 get_bh(bh);
834 mapping = bh->b_assoc_map;
835 __remove_assoc_queue(bh);
836
837
838 smp_mb();
839 if (buffer_dirty(bh)) {
840 list_add(&bh->b_assoc_buffers,
841 &mapping->private_list);
842 bh->b_assoc_map = mapping;
843 }
844 spin_unlock(lock);
845 wait_on_buffer(bh);
846 if (!buffer_uptodate(bh))
847 err = -EIO;
848 brelse(bh);
849 spin_lock(lock);
850 }
851
852 spin_unlock(lock);
853 err2 = osync_buffers_list(lock, list);
854 if (err)
855 return err;
856 else
857 return err2;
858}
859
860
861
862
863
864
865
866
867
868
869void invalidate_inode_buffers(struct inode *inode)
870{
871 if (inode_has_buffers(inode)) {
872 struct address_space *mapping = &inode->i_data;
873 struct list_head *list = &mapping->private_list;
874 struct address_space *buffer_mapping = mapping->assoc_mapping;
875
876 spin_lock(&buffer_mapping->private_lock);
877 while (!list_empty(list))
878 __remove_assoc_queue(BH_ENTRY(list->next));
879 spin_unlock(&buffer_mapping->private_lock);
880 }
881}
882
883
884
885
886
887
888
889int remove_inode_buffers(struct inode *inode)
890{
891 int ret = 1;
892
893 if (inode_has_buffers(inode)) {
894 struct address_space *mapping = &inode->i_data;
895 struct list_head *list = &mapping->private_list;
896 struct address_space *buffer_mapping = mapping->assoc_mapping;
897
898 spin_lock(&buffer_mapping->private_lock);
899 while (!list_empty(list)) {
900 struct buffer_head *bh = BH_ENTRY(list->next);
901 if (buffer_dirty(bh)) {
902 ret = 0;
903 break;
904 }
905 __remove_assoc_queue(bh);
906 }
907 spin_unlock(&buffer_mapping->private_lock);
908 }
909 return ret;
910}
911
912
913
914
915
916
917
918
919
920
921struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
922 int retry)
923{
924 struct buffer_head *bh, *head;
925 long offset;
926
927try_again:
928 head = NULL;
929 offset = PAGE_SIZE;
930 while ((offset -= size) >= 0) {
931 bh = alloc_buffer_head(GFP_NOFS);
932 if (!bh)
933 goto no_grow;
934
935 bh->b_bdev = NULL;
936 bh->b_this_page = head;
937 bh->b_blocknr = -1;
938 head = bh;
939
940 bh->b_state = 0;
941 atomic_set(&bh->b_count, 0);
942 bh->b_private = NULL;
943 bh->b_size = size;
944
945
946 set_bh_page(bh, page, offset);
947
948 init_buffer(bh, NULL, NULL);
949 }
950 return head;
951
952
953
954no_grow:
955 if (head) {
956 do {
957 bh = head;
958 head = head->b_this_page;
959 free_buffer_head(bh);
960 } while (head);
961 }
962
963
964
965
966
967
968
969 if (!retry)
970 return NULL;
971
972
973
974
975
976
977
978 free_more_memory();
979 goto try_again;
980}
981EXPORT_SYMBOL_GPL(alloc_page_buffers);
982
983static inline void
984link_dev_buffers(struct page *page, struct buffer_head *head)
985{
986 struct buffer_head *bh, *tail;
987
988 bh = head;
989 do {
990 tail = bh;
991 bh = bh->b_this_page;
992 } while (bh);
993 tail->b_this_page = head;
994 attach_page_buffers(page, head);
995}
996
997
998
999
1000static void
1001init_page_buffers(struct page *page, struct block_device *bdev,
1002 sector_t block, int size)
1003{
1004 struct buffer_head *head = page_buffers(page);
1005 struct buffer_head *bh = head;
1006 int uptodate = PageUptodate(page);
1007
1008 do {
1009 if (!buffer_mapped(bh)) {
1010 init_buffer(bh, NULL, NULL);
1011 bh->b_bdev = bdev;
1012 bh->b_blocknr = block;
1013 if (uptodate)
1014 set_buffer_uptodate(bh);
1015 set_buffer_mapped(bh);
1016 }
1017 block++;
1018 bh = bh->b_this_page;
1019 } while (bh != head);
1020}
1021
1022
1023
1024
1025
1026
1027static struct page *
1028grow_dev_page(struct block_device *bdev, sector_t block,
1029 pgoff_t index, int size)
1030{
1031 struct inode *inode = bdev->bd_inode;
1032 struct page *page;
1033 struct buffer_head *bh;
1034
1035 page = find_or_create_page(inode->i_mapping, index,
1036 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
1037 if (!page)
1038 return NULL;
1039
1040 BUG_ON(!PageLocked(page));
1041
1042 if (page_has_buffers(page)) {
1043 bh = page_buffers(page);
1044 if (bh->b_size == size) {
1045 init_page_buffers(page, bdev, block, size);
1046 return page;
1047 }
1048 if (!try_to_free_buffers(page))
1049 goto failed;
1050 }
1051
1052
1053
1054
1055 bh = alloc_page_buffers(page, size, 0);
1056 if (!bh)
1057 goto failed;
1058
1059
1060
1061
1062
1063
1064 spin_lock(&inode->i_mapping->private_lock);
1065 link_dev_buffers(page, bh);
1066 init_page_buffers(page, bdev, block, size);
1067 spin_unlock(&inode->i_mapping->private_lock);
1068 return page;
1069
1070failed:
1071 BUG();
1072 unlock_page(page);
1073 page_cache_release(page);
1074 return NULL;
1075}
1076
1077
1078
1079
1080
1081static int
1082grow_buffers(struct block_device *bdev, sector_t block, int size)
1083{
1084 struct page *page;
1085 pgoff_t index;
1086 int sizebits;
1087
1088 sizebits = -1;
1089 do {
1090 sizebits++;
1091 } while ((size << sizebits) < PAGE_SIZE);
1092
1093 index = block >> sizebits;
1094
1095
1096
1097
1098
1099 if (unlikely(index != block >> sizebits)) {
1100 char b[BDEVNAME_SIZE];
1101
1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1103 "device %s\n",
1104 __func__, (unsigned long long)block,
1105 bdevname(bdev, b));
1106 return -EIO;
1107 }
1108 block = index << sizebits;
1109
1110 page = grow_dev_page(bdev, block, index, size);
1111 if (!page)
1112 return 0;
1113 unlock_page(page);
1114 page_cache_release(page);
1115 return 1;
1116}
1117
1118static struct buffer_head *
1119__getblk_slow(struct block_device *bdev, sector_t block, int size)
1120{
1121
1122 if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
1123 (size < 512 || size > PAGE_SIZE))) {
1124 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1125 size);
1126 printk(KERN_ERR "hardsect size: %d\n",
1127 bdev_hardsect_size(bdev));
1128
1129 dump_stack();
1130 return NULL;
1131 }
1132
1133 for (;;) {
1134 struct buffer_head * bh;
1135 int ret;
1136
1137 bh = __find_get_block(bdev, block, size);
1138 if (bh)
1139 return bh;
1140
1141 ret = grow_buffers(bdev, block, size);
1142 if (ret < 0)
1143 return NULL;
1144 if (ret == 0)
1145 free_more_memory();
1146 }
1147}
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184void mark_buffer_dirty(struct buffer_head *bh)
1185{
1186 WARN_ON_ONCE(!buffer_uptodate(bh));
1187
1188
1189
1190
1191
1192
1193
1194 if (buffer_dirty(bh)) {
1195 smp_mb();
1196 if (buffer_dirty(bh))
1197 return;
1198 }
1199
1200 if (!test_set_buffer_dirty(bh))
1201 __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
1202}
1203
1204
1205
1206
1207
1208
1209
1210
1211void __brelse(struct buffer_head * buf)
1212{
1213 if (atomic_read(&buf->b_count)) {
1214 put_bh(buf);
1215 return;
1216 }
1217 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1218 WARN_ON(1);
1219}
1220
1221
1222
1223
1224
1225void __bforget(struct buffer_head *bh)
1226{
1227 clear_buffer_dirty(bh);
1228 if (bh->b_assoc_map) {
1229 struct address_space *buffer_mapping = bh->b_page->mapping;
1230
1231 spin_lock(&buffer_mapping->private_lock);
1232 list_del_init(&bh->b_assoc_buffers);
1233 bh->b_assoc_map = NULL;
1234 spin_unlock(&buffer_mapping->private_lock);
1235 }
1236 __brelse(bh);
1237}
1238
1239static struct buffer_head *__bread_slow(struct buffer_head *bh)
1240{
1241 lock_buffer(bh);
1242 if (buffer_uptodate(bh)) {
1243 unlock_buffer(bh);
1244 return bh;
1245 } else {
1246 get_bh(bh);
1247 bh->b_end_io = end_buffer_read_sync;
1248 submit_bh(READ, bh);
1249 wait_on_buffer(bh);
1250 if (buffer_uptodate(bh))
1251 return bh;
1252 }
1253 brelse(bh);
1254 return NULL;
1255}
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271#define BH_LRU_SIZE 8
1272
1273struct bh_lru {
1274 struct buffer_head *bhs[BH_LRU_SIZE];
1275};
1276
1277static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1278
1279#ifdef CONFIG_SMP
1280#define bh_lru_lock() local_irq_disable()
1281#define bh_lru_unlock() local_irq_enable()
1282#else
1283#define bh_lru_lock() preempt_disable()
1284#define bh_lru_unlock() preempt_enable()
1285#endif
1286
/*
 * Assert that interrupts are enabled.  The check is compiled in only when
 * irqs_disabled is defined as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1293
1294
1295
1296
1297static void bh_lru_install(struct buffer_head *bh)
1298{
1299 struct buffer_head *evictee = NULL;
1300 struct bh_lru *lru;
1301
1302 check_irqs_on();
1303 bh_lru_lock();
1304 lru = &__get_cpu_var(bh_lrus);
1305 if (lru->bhs[0] != bh) {
1306 struct buffer_head *bhs[BH_LRU_SIZE];
1307 int in;
1308 int out = 0;
1309
1310 get_bh(bh);
1311 bhs[out++] = bh;
1312 for (in = 0; in < BH_LRU_SIZE; in++) {
1313 struct buffer_head *bh2 = lru->bhs[in];
1314
1315 if (bh2 == bh) {
1316 __brelse(bh2);
1317 } else {
1318 if (out >= BH_LRU_SIZE) {
1319 BUG_ON(evictee != NULL);
1320 evictee = bh2;
1321 } else {
1322 bhs[out++] = bh2;
1323 }
1324 }
1325 }
1326 while (out < BH_LRU_SIZE)
1327 bhs[out++] = NULL;
1328 memcpy(lru->bhs, bhs, sizeof(bhs));
1329 }
1330 bh_lru_unlock();
1331
1332 if (evictee)
1333 __brelse(evictee);
1334}
1335
1336
1337
1338
1339static struct buffer_head *
1340lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1341{
1342 struct buffer_head *ret = NULL;
1343 struct bh_lru *lru;
1344 unsigned int i;
1345
1346 check_irqs_on();
1347 bh_lru_lock();
1348 lru = &__get_cpu_var(bh_lrus);
1349 for (i = 0; i < BH_LRU_SIZE; i++) {
1350 struct buffer_head *bh = lru->bhs[i];
1351
1352 if (bh && bh->b_bdev == bdev &&
1353 bh->b_blocknr == block && bh->b_size == size) {
1354 if (i) {
1355 while (i) {
1356 lru->bhs[i] = lru->bhs[i - 1];
1357 i--;
1358 }
1359 lru->bhs[0] = bh;
1360 }
1361 get_bh(bh);
1362 ret = bh;
1363 break;
1364 }
1365 }
1366 bh_lru_unlock();
1367 return ret;
1368}
1369
1370
1371
1372
1373
1374
1375struct buffer_head *
1376__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1377{
1378 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1379
1380 if (bh == NULL) {
1381 bh = __find_get_block_slow(bdev, block);
1382 if (bh)
1383 bh_lru_install(bh);
1384 }
1385 if (bh)
1386 touch_buffer(bh);
1387 return bh;
1388}
1389EXPORT_SYMBOL(__find_get_block);
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403struct buffer_head *
1404__getblk(struct block_device *bdev, sector_t block, unsigned size)
1405{
1406 struct buffer_head *bh = __find_get_block(bdev, block, size);
1407
1408 might_sleep();
1409 if (bh == NULL)
1410 bh = __getblk_slow(bdev, block, size);
1411 return bh;
1412}
1413EXPORT_SYMBOL(__getblk);
1414
1415
1416
1417
1418void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1419{
1420 struct buffer_head *bh = __getblk(bdev, block, size);
1421 if (likely(bh)) {
1422 ll_rw_block(READA, 1, &bh);
1423 brelse(bh);
1424 }
1425}
1426EXPORT_SYMBOL(__breadahead);
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437struct buffer_head *
1438__bread(struct block_device *bdev, sector_t block, unsigned size)
1439{
1440 struct buffer_head *bh = __getblk(bdev, block, size);
1441
1442 if (likely(bh) && !buffer_uptodate(bh))
1443 bh = __bread_slow(bh);
1444 return bh;
1445}
1446EXPORT_SYMBOL(__bread);
1447
1448
1449
1450
1451
1452
1453static void invalidate_bh_lru(void *arg)
1454{
1455 struct bh_lru *b = &get_cpu_var(bh_lrus);
1456 int i;
1457
1458 for (i = 0; i < BH_LRU_SIZE; i++) {
1459 brelse(b->bhs[i]);
1460 b->bhs[i] = NULL;
1461 }
1462 put_cpu_var(bh_lrus);
1463}
1464
1465void invalidate_bh_lrus(void)
1466{
1467 on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
1468}
1469EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1470
1471void set_bh_page(struct buffer_head *bh,
1472 struct page *page, unsigned long offset)
1473{
1474 bh->b_page = page;
1475 BUG_ON(offset >= PAGE_SIZE);
1476 if (PageHighMem(page))
1477
1478
1479
1480 bh->b_data = (char *)(0 + offset);
1481 else
1482 bh->b_data = page_address(page) + offset;
1483}
1484EXPORT_SYMBOL(set_bh_page);
1485
1486
1487
1488
1489static void discard_buffer(struct buffer_head * bh)
1490{
1491 lock_buffer(bh);
1492 clear_buffer_dirty(bh);
1493 bh->b_bdev = NULL;
1494 clear_buffer_mapped(bh);
1495 clear_buffer_req(bh);
1496 clear_buffer_new(bh);
1497 clear_buffer_delay(bh);
1498 clear_buffer_unwritten(bh);
1499 unlock_buffer(bh);
1500}
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517void block_invalidatepage(struct page *page, unsigned long offset)
1518{
1519 struct buffer_head *head, *bh, *next;
1520 unsigned int curr_off = 0;
1521
1522 BUG_ON(!PageLocked(page));
1523 if (!page_has_buffers(page))
1524 goto out;
1525
1526 head = page_buffers(page);
1527 bh = head;
1528 do {
1529 unsigned int next_off = curr_off + bh->b_size;
1530 next = bh->b_this_page;
1531
1532
1533
1534
1535 if (offset <= curr_off)
1536 discard_buffer(bh);
1537 curr_off = next_off;
1538 bh = next;
1539 } while (bh != head);
1540
1541
1542
1543
1544
1545
1546 if (offset == 0)
1547 try_to_release_page(page, 0);
1548out:
1549 return;
1550}
1551EXPORT_SYMBOL(block_invalidatepage);
1552
1553
1554
1555
1556
1557
1558void create_empty_buffers(struct page *page,
1559 unsigned long blocksize, unsigned long b_state)
1560{
1561 struct buffer_head *bh, *head, *tail;
1562
1563 head = alloc_page_buffers(page, blocksize, 1);
1564 bh = head;
1565 do {
1566 bh->b_state |= b_state;
1567 tail = bh;
1568 bh = bh->b_this_page;
1569 } while (bh);
1570 tail->b_this_page = head;
1571
1572 spin_lock(&page->mapping->private_lock);
1573 if (PageUptodate(page) || PageDirty(page)) {
1574 bh = head;
1575 do {
1576 if (PageDirty(page))
1577 set_buffer_dirty(bh);
1578 if (PageUptodate(page))
1579 set_buffer_uptodate(bh);
1580 bh = bh->b_this_page;
1581 } while (bh != head);
1582 }
1583 attach_page_buffers(page, head);
1584 spin_unlock(&page->mapping->private_lock);
1585}
1586EXPORT_SYMBOL(create_empty_buffers);
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1605{
1606 struct buffer_head *old_bh;
1607
1608 might_sleep();
1609
1610 old_bh = __find_get_block_slow(bdev, block);
1611 if (old_bh) {
1612 clear_buffer_dirty(old_bh);
1613 wait_on_buffer(old_bh);
1614 clear_buffer_req(old_bh);
1615 __brelse(old_bh);
1616 }
1617}
1618EXPORT_SYMBOL(unmap_underlying_metadata);
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645static int __block_write_full_page(struct inode *inode, struct page *page,
1646 get_block_t *get_block, struct writeback_control *wbc)
1647{
1648 int err;
1649 sector_t block;
1650 sector_t last_block;
1651 struct buffer_head *bh, *head;
1652 const unsigned blocksize = 1 << inode->i_blkbits;
1653 int nr_underway = 0;
1654
1655 BUG_ON(!PageLocked(page));
1656
1657 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1658
1659 if (!page_has_buffers(page)) {
1660 create_empty_buffers(page, blocksize,
1661 (1 << BH_Dirty)|(1 << BH_Uptodate));
1662 }
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1675 head = page_buffers(page);
1676 bh = head;
1677
1678
1679
1680
1681
1682 do {
1683 if (block > last_block) {
1684
1685
1686
1687
1688
1689
1690
1691
1692 clear_buffer_dirty(bh);
1693 set_buffer_uptodate(bh);
1694 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1695 WARN_ON(bh->b_size != blocksize);
1696 err = get_block(inode, block, bh, 1);
1697 if (err)
1698 goto recover;
1699 if (buffer_new(bh)) {
1700
1701 clear_buffer_new(bh);
1702 unmap_underlying_metadata(bh->b_bdev,
1703 bh->b_blocknr);
1704 }
1705 }
1706 bh = bh->b_this_page;
1707 block++;
1708 } while (bh != head);
1709
1710 do {
1711 if (!buffer_mapped(bh))
1712 continue;
1713
1714
1715
1716
1717
1718
1719
1720 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1721 lock_buffer(bh);
1722 } else if (test_set_buffer_locked(bh)) {
1723 redirty_page_for_writepage(wbc, page);
1724 continue;
1725 }
1726 if (test_clear_buffer_dirty(bh)) {
1727 mark_buffer_async_write(bh);
1728 } else {
1729 unlock_buffer(bh);
1730 }
1731 } while ((bh = bh->b_this_page) != head);
1732
1733
1734
1735
1736
1737 BUG_ON(PageWriteback(page));
1738 set_page_writeback(page);
1739
1740 do {
1741 struct buffer_head *next = bh->b_this_page;
1742 if (buffer_async_write(bh)) {
1743 submit_bh(WRITE, bh);
1744 nr_underway++;
1745 }
1746 bh = next;
1747 } while (bh != head);
1748 unlock_page(page);
1749
1750 err = 0;
1751done:
1752 if (nr_underway == 0) {
1753
1754
1755
1756
1757
1758 end_page_writeback(page);
1759
1760
1761
1762
1763
1764 }
1765 return err;
1766
1767recover:
1768
1769
1770
1771
1772
1773
1774 bh = head;
1775
1776 do {
1777 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1778 lock_buffer(bh);
1779 mark_buffer_async_write(bh);
1780 } else {
1781
1782
1783
1784
1785 clear_buffer_dirty(bh);
1786 }
1787 } while ((bh = bh->b_this_page) != head);
1788 SetPageError(page);
1789 BUG_ON(PageWriteback(page));
1790 mapping_set_error(page->mapping, err);
1791 set_page_writeback(page);
1792 do {
1793 struct buffer_head *next = bh->b_this_page;
1794 if (buffer_async_write(bh)) {
1795 clear_buffer_dirty(bh);
1796 submit_bh(WRITE, bh);
1797 nr_underway++;
1798 }
1799 bh = next;
1800 } while (bh != head);
1801 unlock_page(page);
1802 goto done;
1803}
1804
1805
1806
1807
1808
1809
1810void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1811{
1812 unsigned int block_start, block_end;
1813 struct buffer_head *head, *bh;
1814
1815 BUG_ON(!PageLocked(page));
1816 if (!page_has_buffers(page))
1817 return;
1818
1819 bh = head = page_buffers(page);
1820 block_start = 0;
1821 do {
1822 block_end = block_start + bh->b_size;
1823
1824 if (buffer_new(bh)) {
1825 if (block_end > from && block_start < to) {
1826 if (!PageUptodate(page)) {
1827 unsigned start, size;
1828
1829 start = max(from, block_start);
1830 size = min(to, block_end) - start;
1831
1832 zero_user(page, start, size);
1833 set_buffer_uptodate(bh);
1834 }
1835
1836 clear_buffer_new(bh);
1837 mark_buffer_dirty(bh);
1838 }
1839 }
1840
1841 block_start = block_end;
1842 bh = bh->b_this_page;
1843 } while (bh != head);
1844}
1845EXPORT_SYMBOL(page_zero_new_buffers);
1846
/*
 * Prepare the buffers of a locked page for a write covering the byte range
 * [from, to) within the page.
 *
 * Buffers are created if the page has none.  Every block that overlaps the
 * range is mapped through get_block() (create == 1) if it is not mapped yet.
 * A freshly allocated block has the parts lying outside the range zeroed; a
 * partially covered block that is not up to date is read in.
 *
 * Returns 0 on success, the error returned by get_block(), or -EIO when a
 * read did not leave its buffer up to date.  On any error the range is handed
 * to page_zero_new_buffers() before returning.
 */
static int __block_prepare_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to, get_block_t *get_block)
{
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	/*
	 * Only blocks that straddle "from" or "to" are queued for reading
	 * (see the condition before ll_rw_block() below), hence two slots.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	bbits = inode->i_blkbits;
	/* file block number of the first block in this page */
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	/*
	 * "!block_start" lets the first iteration run even though bh == head;
	 * afterwards the loop ends when the circular list wraps back to head.
	 */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		/* block entirely outside the write range */
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				/*
				 * Zero the parts of the new block that the
				 * caller will not overwrite: [to, block_end)
				 * and [block_start, from).
				 */
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue; 
		}
		/* partially overwritten block with stale contents: read it */
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}
	}
	/*
	 * Wait for the reads issued above.  This also runs after a
	 * get_block() failure broke out of the loop.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
1926
1927static int __block_commit_write(struct inode *inode, struct page *page,
1928 unsigned from, unsigned to)
1929{
1930 unsigned block_start, block_end;
1931 int partial = 0;
1932 unsigned blocksize;
1933 struct buffer_head *bh, *head;
1934
1935 blocksize = 1 << inode->i_blkbits;
1936
1937 for(bh = head = page_buffers(page), block_start = 0;
1938 bh != head || !block_start;
1939 block_start=block_end, bh = bh->b_this_page) {
1940 block_end = block_start + blocksize;
1941 if (block_end <= from || block_start >= to) {
1942 if (!buffer_uptodate(bh))
1943 partial = 1;
1944 } else {
1945 set_buffer_uptodate(bh);
1946 mark_buffer_dirty(bh);
1947 }
1948 clear_buffer_new(bh);
1949 }
1950
1951
1952
1953
1954
1955
1956
1957 if (!partial)
1958 SetPageUptodate(page);
1959 return 0;
1960}
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970int block_write_begin(struct file *file, struct address_space *mapping,
1971 loff_t pos, unsigned len, unsigned flags,
1972 struct page **pagep, void **fsdata,
1973 get_block_t *get_block)
1974{
1975 struct inode *inode = mapping->host;
1976 int status = 0;
1977 struct page *page;
1978 pgoff_t index;
1979 unsigned start, end;
1980 int ownpage = 0;
1981
1982 index = pos >> PAGE_CACHE_SHIFT;
1983 start = pos & (PAGE_CACHE_SIZE - 1);
1984 end = start + len;
1985
1986 page = *pagep;
1987 if (page == NULL) {
1988 ownpage = 1;
1989 page = __grab_cache_page(mapping, index);
1990 if (!page) {
1991 status = -ENOMEM;
1992 goto out;
1993 }
1994 *pagep = page;
1995 } else
1996 BUG_ON(!PageLocked(page));
1997
1998 status = __block_prepare_write(inode, page, start, end, get_block);
1999 if (unlikely(status)) {
2000 ClearPageUptodate(page);
2001
2002 if (ownpage) {
2003 unlock_page(page);
2004 page_cache_release(page);
2005 *pagep = NULL;
2006
2007
2008
2009
2010
2011
2012 if (pos + len > inode->i_size)
2013 vmtruncate(inode, inode->i_size);
2014 }
2015 goto out;
2016 }
2017
2018out:
2019 return status;
2020}
2021EXPORT_SYMBOL(block_write_begin);
2022
2023int block_write_end(struct file *file, struct address_space *mapping,
2024 loff_t pos, unsigned len, unsigned copied,
2025 struct page *page, void *fsdata)
2026{
2027 struct inode *inode = mapping->host;
2028 unsigned start;
2029
2030 start = pos & (PAGE_CACHE_SIZE - 1);
2031
2032 if (unlikely(copied < len)) {
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045 if (!PageUptodate(page))
2046 copied = 0;
2047
2048 page_zero_new_buffers(page, start+copied, start+len);
2049 }
2050 flush_dcache_page(page);
2051
2052
2053 __block_commit_write(inode, page, start, start+copied);
2054
2055 return copied;
2056}
2057EXPORT_SYMBOL(block_write_end);
2058
2059int generic_write_end(struct file *file, struct address_space *mapping,
2060 loff_t pos, unsigned len, unsigned copied,
2061 struct page *page, void *fsdata)
2062{
2063 struct inode *inode = mapping->host;
2064
2065 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2066
2067
2068
2069
2070
2071
2072
2073
2074 if (pos+copied > inode->i_size) {
2075 i_size_write(inode, pos+copied);
2076 mark_inode_dirty(inode);
2077 }
2078
2079 unlock_page(page);
2080 page_cache_release(page);
2081
2082 return copied;
2083}
2084EXPORT_SYMBOL(generic_write_end);
2085
2086
2087
2088
2089
2090
2091
2092
/*
 * Read an entire locked page by reading each of its buffers that is not
 * already up to date.
 *
 * Buffers are created if the page has none.  Unmapped buffers are mapped
 * with get_block() (create == 0) when they lie below the block that contains
 * i_size; a buffer that stays unmapped is zero-filled.  Buffers that need
 * I/O are collected, locked, marked for async read and then submitted.
 *
 * Always returns 0; a get_block() failure is recorded with SetPageError().
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize;
	int nr, i;
	int fully_mapped = 1;

	BUG_ON(!PageLocked(page));
	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	/* first block of this page, and first block past i_size (rounded up) */
	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Stage 1: decide which buffers need I/O.  Note that "continue" in a
	 * do/while jumps to the loop condition, so i, iblock and bh still
	 * advance.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				/* still unmapped: present zeroes */
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/* get_block() may have marked the buffer up to date */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/* nothing to read: finish the page here */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage 2: lock the buffers and mark them for async read */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the I/O.  A buffer found up to date at this point is
	 * completed directly through end_buffer_async_read() instead of being
	 * submitted.
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
2179
2180
2181
2182
2183
2184int generic_cont_expand_simple(struct inode *inode, loff_t size)
2185{
2186 struct address_space *mapping = inode->i_mapping;
2187 struct page *page;
2188 void *fsdata;
2189 unsigned long limit;
2190 int err;
2191
2192 err = -EFBIG;
2193 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2194 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2195 send_sig(SIGXFSZ, current, 0);
2196 goto out;
2197 }
2198 if (size > inode->i_sb->s_maxbytes)
2199 goto out;
2200
2201 err = pagecache_write_begin(NULL, mapping, size, 0,
2202 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2203 &page, &fsdata);
2204 if (err)
2205 goto out;
2206
2207 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2208 BUG_ON(err > 0);
2209
2210out:
2211 return err;
2212}
2213
/*
 * Zero-fill the file from the position stored in *bytes up to "pos" using
 * pagecache writes, one page at a time.
 *
 * Returns 0 on success or the first negative error from
 * pagecache_write_begin()/pagecache_write_end().
 */
static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	/* page index and in-page offset of the target position */
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ~PAGE_CACHE_MASK;

	/*
	 * Whole pages before the one holding "pos".  The loop condition
	 * re-reads *bytes on every pass.
	 * NOTE(review): progress relies on *bytes advancing between passes;
	 * presumably the write_end path updates it - confirm against callers.
	 */
	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		/* not block aligned: round *bytes up to the next block */
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_CACHE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);
	}

	/* page that contains "pos": zero up to the in-page offset */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		/* nothing to zero if the target is not past the current end */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}
2282
2283
2284
2285
2286
2287int cont_write_begin(struct file *file, struct address_space *mapping,
2288 loff_t pos, unsigned len, unsigned flags,
2289 struct page **pagep, void **fsdata,
2290 get_block_t *get_block, loff_t *bytes)
2291{
2292 struct inode *inode = mapping->host;
2293 unsigned blocksize = 1 << inode->i_blkbits;
2294 unsigned zerofrom;
2295 int err;
2296
2297 err = cont_expand_zero(file, mapping, pos, bytes);
2298 if (err)
2299 goto out;
2300
2301 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2302 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2303 *bytes |= (blocksize-1);
2304 (*bytes)++;
2305 }
2306
2307 *pagep = NULL;
2308 err = block_write_begin(file, mapping, pos, len,
2309 flags, pagep, fsdata, get_block);
2310out:
2311 return err;
2312}
2313
2314int block_prepare_write(struct page *page, unsigned from, unsigned to,
2315 get_block_t *get_block)
2316{
2317 struct inode *inode = page->mapping->host;
2318 int err = __block_prepare_write(inode, page, from, to, get_block);
2319 if (err)
2320 ClearPageUptodate(page);
2321 return err;
2322}
2323
2324int block_commit_write(struct page *page, unsigned from, unsigned to)
2325{
2326 struct inode *inode = page->mapping->host;
2327 __block_commit_write(inode,page,from,to);
2328 return 0;
2329}
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346int
2347block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
2348 get_block_t get_block)
2349{
2350 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2351 unsigned long end;
2352 loff_t size;
2353 int ret = -EINVAL;
2354
2355 lock_page(page);
2356 size = i_size_read(inode);
2357 if ((page->mapping != inode->i_mapping) ||
2358 (page_offset(page) > size)) {
2359
2360 goto out_unlock;
2361 }
2362
2363
2364 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2365 end = size & ~PAGE_CACHE_MASK;
2366 else
2367 end = PAGE_CACHE_SIZE;
2368
2369 ret = block_prepare_write(page, 0, end, get_block);
2370 if (!ret)
2371 ret = block_commit_write(page, 0, end);
2372
2373out_unlock:
2374 unlock_page(page);
2375 return ret;
2376}
2377
2378
2379
2380
2381
2382
/*
 * Read completion handler installed by nobh_write_begin(); it simply
 * forwards to __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2387
2388
2389
2390
2391
2392
2393static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2394{
2395 struct buffer_head *bh;
2396
2397 BUG_ON(!PageLocked(page));
2398
2399 spin_lock(&page->mapping->private_lock);
2400 bh = head;
2401 do {
2402 if (PageDirty(page))
2403 set_buffer_dirty(bh);
2404 if (!bh->b_this_page)
2405 bh->b_this_page = head;
2406 bh = bh->b_this_page;
2407 } while (bh != head);
2408 attach_page_buffers(page, head);
2409 spin_unlock(&page->mapping->private_lock);
2410}
2411
2412
2413
2414
2415
/*
 * write_begin for mappings that try to avoid keeping buffer heads attached
 * to pages.
 *
 * On success *pagep holds the locked page and *fsdata is either NULL or a
 * chain of temporary buffer heads that were allocated here and are NOT
 * attached to the page.  If the page already has buffers, the work is
 * delegated to block_write_begin().
 *
 * Returns 0 on success, -ENOMEM on allocation failure, the get_block()
 * error, or -EIO if a read failed.  On failure the page is unlocked and
 * released, *pagep is set to NULL, and the inode is truncated back to i_size
 * if the write would have extended the file.
 */
int nobh_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = __grab_cache_page(mapping, index);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	/* page already carries buffers: use the buffer-based path instead */
	if (page_has_buffers(page)) {
		unlock_page(page);
		page_cache_release(page);
		*pagep = NULL;
		return block_write_begin(file, mapping, pos, len, flags, pagep,
					fsdata, get_block);
	}

	/* fully mapped page: nothing to prepare, *fsdata stays NULL */
	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate temporary buffer heads for the page.  They are only
	 * attached to the page on the failure path below.
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * Map every block of the page; blocks starting at or after "to" are
	 * only looked up (create == 0).
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		/* new or hole: zero the parts outside [from, to) */
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;	/* reiserfs does this */
		/* partially overwritten block: read the old contents */
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * Wait for the reads.  The chain is walked until a NULL
		 * b_this_page, i.e. it is treated as NULL-terminated here.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	/* hand the temporary buffers to the matching write_end */
	*fsdata = head;

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery: attach the buffers to the page so that
	 * page_zero_new_buffers() can process any newly allocated blocks in
	 * the range.
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	page_cache_release(page);
	*pagep = NULL;

	if (pos + len > inode->i_size)
		vmtruncate(inode, inode->i_size);

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
2560
2561int nobh_write_end(struct file *file, struct address_space *mapping,
2562 loff_t pos, unsigned len, unsigned copied,
2563 struct page *page, void *fsdata)
2564{
2565 struct inode *inode = page->mapping->host;
2566 struct buffer_head *head = fsdata;
2567 struct buffer_head *bh;
2568 BUG_ON(fsdata != NULL && page_has_buffers(page));
2569
2570 if (unlikely(copied < len) && !page_has_buffers(page))
2571 attach_nobh_buffers(page, head);
2572 if (page_has_buffers(page))
2573 return generic_write_end(file, mapping, pos, len,
2574 copied, page, fsdata);
2575
2576 SetPageUptodate(page);
2577 set_page_dirty(page);
2578 if (pos+copied > inode->i_size) {
2579 i_size_write(inode, pos+copied);
2580 mark_inode_dirty(inode);
2581 }
2582
2583 unlock_page(page);
2584 page_cache_release(page);
2585
2586 while (head) {
2587 bh = head;
2588 head = head->b_this_page;
2589 free_buffer_head(bh);
2590 }
2591
2592 return copied;
2593}
2594EXPORT_SYMBOL(nobh_write_end);
2595
2596
2597
2598
2599
2600
/*
 * writepage for "nobh" mappings.  Pages fully inside i_size are written
 * directly; a page that straddles i_size has its tail zeroed first; a page
 * entirely beyond i_size is just unlocked.  Writing goes through
 * mpage_writepage(), falling back to __block_write_full_page() when that
 * returns -EAGAIN.
 */
int nobh_writepage(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	struct inode * const inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;
	int ret;

	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		goto out;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index >= end_index+1 || !offset) {
		/*
		 * Nothing to write for a page beyond EOF; the invalidation
		 * below is compiled out.
		 */
#if 0
		if (page->mapping->a_ops->invalidatepage)
			page->mapping->a_ops->invalidatepage(page, offset);
#endif
		unlock_page(page);
		return 0; /* don't care */
	}

	/*
	 * The page straddles i_size: zero the part past EOF before it is
	 * written out.
	 */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
out:
	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc);
	return ret;
}
EXPORT_SYMBOL(nobh_writepage);
2646
2647int nobh_truncate_page(struct address_space *mapping,
2648 loff_t from, get_block_t *get_block)
2649{
2650 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2651 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2652 unsigned blocksize;
2653 sector_t iblock;
2654 unsigned length, pos;
2655 struct inode *inode = mapping->host;
2656 struct page *page;
2657 struct buffer_head map_bh;
2658 int err;
2659
2660 blocksize = 1 << inode->i_blkbits;
2661 length = offset & (blocksize - 1);
2662
2663
2664 if (!length)
2665 return 0;
2666
2667 length = blocksize - length;
2668 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2669
2670 page = grab_cache_page(mapping, index);
2671 err = -ENOMEM;
2672 if (!page)
2673 goto out;
2674
2675 if (page_has_buffers(page)) {
2676has_buffers:
2677 unlock_page(page);
2678 page_cache_release(page);
2679 return block_truncate_page(mapping, from, get_block);
2680 }
2681
2682
2683 pos = blocksize;
2684 while (offset >= pos) {
2685 iblock++;
2686 pos += blocksize;
2687 }
2688
2689 err = get_block(inode, iblock, &map_bh, 0);
2690 if (err)
2691 goto unlock;
2692
2693 if (!buffer_mapped(&map_bh))
2694 goto unlock;
2695
2696
2697 if (!PageUptodate(page)) {
2698 err = mapping->a_ops->readpage(NULL, page);
2699 if (err) {
2700 page_cache_release(page);
2701 goto out;
2702 }
2703 lock_page(page);
2704 if (!PageUptodate(page)) {
2705 err = -EIO;
2706 goto unlock;
2707 }
2708 if (page_has_buffers(page))
2709 goto has_buffers;
2710 }
2711 zero_user(page, offset, length);
2712 set_page_dirty(page);
2713 err = 0;
2714
2715unlock:
2716 unlock_page(page);
2717 page_cache_release(page);
2718out:
2719 return err;
2720}
2721EXPORT_SYMBOL(nobh_truncate_page);
2722
/*
 * Zero the tail of the block that contains file offset "from", using the
 * page's buffer heads (created here if the page has none).
 *
 * Nothing is done when "from" is block aligned or when the block is not
 * mapped.  A mapped block that is not up to date is read first.
 *
 * Returns 0 on success, -ENOMEM if the page could not be obtained, the
 * get_block() error, or -EIO if the block could not be read.
 */
int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	/* number of bytes from "offset" to the end of its block */
	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	
	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		/* pre-set the error; cleared below once the read succeeded */
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
2797
2798
2799
2800
2801int block_write_full_page(struct page *page, get_block_t *get_block,
2802 struct writeback_control *wbc)
2803{
2804 struct inode * const inode = page->mapping->host;
2805 loff_t i_size = i_size_read(inode);
2806 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2807 unsigned offset;
2808
2809
2810 if (page->index < end_index)
2811 return __block_write_full_page(inode, page, get_block, wbc);
2812
2813
2814 offset = i_size & (PAGE_CACHE_SIZE-1);
2815 if (page->index >= end_index+1 || !offset) {
2816
2817
2818
2819
2820
2821 do_invalidatepage(page, 0);
2822 unlock_page(page);
2823 return 0;
2824 }
2825
2826
2827
2828
2829
2830
2831
2832
2833 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2834 return __block_write_full_page(inode, page, get_block, wbc);
2835}
2836
2837sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2838 get_block_t *get_block)
2839{
2840 struct buffer_head tmp;
2841 struct inode *inode = mapping->host;
2842 tmp.b_state = 0;
2843 tmp.b_blocknr = 0;
2844 tmp.b_size = 1 << inode->i_blkbits;
2845 get_block(inode, block, &tmp, 0);
2846 return tmp.b_blocknr;
2847}
2848
2849static void end_bio_bh_io_sync(struct bio *bio, int err)
2850{
2851 struct buffer_head *bh = bio->bi_private;
2852
2853 if (err == -EOPNOTSUPP) {
2854 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2855 set_bit(BH_Eopnotsupp, &bh->b_state);
2856 }
2857
2858 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2859 bio_put(bio);
2860}
2861
/*
 * Submit I/O for a single buffer head by wrapping it in a one-segment bio.
 *
 * The buffer must be locked, mapped and have a b_end_io handler set.
 * Returns 0, or -EOPNOTSUPP if the bio was flagged BIO_EOPNOTSUPP by the
 * time submit_bio() returned.
 */
int submit_bh(int rw, struct buffer_head * bh)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);

	/* writes of "ordered" buffers are issued as barrier writes */
	if (buffer_ordered(bh) && (rw == WRITE))
		rw = WRITE_BARRIER;

	/*
	 * Mark the buffer as having been submitted; on a re-submitted write,
	 * forget any earlier write error.
	 */
	if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
		clear_buffer_write_io_error(bh);

	/*
	 * Build a single-vector bio describing the buffer's location on the
	 * device and in its page.
	 * NOTE(review): the bio_alloc() result is used unchecked - presumably
	 * this allocation cannot fail with GFP_NOIO; confirm.
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	/* b_blocknr is in units of b_size; bi_sector in 512-byte sectors */
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/* hold an extra reference so the flags can be inspected afterwards */
	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2937{
2938 int i;
2939
2940 for (i = 0; i < nr; i++) {
2941 struct buffer_head *bh = bhs[i];
2942
2943 if (rw == SWRITE || rw == SWRITE_SYNC)
2944 lock_buffer(bh);
2945 else if (test_set_buffer_locked(bh))
2946 continue;
2947
2948 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
2949 if (test_clear_buffer_dirty(bh)) {
2950 bh->b_end_io = end_buffer_write_sync;
2951 get_bh(bh);
2952 if (rw == SWRITE_SYNC)
2953 submit_bh(WRITE_SYNC, bh);
2954 else
2955 submit_bh(WRITE, bh);
2956 continue;
2957 }
2958 } else {
2959 if (!buffer_uptodate(bh)) {
2960 bh->b_end_io = end_buffer_read_sync;
2961 get_bh(bh);
2962 submit_bh(rw, bh);
2963 continue;
2964 }
2965 }
2966 unlock_buffer(bh);
2967 }
2968}
2969
2970
2971
2972
2973
2974
2975int sync_dirty_buffer(struct buffer_head *bh)
2976{
2977 int ret = 0;
2978
2979 WARN_ON(atomic_read(&bh->b_count) < 1);
2980 lock_buffer(bh);
2981 if (test_clear_buffer_dirty(bh)) {
2982 get_bh(bh);
2983 bh->b_end_io = end_buffer_write_sync;
2984 ret = submit_bh(WRITE_SYNC, bh);
2985 wait_on_buffer(bh);
2986 if (buffer_eopnotsupp(bh)) {
2987 clear_buffer_eopnotsupp(bh);
2988 ret = -EOPNOTSUPP;
2989 }
2990 if (!ret && !buffer_uptodate(bh))
2991 ret = -EIO;
2992 } else {
2993 unlock_buffer(bh);
2994 }
2995 return ret;
2996}
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018static inline int buffer_busy(struct buffer_head *bh)
3019{
3020 return atomic_read(&bh->b_count) |
3021 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3022}
3023
3024static int
3025drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3026{
3027 struct buffer_head *head = page_buffers(page);
3028 struct buffer_head *bh;
3029
3030 bh = head;
3031 do {
3032 if (buffer_write_io_error(bh) && page->mapping)
3033 set_bit(AS_EIO, &page->mapping->flags);
3034 if (buffer_busy(bh))
3035 goto failed;
3036 bh = bh->b_this_page;
3037 } while (bh != head);
3038
3039 do {
3040 struct buffer_head *next = bh->b_this_page;
3041
3042 if (bh->b_assoc_map)
3043 __remove_assoc_queue(bh);
3044 bh = next;
3045 } while (bh != head);
3046 *buffers_to_free = head;
3047 __clear_page_buffers(page);
3048 return 1;
3049failed:
3050 return 0;
3051}
3052
3053int try_to_free_buffers(struct page *page)
3054{
3055 struct address_space * const mapping = page->mapping;
3056 struct buffer_head *buffers_to_free = NULL;
3057 int ret = 0;
3058
3059 BUG_ON(!PageLocked(page));
3060 if (PageWriteback(page))
3061 return 0;
3062
3063 if (mapping == NULL) {
3064 ret = drop_buffers(page, &buffers_to_free);
3065 goto out;
3066 }
3067
3068 spin_lock(&mapping->private_lock);
3069 ret = drop_buffers(page, &buffers_to_free);
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085 if (ret)
3086 cancel_dirty_page(page, PAGE_CACHE_SIZE);
3087 spin_unlock(&mapping->private_lock);
3088out:
3089 if (buffers_to_free) {
3090 struct buffer_head *bh = buffers_to_free;
3091
3092 do {
3093 struct buffer_head *next = bh->b_this_page;
3094 free_buffer_head(bh);
3095 bh = next;
3096 } while (bh != buffers_to_free);
3097 }
3098 return ret;
3099}
3100EXPORT_SYMBOL(try_to_free_buffers);
3101
3102void block_sync_page(struct page *page)
3103{
3104 struct address_space *mapping;
3105
3106 smp_mb();
3107 mapping = page_mapping(page);
3108 if (mapping)
3109 blk_run_backing_dev(mapping->backing_dev_info, page);
3110}
3111
3112
3113
3114
3115
3116
3117
3118
/*
 * The obsolete bdflush system call.  It requires CAP_SYS_ADMIN, logs a
 * warning for the first five calls, and otherwise does nothing - except
 * that func == 1 makes the calling task exit.
 */
asmlinkage long sys_bdflush(int func, long data)
{
	/* number of warnings printed so far (capped at 5) */
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	if (func == 1)
		do_exit(0);
	return 0;
}
3138
3139
3140
3141
/*
 * Slab cache from which buffer heads are allocated.
 */
static struct kmem_cache *bh_cachep;

/*
 * Upper bound on the number of buffer heads; computed in buffer_init() and
 * compared against the running total in recalc_bh_state().
 */
static int max_buffer_heads;

/* Set by recalc_bh_state() when the total exceeds max_buffer_heads. */
int buffer_heads_over_limit;

/* Per-CPU buffer head bookkeeping. */
struct bh_accounting {
	int nr;			/* number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3158
3159static void recalc_bh_state(void)
3160{
3161 int i;
3162 int tot = 0;
3163
3164 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
3165 return;
3166 __get_cpu_var(bh_accounting).ratelimit = 0;
3167 for_each_online_cpu(i)
3168 tot += per_cpu(bh_accounting, i).nr;
3169 buffer_heads_over_limit = (tot > max_buffer_heads);
3170}
3171
3172struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3173{
3174 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3175 if (ret) {
3176 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3177 get_cpu_var(bh_accounting).nr++;
3178 recalc_bh_state();
3179 put_cpu_var(bh_accounting);
3180 }
3181 return ret;
3182}
3183EXPORT_SYMBOL(alloc_buffer_head);
3184
/*
 * Return a buffer head to the slab cache and decrement the per-CPU count of
 * live buffer heads.  The buffer must not be on an association list.
 */
void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	get_cpu_var(bh_accounting).nr--;
	recalc_bh_state();
	put_cpu_var(bh_accounting);
}
EXPORT_SYMBOL(free_buffer_head);
3194
3195static void buffer_exit_cpu(int cpu)
3196{
3197 int i;
3198 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3199
3200 for (i = 0; i < BH_LRU_SIZE; i++) {
3201 brelse(b->bhs[i]);
3202 b->bhs[i] = NULL;
3203 }
3204 get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
3205 per_cpu(bh_accounting, cpu).nr = 0;
3206 put_cpu_var(bh_accounting);
3207}
3208
3209static int buffer_cpu_notify(struct notifier_block *self,
3210 unsigned long action, void *hcpu)
3211{
3212 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3213 buffer_exit_cpu((unsigned long)hcpu);
3214 return NOTIFY_OK;
3215}
3216
3217
3218
3219
3220
3221
3222
3223
/*
 * Test whether a buffer is up to date, locking it if it is not.
 *
 * Returns 1 if the buffer is up to date (the buffer is left unlocked), or 0
 * if it is not, in which case the buffer is returned LOCKED.  The state is
 * re-checked after taking the lock.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);
	if (!buffer_uptodate(bh))
		return 0;

	/* became up to date while we waited for the lock */
	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3235
3236
3237
3238
3239
3240
3241
3242int bh_submit_read(struct buffer_head *bh)
3243{
3244 BUG_ON(!buffer_locked(bh));
3245
3246 if (buffer_uptodate(bh)) {
3247 unlock_buffer(bh);
3248 return 0;
3249 }
3250
3251 get_bh(bh);
3252 bh->b_end_io = end_buffer_read_sync;
3253 submit_bh(READ, bh);
3254 wait_on_buffer(bh);
3255 if (buffer_uptodate(bh))
3256 return 0;
3257 return -EIO;
3258}
3259EXPORT_SYMBOL(bh_submit_read);
3260
/*
 * Slab constructor for buffer heads (passed to kmem_cache_create() in
 * buffer_init()): zero the object and initialise its association list head.
 */
static void
init_buffer_head(struct kmem_cache *cachep, void *data)
{
	struct buffer_head *bh = data;

	memset(bh, 0, sizeof(*bh));
	INIT_LIST_HEAD(&bh->b_assoc_buffers);
}
3269
/*
 * Boot-time initialisation: create the buffer_head slab cache, compute the
 * buffer head limit and register the CPU hotplug callback.
 */
void __init buffer_init(void)
{
	int nrpages;

	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
				SLAB_MEM_SPREAD),
				init_buffer_head);

	/*
	 * Limit the bh occupancy to 10% of the pages reported by
	 * nr_free_buffer_pages(), expressed as a number of buffer heads.
	 */
	nrpages = (nr_free_buffer_pages() * 10) / 100;
	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
	hotcpu_notifier(buffer_cpu_notify, 0);
}
3287
/* Symbols exported here rather than next to their definitions. */
EXPORT_SYMBOL(__bforget);
EXPORT_SYMBOL(__brelse);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_prepare_write);
EXPORT_SYMBOL(block_page_mkwrite);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(block_write_full_page);
EXPORT_SYMBOL(cont_write_begin);
EXPORT_SYMBOL(end_buffer_read_sync);
EXPORT_SYMBOL(end_buffer_write_sync);
EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(fsync_bdev);
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_cont_expand_simple);
EXPORT_SYMBOL(init_buffer);
EXPORT_SYMBOL(invalidate_bdev);
EXPORT_SYMBOL(ll_rw_block);
EXPORT_SYMBOL(mark_buffer_dirty);
EXPORT_SYMBOL(submit_bh);
EXPORT_SYMBOL(sync_dirty_buffer);
EXPORT_SYMBOL(unlock_buffer);
3312