1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/export.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46
47#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
48
49inline void
50init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55EXPORT_SYMBOL(init_buffer);
56
/*
 * Wait action passed to wait_on_bit() / wait_on_bit_lock() in this file.
 * Calls io_schedule() and always returns 0; @word is not examined.
 */
static int sleep_on_buffer(void *word)
{
	io_schedule();

	return 0;
}
62
63void __lock_buffer(struct buffer_head *bh)
64{
65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
66 TASK_UNINTERRUPTIBLE);
67}
68EXPORT_SYMBOL(__lock_buffer);
69
70void unlock_buffer(struct buffer_head *bh)
71{
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_clear_bit();
74 wake_up_bit(&bh->b_state, BH_Lock);
75}
76EXPORT_SYMBOL(unlock_buffer);
77
78
79
80
81
82
83void __wait_on_buffer(struct buffer_head * bh)
84{
85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
86}
87EXPORT_SYMBOL(__wait_on_buffer);
88
/*
 * Detach the private data from @page: clear PagePrivate, zero the private
 * word, and drop one page reference.
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);

	page_cache_release(page);
}
96
97
98static int quiet_error(struct buffer_head *bh)
99{
100 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
101 return 0;
102 return 1;
103}
104
105
106static void buffer_io_error(struct buffer_head *bh)
107{
108 char b[BDEVNAME_SIZE];
109 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
110 bdevname(bh->b_bdev, b),
111 (unsigned long long)bh->b_blocknr);
112}
113
114
115
116
117
118
119
120
121
/*
 * Read-completion helper: record the outcome in the uptodate bit of @bh and
 * unlock the buffer.  No reference is dropped here.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
132
133
134
135
136
/*
 * end_buffer_read_sync - completion handler for a synchronous read
 * @bh:       buffer whose I/O finished
 * @uptodate: non-zero if the read succeeded
 *
 * Updates the uptodate bit, unlocks the buffer and then drops one reference
 * on it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);

	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
143
144void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
145{
146 char b[BDEVNAME_SIZE];
147
148 if (uptodate) {
149 set_buffer_uptodate(bh);
150 } else {
151 if (!quiet_error(bh)) {
152 buffer_io_error(bh);
153 printk(KERN_WARNING "lost page write due to "
154 "I/O error on %s\n",
155 bdevname(bh->b_bdev, b));
156 }
157 set_buffer_write_io_error(bh);
158 clear_buffer_uptodate(bh);
159 }
160 unlock_buffer(bh);
161 put_bh(bh);
162}
163EXPORT_SYMBOL(end_buffer_write_sync);
164
165
166
167
168
169
170
171
172
173
174
175
176static struct buffer_head *
177__find_get_block_slow(struct block_device *bdev, sector_t block)
178{
179 struct inode *bd_inode = bdev->bd_inode;
180 struct address_space *bd_mapping = bd_inode->i_mapping;
181 struct buffer_head *ret = NULL;
182 pgoff_t index;
183 struct buffer_head *bh;
184 struct buffer_head *head;
185 struct page *page;
186 int all_mapped = 1;
187
188 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
189 page = find_get_page(bd_mapping, index);
190 if (!page)
191 goto out;
192
193 spin_lock(&bd_mapping->private_lock);
194 if (!page_has_buffers(page))
195 goto out_unlock;
196 head = page_buffers(page);
197 bh = head;
198 do {
199 if (!buffer_mapped(bh))
200 all_mapped = 0;
201 else if (bh->b_blocknr == block) {
202 ret = bh;
203 get_bh(bh);
204 goto out_unlock;
205 }
206 bh = bh->b_this_page;
207 } while (bh != head);
208
209
210
211
212
213
214 if (all_mapped) {
215 char b[BDEVNAME_SIZE];
216
217 printk("__find_get_block_slow() failed. "
218 "block=%llu, b_blocknr=%llu\n",
219 (unsigned long long)block,
220 (unsigned long long)bh->b_blocknr);
221 printk("b_state=0x%08lx, b_size=%zu\n",
222 bh->b_state, bh->b_size);
223 printk("device %s blocksize: %d\n", bdevname(bdev, b),
224 1 << bd_inode->i_blkbits);
225 }
226out_unlock:
227 spin_unlock(&bd_mapping->private_lock);
228 page_cache_release(page);
229out:
230 return ret;
231}
232
233
234
235
236static void free_more_memory(void)
237{
238 struct zone *zone;
239 int nid;
240
241 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
242 yield();
243
244 for_each_online_node(nid) {
245 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
246 gfp_zone(GFP_NOFS), NULL,
247 &zone);
248 if (zone)
249 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
250 GFP_NOFS, NULL);
251 }
252}
253
254
255
256
257
258static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
259{
260 unsigned long flags;
261 struct buffer_head *first;
262 struct buffer_head *tmp;
263 struct page *page;
264 int page_uptodate = 1;
265
266 BUG_ON(!buffer_async_read(bh));
267
268 page = bh->b_page;
269 if (uptodate) {
270 set_buffer_uptodate(bh);
271 } else {
272 clear_buffer_uptodate(bh);
273 if (!quiet_error(bh))
274 buffer_io_error(bh);
275 SetPageError(page);
276 }
277
278
279
280
281
282
283 first = page_buffers(page);
284 local_irq_save(flags);
285 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
286 clear_buffer_async_read(bh);
287 unlock_buffer(bh);
288 tmp = bh;
289 do {
290 if (!buffer_uptodate(tmp))
291 page_uptodate = 0;
292 if (buffer_async_read(tmp)) {
293 BUG_ON(!buffer_locked(tmp));
294 goto still_busy;
295 }
296 tmp = tmp->b_this_page;
297 } while (tmp != bh);
298 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
299 local_irq_restore(flags);
300
301
302
303
304
305 if (page_uptodate && !PageError(page))
306 SetPageUptodate(page);
307 unlock_page(page);
308 return;
309
310still_busy:
311 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
312 local_irq_restore(flags);
313 return;
314}
315
316
317
318
319
320void end_buffer_async_write(struct buffer_head *bh, int uptodate)
321{
322 char b[BDEVNAME_SIZE];
323 unsigned long flags;
324 struct buffer_head *first;
325 struct buffer_head *tmp;
326 struct page *page;
327
328 BUG_ON(!buffer_async_write(bh));
329
330 page = bh->b_page;
331 if (uptodate) {
332 set_buffer_uptodate(bh);
333 } else {
334 if (!quiet_error(bh)) {
335 buffer_io_error(bh);
336 printk(KERN_WARNING "lost page write due to "
337 "I/O error on %s\n",
338 bdevname(bh->b_bdev, b));
339 }
340 set_bit(AS_EIO, &page->mapping->flags);
341 set_buffer_write_io_error(bh);
342 clear_buffer_uptodate(bh);
343 SetPageError(page);
344 }
345
346 first = page_buffers(page);
347 local_irq_save(flags);
348 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
349
350 clear_buffer_async_write(bh);
351 unlock_buffer(bh);
352 tmp = bh->b_this_page;
353 while (tmp != bh) {
354 if (buffer_async_write(tmp)) {
355 BUG_ON(!buffer_locked(tmp));
356 goto still_busy;
357 }
358 tmp = tmp->b_this_page;
359 }
360 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
361 local_irq_restore(flags);
362 end_page_writeback(page);
363 return;
364
365still_busy:
366 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
367 local_irq_restore(flags);
368 return;
369}
370EXPORT_SYMBOL(end_buffer_async_write);
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393static void mark_buffer_async_read(struct buffer_head *bh)
394{
395 bh->b_end_io = end_buffer_async_read;
396 set_buffer_async_read(bh);
397}
398
399static void mark_buffer_async_write_endio(struct buffer_head *bh,
400 bh_end_io_t *handler)
401{
402 bh->b_end_io = handler;
403 set_buffer_async_write(bh);
404}
405
406void mark_buffer_async_write(struct buffer_head *bh)
407{
408 mark_buffer_async_write_endio(bh, end_buffer_async_write);
409}
410EXPORT_SYMBOL(mark_buffer_async_write);
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465static void __remove_assoc_queue(struct buffer_head *bh)
466{
467 list_del_init(&bh->b_assoc_buffers);
468 WARN_ON(!bh->b_assoc_map);
469 if (buffer_write_io_error(bh))
470 set_bit(AS_EIO, &bh->b_assoc_map->flags);
471 bh->b_assoc_map = NULL;
472}
473
474int inode_has_buffers(struct inode *inode)
475{
476 return !list_empty(&inode->i_data.private_list);
477}
478
479
480
481
482
483
484
485
486
487
488
489static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
490{
491 struct buffer_head *bh;
492 struct list_head *p;
493 int err = 0;
494
495 spin_lock(lock);
496repeat:
497 list_for_each_prev(p, list) {
498 bh = BH_ENTRY(p);
499 if (buffer_locked(bh)) {
500 get_bh(bh);
501 spin_unlock(lock);
502 wait_on_buffer(bh);
503 if (!buffer_uptodate(bh))
504 err = -EIO;
505 brelse(bh);
506 spin_lock(lock);
507 goto repeat;
508 }
509 }
510 spin_unlock(lock);
511 return err;
512}
513
514static void do_thaw_one(struct super_block *sb, void *unused)
515{
516 char b[BDEVNAME_SIZE];
517 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
518 printk(KERN_WARNING "Emergency Thaw on %s\n",
519 bdevname(sb->s_bdev, b));
520}
521
522static void do_thaw_all(struct work_struct *work)
523{
524 iterate_supers(do_thaw_one, NULL);
525 kfree(work);
526 printk(KERN_WARNING "Emergency Thaw complete\n");
527}
528
529
530
531
532
533
534void emergency_thaw_all(void)
535{
536 struct work_struct *work;
537
538 work = kmalloc(sizeof(*work), GFP_ATOMIC);
539 if (work) {
540 INIT_WORK(work, do_thaw_all);
541 schedule_work(work);
542 }
543}
544
545
546
547
548
549
550
551
552
553
554
555
556int sync_mapping_buffers(struct address_space *mapping)
557{
558 struct address_space *buffer_mapping = mapping->assoc_mapping;
559
560 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
561 return 0;
562
563 return fsync_buffers_list(&buffer_mapping->private_lock,
564 &mapping->private_list);
565}
566EXPORT_SYMBOL(sync_mapping_buffers);
567
568
569
570
571
572
573
574void write_boundary_block(struct block_device *bdev,
575 sector_t bblock, unsigned blocksize)
576{
577 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
578 if (bh) {
579 if (buffer_dirty(bh))
580 ll_rw_block(WRITE, 1, &bh);
581 put_bh(bh);
582 }
583}
584
585void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
586{
587 struct address_space *mapping = inode->i_mapping;
588 struct address_space *buffer_mapping = bh->b_page->mapping;
589
590 mark_buffer_dirty(bh);
591 if (!mapping->assoc_mapping) {
592 mapping->assoc_mapping = buffer_mapping;
593 } else {
594 BUG_ON(mapping->assoc_mapping != buffer_mapping);
595 }
596 if (!bh->b_assoc_map) {
597 spin_lock(&buffer_mapping->private_lock);
598 list_move_tail(&bh->b_assoc_buffers,
599 &mapping->private_list);
600 bh->b_assoc_map = mapping;
601 spin_unlock(&buffer_mapping->private_lock);
602 }
603}
604EXPORT_SYMBOL(mark_buffer_dirty_inode);
605
606
607
608
609
610
611
612
613static void __set_page_dirty(struct page *page,
614 struct address_space *mapping, int warn)
615{
616 spin_lock_irq(&mapping->tree_lock);
617 if (page->mapping) {
618 WARN_ON_ONCE(warn && !PageUptodate(page));
619 account_page_dirtied(page, mapping);
620 radix_tree_tag_set(&mapping->page_tree,
621 page_index(page), PAGECACHE_TAG_DIRTY);
622 }
623 spin_unlock_irq(&mapping->tree_lock);
624 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
625}
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652int __set_page_dirty_buffers(struct page *page)
653{
654 int newly_dirty;
655 struct address_space *mapping = page_mapping(page);
656
657 if (unlikely(!mapping))
658 return !TestSetPageDirty(page);
659
660 spin_lock(&mapping->private_lock);
661 if (page_has_buffers(page)) {
662 struct buffer_head *head = page_buffers(page);
663 struct buffer_head *bh = head;
664
665 do {
666 set_buffer_dirty(bh);
667 bh = bh->b_this_page;
668 } while (bh != head);
669 }
670 newly_dirty = !TestSetPageDirty(page);
671 spin_unlock(&mapping->private_lock);
672
673 if (newly_dirty)
674 __set_page_dirty(page, mapping, 1);
675 return newly_dirty;
676}
677EXPORT_SYMBOL(__set_page_dirty_buffers);
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
699{
700 struct buffer_head *bh;
701 struct list_head tmp;
702 struct address_space *mapping;
703 int err = 0, err2;
704 struct blk_plug plug;
705
706 INIT_LIST_HEAD(&tmp);
707 blk_start_plug(&plug);
708
709 spin_lock(lock);
710 while (!list_empty(list)) {
711 bh = BH_ENTRY(list->next);
712 mapping = bh->b_assoc_map;
713 __remove_assoc_queue(bh);
714
715
716 smp_mb();
717 if (buffer_dirty(bh) || buffer_locked(bh)) {
718 list_add(&bh->b_assoc_buffers, &tmp);
719 bh->b_assoc_map = mapping;
720 if (buffer_dirty(bh)) {
721 get_bh(bh);
722 spin_unlock(lock);
723
724
725
726
727
728
729
730 write_dirty_buffer(bh, WRITE_SYNC);
731
732
733
734
735
736
737
738 brelse(bh);
739 spin_lock(lock);
740 }
741 }
742 }
743
744 spin_unlock(lock);
745 blk_finish_plug(&plug);
746 spin_lock(lock);
747
748 while (!list_empty(&tmp)) {
749 bh = BH_ENTRY(tmp.prev);
750 get_bh(bh);
751 mapping = bh->b_assoc_map;
752 __remove_assoc_queue(bh);
753
754
755 smp_mb();
756 if (buffer_dirty(bh)) {
757 list_add(&bh->b_assoc_buffers,
758 &mapping->private_list);
759 bh->b_assoc_map = mapping;
760 }
761 spin_unlock(lock);
762 wait_on_buffer(bh);
763 if (!buffer_uptodate(bh))
764 err = -EIO;
765 brelse(bh);
766 spin_lock(lock);
767 }
768
769 spin_unlock(lock);
770 err2 = osync_buffers_list(lock, list);
771 if (err)
772 return err;
773 else
774 return err2;
775}
776
777
778
779
780
781
782
783
784
785
786void invalidate_inode_buffers(struct inode *inode)
787{
788 if (inode_has_buffers(inode)) {
789 struct address_space *mapping = &inode->i_data;
790 struct list_head *list = &mapping->private_list;
791 struct address_space *buffer_mapping = mapping->assoc_mapping;
792
793 spin_lock(&buffer_mapping->private_lock);
794 while (!list_empty(list))
795 __remove_assoc_queue(BH_ENTRY(list->next));
796 spin_unlock(&buffer_mapping->private_lock);
797 }
798}
799EXPORT_SYMBOL(invalidate_inode_buffers);
800
801
802
803
804
805
806
807int remove_inode_buffers(struct inode *inode)
808{
809 int ret = 1;
810
811 if (inode_has_buffers(inode)) {
812 struct address_space *mapping = &inode->i_data;
813 struct list_head *list = &mapping->private_list;
814 struct address_space *buffer_mapping = mapping->assoc_mapping;
815
816 spin_lock(&buffer_mapping->private_lock);
817 while (!list_empty(list)) {
818 struct buffer_head *bh = BH_ENTRY(list->next);
819 if (buffer_dirty(bh)) {
820 ret = 0;
821 break;
822 }
823 __remove_assoc_queue(bh);
824 }
825 spin_unlock(&buffer_mapping->private_lock);
826 }
827 return ret;
828}
829
830
831
832
833
834
835
836
837
838
839struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
840 int retry)
841{
842 struct buffer_head *bh, *head;
843 long offset;
844
845try_again:
846 head = NULL;
847 offset = PAGE_SIZE;
848 while ((offset -= size) >= 0) {
849 bh = alloc_buffer_head(GFP_NOFS);
850 if (!bh)
851 goto no_grow;
852
853 bh->b_bdev = NULL;
854 bh->b_this_page = head;
855 bh->b_blocknr = -1;
856 head = bh;
857
858 bh->b_state = 0;
859 atomic_set(&bh->b_count, 0);
860 bh->b_size = size;
861
862
863 set_bh_page(bh, page, offset);
864
865 init_buffer(bh, NULL, NULL);
866 }
867 return head;
868
869
870
871no_grow:
872 if (head) {
873 do {
874 bh = head;
875 head = head->b_this_page;
876 free_buffer_head(bh);
877 } while (head);
878 }
879
880
881
882
883
884
885
886 if (!retry)
887 return NULL;
888
889
890
891
892
893
894
895 free_more_memory();
896 goto try_again;
897}
898EXPORT_SYMBOL_GPL(alloc_page_buffers);
899
900static inline void
901link_dev_buffers(struct page *page, struct buffer_head *head)
902{
903 struct buffer_head *bh, *tail;
904
905 bh = head;
906 do {
907 tail = bh;
908 bh = bh->b_this_page;
909 } while (bh);
910 tail->b_this_page = head;
911 attach_page_buffers(page, head);
912}
913
914
915
916
917static sector_t
918init_page_buffers(struct page *page, struct block_device *bdev,
919 sector_t block, int size)
920{
921 struct buffer_head *head = page_buffers(page);
922 struct buffer_head *bh = head;
923 int uptodate = PageUptodate(page);
924 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
925
926 do {
927 if (!buffer_mapped(bh)) {
928 init_buffer(bh, NULL, NULL);
929 bh->b_bdev = bdev;
930 bh->b_blocknr = block;
931 if (uptodate)
932 set_buffer_uptodate(bh);
933 if (block < end_block)
934 set_buffer_mapped(bh);
935 }
936 block++;
937 bh = bh->b_this_page;
938 } while (bh != head);
939
940
941
942
943 return end_block;
944}
945
946
947
948
949
950
951static int
952grow_dev_page(struct block_device *bdev, sector_t block,
953 pgoff_t index, int size, int sizebits)
954{
955 struct inode *inode = bdev->bd_inode;
956 struct page *page;
957 struct buffer_head *bh;
958 sector_t end_block;
959 int ret = 0;
960
961 page = find_or_create_page(inode->i_mapping, index,
962 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
963 if (!page)
964 return ret;
965
966 BUG_ON(!PageLocked(page));
967
968 if (page_has_buffers(page)) {
969 bh = page_buffers(page);
970 if (bh->b_size == size) {
971 end_block = init_page_buffers(page, bdev,
972 index << sizebits, size);
973 goto done;
974 }
975 if (!try_to_free_buffers(page))
976 goto failed;
977 }
978
979
980
981
982 bh = alloc_page_buffers(page, size, 0);
983 if (!bh)
984 goto failed;
985
986
987
988
989
990
991 spin_lock(&inode->i_mapping->private_lock);
992 link_dev_buffers(page, bh);
993 end_block = init_page_buffers(page, bdev, index << sizebits, size);
994 spin_unlock(&inode->i_mapping->private_lock);
995done:
996 ret = (block < end_block) ? 1 : -ENXIO;
997failed:
998 unlock_page(page);
999 page_cache_release(page);
1000 return ret;
1001}
1002
1003
1004
1005
1006
1007static int
1008grow_buffers(struct block_device *bdev, sector_t block, int size)
1009{
1010 pgoff_t index;
1011 int sizebits;
1012
1013 sizebits = -1;
1014 do {
1015 sizebits++;
1016 } while ((size << sizebits) < PAGE_SIZE);
1017
1018 index = block >> sizebits;
1019
1020
1021
1022
1023
1024 if (unlikely(index != block >> sizebits)) {
1025 char b[BDEVNAME_SIZE];
1026
1027 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1028 "device %s\n",
1029 __func__, (unsigned long long)block,
1030 bdevname(bdev, b));
1031 return -EIO;
1032 }
1033
1034
1035 return grow_dev_page(bdev, block, index, size, sizebits);
1036}
1037
1038static struct buffer_head *
1039__getblk_slow(struct block_device *bdev, sector_t block, int size)
1040{
1041
1042 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1043 (size < 512 || size > PAGE_SIZE))) {
1044 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1045 size);
1046 printk(KERN_ERR "logical block size: %d\n",
1047 bdev_logical_block_size(bdev));
1048
1049 dump_stack();
1050 return NULL;
1051 }
1052
1053 for (;;) {
1054 struct buffer_head *bh;
1055 int ret;
1056
1057 bh = __find_get_block(bdev, block, size);
1058 if (bh)
1059 return bh;
1060
1061 ret = grow_buffers(bdev, block, size);
1062 if (ret < 0)
1063 return NULL;
1064 if (ret == 0)
1065 free_more_memory();
1066 }
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104void mark_buffer_dirty(struct buffer_head *bh)
1105{
1106 WARN_ON_ONCE(!buffer_uptodate(bh));
1107
1108
1109
1110
1111
1112
1113
1114 if (buffer_dirty(bh)) {
1115 smp_mb();
1116 if (buffer_dirty(bh))
1117 return;
1118 }
1119
1120 if (!test_set_buffer_dirty(bh)) {
1121 struct page *page = bh->b_page;
1122 if (!TestSetPageDirty(page)) {
1123 struct address_space *mapping = page_mapping(page);
1124 if (mapping)
1125 __set_page_dirty(page, mapping, 0);
1126 }
1127 }
1128}
1129EXPORT_SYMBOL(mark_buffer_dirty);
1130
1131
1132
1133
1134
1135
1136
1137
1138void __brelse(struct buffer_head * buf)
1139{
1140 if (atomic_read(&buf->b_count)) {
1141 put_bh(buf);
1142 return;
1143 }
1144 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1145}
1146EXPORT_SYMBOL(__brelse);
1147
1148
1149
1150
1151
1152void __bforget(struct buffer_head *bh)
1153{
1154 clear_buffer_dirty(bh);
1155 if (bh->b_assoc_map) {
1156 struct address_space *buffer_mapping = bh->b_page->mapping;
1157
1158 spin_lock(&buffer_mapping->private_lock);
1159 list_del_init(&bh->b_assoc_buffers);
1160 bh->b_assoc_map = NULL;
1161 spin_unlock(&buffer_mapping->private_lock);
1162 }
1163 __brelse(bh);
1164}
1165EXPORT_SYMBOL(__bforget);
1166
1167static struct buffer_head *__bread_slow(struct buffer_head *bh)
1168{
1169 lock_buffer(bh);
1170 if (buffer_uptodate(bh)) {
1171 unlock_buffer(bh);
1172 return bh;
1173 } else {
1174 get_bh(bh);
1175 bh->b_end_io = end_buffer_read_sync;
1176 submit_bh(READ, bh);
1177 wait_on_buffer(bh);
1178 if (buffer_uptodate(bh))
1179 return bh;
1180 }
1181 brelse(bh);
1182 return NULL;
1183}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199#define BH_LRU_SIZE 8
1200
1201struct bh_lru {
1202 struct buffer_head *bhs[BH_LRU_SIZE];
1203};
1204
1205static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1206
1207#ifdef CONFIG_SMP
1208#define bh_lru_lock() local_irq_disable()
1209#define bh_lru_unlock() local_irq_enable()
1210#else
1211#define bh_lru_lock() preempt_disable()
1212#define bh_lru_unlock() preempt_enable()
1213#endif
1214
/*
 * Assert that interrupts are enabled.  The check is compiled in only where
 * irqs_disabled is available as a preprocessor macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1221
1222
1223
1224
1225static void bh_lru_install(struct buffer_head *bh)
1226{
1227 struct buffer_head *evictee = NULL;
1228
1229 check_irqs_on();
1230 bh_lru_lock();
1231 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1232 struct buffer_head *bhs[BH_LRU_SIZE];
1233 int in;
1234 int out = 0;
1235
1236 get_bh(bh);
1237 bhs[out++] = bh;
1238 for (in = 0; in < BH_LRU_SIZE; in++) {
1239 struct buffer_head *bh2 =
1240 __this_cpu_read(bh_lrus.bhs[in]);
1241
1242 if (bh2 == bh) {
1243 __brelse(bh2);
1244 } else {
1245 if (out >= BH_LRU_SIZE) {
1246 BUG_ON(evictee != NULL);
1247 evictee = bh2;
1248 } else {
1249 bhs[out++] = bh2;
1250 }
1251 }
1252 }
1253 while (out < BH_LRU_SIZE)
1254 bhs[out++] = NULL;
1255 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1256 }
1257 bh_lru_unlock();
1258
1259 if (evictee)
1260 __brelse(evictee);
1261}
1262
1263
1264
1265
1266static struct buffer_head *
1267lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1268{
1269 struct buffer_head *ret = NULL;
1270 unsigned int i;
1271
1272 check_irqs_on();
1273 bh_lru_lock();
1274 for (i = 0; i < BH_LRU_SIZE; i++) {
1275 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1276
1277 if (bh && bh->b_bdev == bdev &&
1278 bh->b_blocknr == block && bh->b_size == size) {
1279 if (i) {
1280 while (i) {
1281 __this_cpu_write(bh_lrus.bhs[i],
1282 __this_cpu_read(bh_lrus.bhs[i - 1]));
1283 i--;
1284 }
1285 __this_cpu_write(bh_lrus.bhs[0], bh);
1286 }
1287 get_bh(bh);
1288 ret = bh;
1289 break;
1290 }
1291 }
1292 bh_lru_unlock();
1293 return ret;
1294}
1295
1296
1297
1298
1299
1300
1301struct buffer_head *
1302__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1303{
1304 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1305
1306 if (bh == NULL) {
1307 bh = __find_get_block_slow(bdev, block);
1308 if (bh)
1309 bh_lru_install(bh);
1310 }
1311 if (bh)
1312 touch_buffer(bh);
1313 return bh;
1314}
1315EXPORT_SYMBOL(__find_get_block);
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325struct buffer_head *
1326__getblk(struct block_device *bdev, sector_t block, unsigned size)
1327{
1328 struct buffer_head *bh = __find_get_block(bdev, block, size);
1329
1330 might_sleep();
1331 if (bh == NULL)
1332 bh = __getblk_slow(bdev, block, size);
1333 return bh;
1334}
1335EXPORT_SYMBOL(__getblk);
1336
1337
1338
1339
1340void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1341{
1342 struct buffer_head *bh = __getblk(bdev, block, size);
1343 if (likely(bh)) {
1344 ll_rw_block(READA, 1, &bh);
1345 brelse(bh);
1346 }
1347}
1348EXPORT_SYMBOL(__breadahead);
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359struct buffer_head *
1360__bread(struct block_device *bdev, sector_t block, unsigned size)
1361{
1362 struct buffer_head *bh = __getblk(bdev, block, size);
1363
1364 if (likely(bh) && !buffer_uptodate(bh))
1365 bh = __bread_slow(bh);
1366 return bh;
1367}
1368EXPORT_SYMBOL(__bread);
1369
1370
1371
1372
1373
1374
1375static void invalidate_bh_lru(void *arg)
1376{
1377 struct bh_lru *b = &get_cpu_var(bh_lrus);
1378 int i;
1379
1380 for (i = 0; i < BH_LRU_SIZE; i++) {
1381 brelse(b->bhs[i]);
1382 b->bhs[i] = NULL;
1383 }
1384 put_cpu_var(bh_lrus);
1385}
1386
1387static bool has_bh_in_lru(int cpu, void *dummy)
1388{
1389 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1390 int i;
1391
1392 for (i = 0; i < BH_LRU_SIZE; i++) {
1393 if (b->bhs[i])
1394 return 1;
1395 }
1396
1397 return 0;
1398}
1399
1400void invalidate_bh_lrus(void)
1401{
1402 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1403}
1404EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1405
1406void set_bh_page(struct buffer_head *bh,
1407 struct page *page, unsigned long offset)
1408{
1409 bh->b_page = page;
1410 BUG_ON(offset >= PAGE_SIZE);
1411 if (PageHighMem(page))
1412
1413
1414
1415 bh->b_data = (char *)(0 + offset);
1416 else
1417 bh->b_data = page_address(page) + offset;
1418}
1419EXPORT_SYMBOL(set_bh_page);
1420
1421
1422
1423
1424static void discard_buffer(struct buffer_head * bh)
1425{
1426 lock_buffer(bh);
1427 clear_buffer_dirty(bh);
1428 bh->b_bdev = NULL;
1429 clear_buffer_mapped(bh);
1430 clear_buffer_req(bh);
1431 clear_buffer_new(bh);
1432 clear_buffer_delay(bh);
1433 clear_buffer_unwritten(bh);
1434 unlock_buffer(bh);
1435}
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452void block_invalidatepage(struct page *page, unsigned long offset)
1453{
1454 struct buffer_head *head, *bh, *next;
1455 unsigned int curr_off = 0;
1456
1457 BUG_ON(!PageLocked(page));
1458 if (!page_has_buffers(page))
1459 goto out;
1460
1461 head = page_buffers(page);
1462 bh = head;
1463 do {
1464 unsigned int next_off = curr_off + bh->b_size;
1465 next = bh->b_this_page;
1466
1467
1468
1469
1470 if (offset <= curr_off)
1471 discard_buffer(bh);
1472 curr_off = next_off;
1473 bh = next;
1474 } while (bh != head);
1475
1476
1477
1478
1479
1480
1481 if (offset == 0)
1482 try_to_release_page(page, 0);
1483out:
1484 return;
1485}
1486EXPORT_SYMBOL(block_invalidatepage);
1487
1488
1489
1490
1491
1492
1493void create_empty_buffers(struct page *page,
1494 unsigned long blocksize, unsigned long b_state)
1495{
1496 struct buffer_head *bh, *head, *tail;
1497
1498 head = alloc_page_buffers(page, blocksize, 1);
1499 bh = head;
1500 do {
1501 bh->b_state |= b_state;
1502 tail = bh;
1503 bh = bh->b_this_page;
1504 } while (bh);
1505 tail->b_this_page = head;
1506
1507 spin_lock(&page->mapping->private_lock);
1508 if (PageUptodate(page) || PageDirty(page)) {
1509 bh = head;
1510 do {
1511 if (PageDirty(page))
1512 set_buffer_dirty(bh);
1513 if (PageUptodate(page))
1514 set_buffer_uptodate(bh);
1515 bh = bh->b_this_page;
1516 } while (bh != head);
1517 }
1518 attach_page_buffers(page, head);
1519 spin_unlock(&page->mapping->private_lock);
1520}
1521EXPORT_SYMBOL(create_empty_buffers);
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1540{
1541 struct buffer_head *old_bh;
1542
1543 might_sleep();
1544
1545 old_bh = __find_get_block_slow(bdev, block);
1546 if (old_bh) {
1547 clear_buffer_dirty(old_bh);
1548 wait_on_buffer(old_bh);
1549 clear_buffer_req(old_bh);
1550 __brelse(old_bh);
1551 }
1552}
1553EXPORT_SYMBOL(unmap_underlying_metadata);
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584static int __block_write_full_page(struct inode *inode, struct page *page,
1585 get_block_t *get_block, struct writeback_control *wbc,
1586 bh_end_io_t *handler)
1587{
1588 int err;
1589 sector_t block;
1590 sector_t last_block;
1591 struct buffer_head *bh, *head;
1592 const unsigned blocksize = 1 << inode->i_blkbits;
1593 int nr_underway = 0;
1594 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1595 WRITE_SYNC : WRITE);
1596
1597 BUG_ON(!PageLocked(page));
1598
1599 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1600
1601 if (!page_has_buffers(page)) {
1602 create_empty_buffers(page, blocksize,
1603 (1 << BH_Dirty)|(1 << BH_Uptodate));
1604 }
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1617 head = page_buffers(page);
1618 bh = head;
1619
1620
1621
1622
1623
1624 do {
1625 if (block > last_block) {
1626
1627
1628
1629
1630
1631
1632
1633
1634 clear_buffer_dirty(bh);
1635 set_buffer_uptodate(bh);
1636 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1637 buffer_dirty(bh)) {
1638 WARN_ON(bh->b_size != blocksize);
1639 err = get_block(inode, block, bh, 1);
1640 if (err)
1641 goto recover;
1642 clear_buffer_delay(bh);
1643 if (buffer_new(bh)) {
1644
1645 clear_buffer_new(bh);
1646 unmap_underlying_metadata(bh->b_bdev,
1647 bh->b_blocknr);
1648 }
1649 }
1650 bh = bh->b_this_page;
1651 block++;
1652 } while (bh != head);
1653
1654 do {
1655 if (!buffer_mapped(bh))
1656 continue;
1657
1658
1659
1660
1661
1662
1663
1664 if (wbc->sync_mode != WB_SYNC_NONE) {
1665 lock_buffer(bh);
1666 } else if (!trylock_buffer(bh)) {
1667 redirty_page_for_writepage(wbc, page);
1668 continue;
1669 }
1670 if (test_clear_buffer_dirty(bh)) {
1671 mark_buffer_async_write_endio(bh, handler);
1672 } else {
1673 unlock_buffer(bh);
1674 }
1675 } while ((bh = bh->b_this_page) != head);
1676
1677
1678
1679
1680
1681 BUG_ON(PageWriteback(page));
1682 set_page_writeback(page);
1683
1684 do {
1685 struct buffer_head *next = bh->b_this_page;
1686 if (buffer_async_write(bh)) {
1687 submit_bh(write_op, bh);
1688 nr_underway++;
1689 }
1690 bh = next;
1691 } while (bh != head);
1692 unlock_page(page);
1693
1694 err = 0;
1695done:
1696 if (nr_underway == 0) {
1697
1698
1699
1700
1701
1702 end_page_writeback(page);
1703
1704
1705
1706
1707
1708 }
1709 return err;
1710
1711recover:
1712
1713
1714
1715
1716
1717
1718 bh = head;
1719
1720 do {
1721 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1722 !buffer_delay(bh)) {
1723 lock_buffer(bh);
1724 mark_buffer_async_write_endio(bh, handler);
1725 } else {
1726
1727
1728
1729
1730 clear_buffer_dirty(bh);
1731 }
1732 } while ((bh = bh->b_this_page) != head);
1733 SetPageError(page);
1734 BUG_ON(PageWriteback(page));
1735 mapping_set_error(page->mapping, err);
1736 set_page_writeback(page);
1737 do {
1738 struct buffer_head *next = bh->b_this_page;
1739 if (buffer_async_write(bh)) {
1740 clear_buffer_dirty(bh);
1741 submit_bh(write_op, bh);
1742 nr_underway++;
1743 }
1744 bh = next;
1745 } while (bh != head);
1746 unlock_page(page);
1747 goto done;
1748}
1749
1750
1751
1752
1753
1754
1755void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1756{
1757 unsigned int block_start, block_end;
1758 struct buffer_head *head, *bh;
1759
1760 BUG_ON(!PageLocked(page));
1761 if (!page_has_buffers(page))
1762 return;
1763
1764 bh = head = page_buffers(page);
1765 block_start = 0;
1766 do {
1767 block_end = block_start + bh->b_size;
1768
1769 if (buffer_new(bh)) {
1770 if (block_end > from && block_start < to) {
1771 if (!PageUptodate(page)) {
1772 unsigned start, size;
1773
1774 start = max(from, block_start);
1775 size = min(to, block_end) - start;
1776
1777 zero_user(page, start, size);
1778 set_buffer_uptodate(bh);
1779 }
1780
1781 clear_buffer_new(bh);
1782 mark_buffer_dirty(bh);
1783 }
1784 }
1785
1786 block_start = block_end;
1787 bh = bh->b_this_page;
1788 } while (bh != head);
1789}
1790EXPORT_SYMBOL(page_zero_new_buffers);
1791
/*
 * __block_write_begin - prepare the buffers of a locked page for a write
 * @page:	locked page that will receive the data
 * @pos:	file position of the write; only its offset within the page
 *		is used here
 * @len:	number of bytes to be written starting at that offset
 * @get_block:	filesystem callback used to map (create = 1) blocks
 *
 * Attaches empty buffers to the page if it has none, then walks every
 * buffer of the page.  Buffers overlapping [from, to) that are not mapped
 * are mapped through @get_block; for freshly allocated blocks the parts
 * outside the write range are zeroed, and partially overwritten buffers
 * that are not uptodate are read in and waited for.
 *
 * Returns 0 on success, the error returned by @get_block, or -EIO if one
 * of the reads did not leave its buffer uptodate.  On any error
 * page_zero_new_buffers() is run over [from, to).
 */
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	/*
	 * A read is only queued for a buffer that overlaps [from, to) and
	 * sticks out of it, i.e. the buffers holding the two edges of the
	 * range, hence the two-entry wait array.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	bbits = inode->i_blkbits;
	/* File block number corresponding to the first buffer of the page. */
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	/*
	 * "bh != head || !block_start" holds on the first pass (block_start
	 * is 0) and then until the buffer ring wraps back to head.
	 */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		/* Buffer lies completely outside the range being written. */
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				/*
				 * Newly allocated block only partly covered
				 * by the write: the segments [to, block_end)
				 * and [block_start, from) are handed to
				 * zero_user_segments().
				 */
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		/*
		 * The write covers only part of a buffer whose contents are
		 * not uptodate (and which is neither delayed nor unwritten):
		 * start a read so the untouched bytes are valid.
		 */
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}
	}
	/*
	 * Wait for every read that was queued above, even when the loop
	 * was left early because of a get_block() error.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
EXPORT_SYMBOL(__block_write_begin);
1875
1876static int __block_commit_write(struct inode *inode, struct page *page,
1877 unsigned from, unsigned to)
1878{
1879 unsigned block_start, block_end;
1880 int partial = 0;
1881 unsigned blocksize;
1882 struct buffer_head *bh, *head;
1883
1884 blocksize = 1 << inode->i_blkbits;
1885
1886 for(bh = head = page_buffers(page), block_start = 0;
1887 bh != head || !block_start;
1888 block_start=block_end, bh = bh->b_this_page) {
1889 block_end = block_start + blocksize;
1890 if (block_end <= from || block_start >= to) {
1891 if (!buffer_uptodate(bh))
1892 partial = 1;
1893 } else {
1894 set_buffer_uptodate(bh);
1895 mark_buffer_dirty(bh);
1896 }
1897 clear_buffer_new(bh);
1898 }
1899
1900
1901
1902
1903
1904
1905
1906 if (!partial)
1907 SetPageUptodate(page);
1908 return 0;
1909}
1910
1911
1912
1913
1914
1915
1916
1917int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
1918 unsigned flags, struct page **pagep, get_block_t *get_block)
1919{
1920 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1921 struct page *page;
1922 int status;
1923
1924 page = grab_cache_page_write_begin(mapping, index, flags);
1925 if (!page)
1926 return -ENOMEM;
1927
1928 status = __block_write_begin(page, pos, len, get_block);
1929 if (unlikely(status)) {
1930 unlock_page(page);
1931 page_cache_release(page);
1932 page = NULL;
1933 }
1934
1935 *pagep = page;
1936 return status;
1937}
1938EXPORT_SYMBOL(block_write_begin);
1939
1940int block_write_end(struct file *file, struct address_space *mapping,
1941 loff_t pos, unsigned len, unsigned copied,
1942 struct page *page, void *fsdata)
1943{
1944 struct inode *inode = mapping->host;
1945 unsigned start;
1946
1947 start = pos & (PAGE_CACHE_SIZE - 1);
1948
1949 if (unlikely(copied < len)) {
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962 if (!PageUptodate(page))
1963 copied = 0;
1964
1965 page_zero_new_buffers(page, start+copied, start+len);
1966 }
1967 flush_dcache_page(page);
1968
1969
1970 __block_commit_write(inode, page, start, start+copied);
1971
1972 return copied;
1973}
1974EXPORT_SYMBOL(block_write_end);
1975
1976int generic_write_end(struct file *file, struct address_space *mapping,
1977 loff_t pos, unsigned len, unsigned copied,
1978 struct page *page, void *fsdata)
1979{
1980 struct inode *inode = mapping->host;
1981 int i_size_changed = 0;
1982
1983 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1984
1985
1986
1987
1988
1989
1990
1991
1992 if (pos+copied > inode->i_size) {
1993 i_size_write(inode, pos+copied);
1994 i_size_changed = 1;
1995 }
1996
1997 unlock_page(page);
1998 page_cache_release(page);
1999
2000
2001
2002
2003
2004
2005
2006 if (i_size_changed)
2007 mark_inode_dirty(inode);
2008
2009 return copied;
2010}
2011EXPORT_SYMBOL(generic_write_end);
2012
2013
2014
2015
2016
2017
2018
2019
2020int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2021 unsigned long from)
2022{
2023 struct inode *inode = page->mapping->host;
2024 unsigned block_start, block_end, blocksize;
2025 unsigned to;
2026 struct buffer_head *bh, *head;
2027 int ret = 1;
2028
2029 if (!page_has_buffers(page))
2030 return 0;
2031
2032 blocksize = 1 << inode->i_blkbits;
2033 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2034 to = from + to;
2035 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2036 return 0;
2037
2038 head = page_buffers(page);
2039 bh = head;
2040 block_start = 0;
2041 do {
2042 block_end = block_start + blocksize;
2043 if (block_end > from && block_start < to) {
2044 if (!buffer_uptodate(bh)) {
2045 ret = 0;
2046 break;
2047 }
2048 if (block_end >= to)
2049 break;
2050 }
2051 block_start = block_end;
2052 bh = bh->b_this_page;
2053 } while (bh != head);
2054
2055 return ret;
2056}
2057EXPORT_SYMBOL(block_is_partially_uptodate);
2058
2059
2060
2061
2062
2063
2064
2065
/*
 * block_read_full_page - read all not-uptodate buffers of a locked page
 * @page:	locked page to fill
 * @get_block:	filesystem callback used to map blocks (create = 0)
 *
 * Attaches empty buffers if the page has none, maps each buffer that is
 * not uptodate, zero-fills buffers that stay unmapped, and submits reads
 * for the remaining ones.  If nothing needs reading the page is unlocked
 * here (and marked uptodate unless an error was flagged on it).
 *
 * Always returns 0; a get_block() failure is recorded with SetPageError().
 * NOTE(review): when reads are submitted the page is not unlocked in this
 * function - presumably the async read completion does that; confirm.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize;
	int nr, i;
	int fully_mapped = 1;

	BUG_ON(!PageLocked(page));
	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	/* iblock: first file block of this page; lblock: i_size in blocks, rounded up */
	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Stage one: collect the buffers that need I/O in arr[].  Note that
	 * "continue" jumps to the loop condition, which is what advances i,
	 * iblock and bh.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			/* Only blocks below the (rounded up) i_size are looked up. */
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			/* Still unmapped: zero-fill the block in the page. */
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/* get_block() may have made the buffer uptodate itself. */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * No buffer needs I/O.  The page is marked uptodate unless
		 * an error was flagged on it.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage two: lock the buffers and set them up for async read. */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage three: start the I/O.  Uptodateness is re-checked now that
	 * the buffer lock is held; a buffer that became uptodate in the
	 * meantime is completed directly instead of being read.
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
2153
2154
2155
2156
2157
2158int generic_cont_expand_simple(struct inode *inode, loff_t size)
2159{
2160 struct address_space *mapping = inode->i_mapping;
2161 struct page *page;
2162 void *fsdata;
2163 int err;
2164
2165 err = inode_newsize_ok(inode, size);
2166 if (err)
2167 goto out;
2168
2169 err = pagecache_write_begin(NULL, mapping, size, 0,
2170 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2171 &page, &fsdata);
2172 if (err)
2173 goto out;
2174
2175 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2176 BUG_ON(err > 0);
2177
2178out:
2179 return err;
2180}
2181EXPORT_SYMBOL(generic_cont_expand_simple);
2182
2183static int cont_expand_zero(struct file *file, struct address_space *mapping,
2184 loff_t pos, loff_t *bytes)
2185{
2186 struct inode *inode = mapping->host;
2187 unsigned blocksize = 1 << inode->i_blkbits;
2188 struct page *page;
2189 void *fsdata;
2190 pgoff_t index, curidx;
2191 loff_t curpos;
2192 unsigned zerofrom, offset, len;
2193 int err = 0;
2194
2195 index = pos >> PAGE_CACHE_SHIFT;
2196 offset = pos & ~PAGE_CACHE_MASK;
2197
2198 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2199 zerofrom = curpos & ~PAGE_CACHE_MASK;
2200 if (zerofrom & (blocksize-1)) {
2201 *bytes |= (blocksize-1);
2202 (*bytes)++;
2203 }
2204 len = PAGE_CACHE_SIZE - zerofrom;
2205
2206 err = pagecache_write_begin(file, mapping, curpos, len,
2207 AOP_FLAG_UNINTERRUPTIBLE,
2208 &page, &fsdata);
2209 if (err)
2210 goto out;
2211 zero_user(page, zerofrom, len);
2212 err = pagecache_write_end(file, mapping, curpos, len, len,
2213 page, fsdata);
2214 if (err < 0)
2215 goto out;
2216 BUG_ON(err != len);
2217 err = 0;
2218
2219 balance_dirty_pages_ratelimited(mapping);
2220 }
2221
2222
2223 if (index == curidx) {
2224 zerofrom = curpos & ~PAGE_CACHE_MASK;
2225
2226 if (offset <= zerofrom) {
2227 goto out;
2228 }
2229 if (zerofrom & (blocksize-1)) {
2230 *bytes |= (blocksize-1);
2231 (*bytes)++;
2232 }
2233 len = offset - zerofrom;
2234
2235 err = pagecache_write_begin(file, mapping, curpos, len,
2236 AOP_FLAG_UNINTERRUPTIBLE,
2237 &page, &fsdata);
2238 if (err)
2239 goto out;
2240 zero_user(page, zerofrom, len);
2241 err = pagecache_write_end(file, mapping, curpos, len, len,
2242 page, fsdata);
2243 if (err < 0)
2244 goto out;
2245 BUG_ON(err != len);
2246 err = 0;
2247 }
2248out:
2249 return err;
2250}
2251
2252
2253
2254
2255
2256int cont_write_begin(struct file *file, struct address_space *mapping,
2257 loff_t pos, unsigned len, unsigned flags,
2258 struct page **pagep, void **fsdata,
2259 get_block_t *get_block, loff_t *bytes)
2260{
2261 struct inode *inode = mapping->host;
2262 unsigned blocksize = 1 << inode->i_blkbits;
2263 unsigned zerofrom;
2264 int err;
2265
2266 err = cont_expand_zero(file, mapping, pos, bytes);
2267 if (err)
2268 return err;
2269
2270 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2271 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2272 *bytes |= (blocksize-1);
2273 (*bytes)++;
2274 }
2275
2276 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2277}
2278EXPORT_SYMBOL(cont_write_begin);
2279
2280int block_commit_write(struct page *page, unsigned from, unsigned to)
2281{
2282 struct inode *inode = page->mapping->host;
2283 __block_commit_write(inode,page,from,to);
2284 return 0;
2285}
2286EXPORT_SYMBOL(block_commit_write);
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2307 get_block_t get_block)
2308{
2309 struct page *page = vmf->page;
2310 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2311 unsigned long end;
2312 loff_t size;
2313 int ret;
2314
2315 lock_page(page);
2316 size = i_size_read(inode);
2317 if ((page->mapping != inode->i_mapping) ||
2318 (page_offset(page) > size)) {
2319
2320 ret = -EFAULT;
2321 goto out_unlock;
2322 }
2323
2324
2325 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2326 end = size & ~PAGE_CACHE_MASK;
2327 else
2328 end = PAGE_CACHE_SIZE;
2329
2330 ret = __block_write_begin(page, 0, end, get_block);
2331 if (!ret)
2332 ret = block_commit_write(page, 0, end);
2333
2334 if (unlikely(ret < 0))
2335 goto out_unlock;
2336 set_page_dirty(page);
2337 wait_on_page_writeback(page);
2338 return 0;
2339out_unlock:
2340 unlock_page(page);
2341 return ret;
2342}
2343EXPORT_SYMBOL(__block_page_mkwrite);
2344
2345int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2346 get_block_t get_block)
2347{
2348 int ret;
2349 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2350
2351 sb_start_pagefault(sb);
2352
2353
2354
2355
2356
2357 file_update_time(vma->vm_file);
2358
2359 ret = __block_page_mkwrite(vma, vmf, get_block);
2360 sb_end_pagefault(sb);
2361 return block_page_mkwrite_return(ret);
2362}
2363EXPORT_SYMBOL(block_page_mkwrite);
2364
2365
2366
2367
2368
2369
/*
 * I/O completion handler installed by nobh_write_begin() on the buffers
 * it reads.  It only forwards to __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2374
2375
2376
2377
2378
2379
2380static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2381{
2382 struct buffer_head *bh;
2383
2384 BUG_ON(!PageLocked(page));
2385
2386 spin_lock(&page->mapping->private_lock);
2387 bh = head;
2388 do {
2389 if (PageDirty(page))
2390 set_buffer_dirty(bh);
2391 if (!bh->b_this_page)
2392 bh->b_this_page = head;
2393 bh = bh->b_this_page;
2394 } while (bh != head);
2395 attach_page_buffers(page, head);
2396 spin_unlock(&page->mapping->private_lock);
2397}
2398
2399
2400
2401
2402
2403
/*
 * nobh_write_begin - write_begin that avoids leaving buffers on the page
 * @mapping:	address space being written to
 * @pos:	file position of the write
 * @len:	length of the write
 * @flags:	flags handed to grab_cache_page_write_begin()
 * @pagep:	out: the locked page, or NULL on failure
 * @fsdata:	out: temporary buffer list for nobh_write_end(), or NULL
 * @get_block:	filesystem block mapping callback
 *
 * If the page already has buffers the work is delegated to
 * __block_write_begin().  If the page is already flagged mapped-to-disk
 * nothing needs to be done.  Otherwise a temporary, unattached list of
 * buffer heads is allocated, every block of the page is mapped through
 * @get_block, holes and new blocks are zeroed outside the write range and
 * partially overwritten blocks are read in.
 *
 * Returns 0 on success or a negative error; on error the page is unlocked
 * and released and *@pagep is set to NULL.
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	/* Page already carries buffers: use the regular buffer path. */
	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers that are not attached to the page here; on
	 * success they are handed to nobh_write_end() through *fsdata, on
	 * failure they are attached to the page in the error path below.
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * Walk every block of the page, not just the ones being written,
	 * so that is_mapped_to_disk reflects the whole page.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		/* Blocks starting at or after the end of the write are only looked up. */
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		/* New block or hole: zero around the range being written. */
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		/* Partially overwritten block: its old contents must be read. */
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * The buffers are not attached yet, so the list is walked
		 * until its NULL terminator rather than as a ring.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	/* The buffer list is released by nobh_write_end(). */
	*fsdata = head;

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery: attach the buffers to the page so that
	 * page_zero_new_buffers() can zero and dirty the newly allocated
	 * blocks in the range.
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	page_cache_release(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
2544
2545int nobh_write_end(struct file *file, struct address_space *mapping,
2546 loff_t pos, unsigned len, unsigned copied,
2547 struct page *page, void *fsdata)
2548{
2549 struct inode *inode = page->mapping->host;
2550 struct buffer_head *head = fsdata;
2551 struct buffer_head *bh;
2552 BUG_ON(fsdata != NULL && page_has_buffers(page));
2553
2554 if (unlikely(copied < len) && head)
2555 attach_nobh_buffers(page, head);
2556 if (page_has_buffers(page))
2557 return generic_write_end(file, mapping, pos, len,
2558 copied, page, fsdata);
2559
2560 SetPageUptodate(page);
2561 set_page_dirty(page);
2562 if (pos+copied > inode->i_size) {
2563 i_size_write(inode, pos+copied);
2564 mark_inode_dirty(inode);
2565 }
2566
2567 unlock_page(page);
2568 page_cache_release(page);
2569
2570 while (head) {
2571 bh = head;
2572 head = head->b_this_page;
2573 free_buffer_head(bh);
2574 }
2575
2576 return copied;
2577}
2578EXPORT_SYMBOL(nobh_write_end);
2579
2580
2581
2582
2583
2584
2585int nobh_writepage(struct page *page, get_block_t *get_block,
2586 struct writeback_control *wbc)
2587{
2588 struct inode * const inode = page->mapping->host;
2589 loff_t i_size = i_size_read(inode);
2590 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2591 unsigned offset;
2592 int ret;
2593
2594
2595 if (page->index < end_index)
2596 goto out;
2597
2598
2599 offset = i_size & (PAGE_CACHE_SIZE-1);
2600 if (page->index >= end_index+1 || !offset) {
2601
2602
2603
2604
2605
2606#if 0
2607
2608 if (page->mapping->a_ops->invalidatepage)
2609 page->mapping->a_ops->invalidatepage(page, offset);
2610#endif
2611 unlock_page(page);
2612 return 0;
2613 }
2614
2615
2616
2617
2618
2619
2620
2621
2622 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2623out:
2624 ret = mpage_writepage(page, get_block, wbc);
2625 if (ret == -EAGAIN)
2626 ret = __block_write_full_page(inode, page, get_block, wbc,
2627 end_buffer_async_write);
2628 return ret;
2629}
2630EXPORT_SYMBOL(nobh_writepage);
2631
2632int nobh_truncate_page(struct address_space *mapping,
2633 loff_t from, get_block_t *get_block)
2634{
2635 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2636 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2637 unsigned blocksize;
2638 sector_t iblock;
2639 unsigned length, pos;
2640 struct inode *inode = mapping->host;
2641 struct page *page;
2642 struct buffer_head map_bh;
2643 int err;
2644
2645 blocksize = 1 << inode->i_blkbits;
2646 length = offset & (blocksize - 1);
2647
2648
2649 if (!length)
2650 return 0;
2651
2652 length = blocksize - length;
2653 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2654
2655 page = grab_cache_page(mapping, index);
2656 err = -ENOMEM;
2657 if (!page)
2658 goto out;
2659
2660 if (page_has_buffers(page)) {
2661has_buffers:
2662 unlock_page(page);
2663 page_cache_release(page);
2664 return block_truncate_page(mapping, from, get_block);
2665 }
2666
2667
2668 pos = blocksize;
2669 while (offset >= pos) {
2670 iblock++;
2671 pos += blocksize;
2672 }
2673
2674 map_bh.b_size = blocksize;
2675 map_bh.b_state = 0;
2676 err = get_block(inode, iblock, &map_bh, 0);
2677 if (err)
2678 goto unlock;
2679
2680 if (!buffer_mapped(&map_bh))
2681 goto unlock;
2682
2683
2684 if (!PageUptodate(page)) {
2685 err = mapping->a_ops->readpage(NULL, page);
2686 if (err) {
2687 page_cache_release(page);
2688 goto out;
2689 }
2690 lock_page(page);
2691 if (!PageUptodate(page)) {
2692 err = -EIO;
2693 goto unlock;
2694 }
2695 if (page_has_buffers(page))
2696 goto has_buffers;
2697 }
2698 zero_user(page, offset, length);
2699 set_page_dirty(page);
2700 err = 0;
2701
2702unlock:
2703 unlock_page(page);
2704 page_cache_release(page);
2705out:
2706 return err;
2707}
2708EXPORT_SYMBOL(nobh_truncate_page);
2709
2710int block_truncate_page(struct address_space *mapping,
2711 loff_t from, get_block_t *get_block)
2712{
2713 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2714 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2715 unsigned blocksize;
2716 sector_t iblock;
2717 unsigned length, pos;
2718 struct inode *inode = mapping->host;
2719 struct page *page;
2720 struct buffer_head *bh;
2721 int err;
2722
2723 blocksize = 1 << inode->i_blkbits;
2724 length = offset & (blocksize - 1);
2725
2726
2727 if (!length)
2728 return 0;
2729
2730 length = blocksize - length;
2731 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2732
2733 page = grab_cache_page(mapping, index);
2734 err = -ENOMEM;
2735 if (!page)
2736 goto out;
2737
2738 if (!page_has_buffers(page))
2739 create_empty_buffers(page, blocksize, 0);
2740
2741
2742 bh = page_buffers(page);
2743 pos = blocksize;
2744 while (offset >= pos) {
2745 bh = bh->b_this_page;
2746 iblock++;
2747 pos += blocksize;
2748 }
2749
2750 err = 0;
2751 if (!buffer_mapped(bh)) {
2752 WARN_ON(bh->b_size != blocksize);
2753 err = get_block(inode, iblock, bh, 0);
2754 if (err)
2755 goto unlock;
2756
2757 if (!buffer_mapped(bh))
2758 goto unlock;
2759 }
2760
2761
2762 if (PageUptodate(page))
2763 set_buffer_uptodate(bh);
2764
2765 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2766 err = -EIO;
2767 ll_rw_block(READ, 1, &bh);
2768 wait_on_buffer(bh);
2769
2770 if (!buffer_uptodate(bh))
2771 goto unlock;
2772 }
2773
2774 zero_user(page, offset, length);
2775 mark_buffer_dirty(bh);
2776 err = 0;
2777
2778unlock:
2779 unlock_page(page);
2780 page_cache_release(page);
2781out:
2782 return err;
2783}
2784EXPORT_SYMBOL(block_truncate_page);
2785
2786
2787
2788
2789
2790int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2791 struct writeback_control *wbc, bh_end_io_t *handler)
2792{
2793 struct inode * const inode = page->mapping->host;
2794 loff_t i_size = i_size_read(inode);
2795 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2796 unsigned offset;
2797
2798
2799 if (page->index < end_index)
2800 return __block_write_full_page(inode, page, get_block, wbc,
2801 handler);
2802
2803
2804 offset = i_size & (PAGE_CACHE_SIZE-1);
2805 if (page->index >= end_index+1 || !offset) {
2806
2807
2808
2809
2810
2811 do_invalidatepage(page, 0);
2812 unlock_page(page);
2813 return 0;
2814 }
2815
2816
2817
2818
2819
2820
2821
2822
2823 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2824 return __block_write_full_page(inode, page, get_block, wbc, handler);
2825}
2826EXPORT_SYMBOL(block_write_full_page_endio);
2827
2828
2829
2830
2831int block_write_full_page(struct page *page, get_block_t *get_block,
2832 struct writeback_control *wbc)
2833{
2834 return block_write_full_page_endio(page, get_block, wbc,
2835 end_buffer_async_write);
2836}
2837EXPORT_SYMBOL(block_write_full_page);
2838
2839sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2840 get_block_t *get_block)
2841{
2842 struct buffer_head tmp;
2843 struct inode *inode = mapping->host;
2844 tmp.b_state = 0;
2845 tmp.b_blocknr = 0;
2846 tmp.b_size = 1 << inode->i_blkbits;
2847 get_block(inode, block, &tmp, 0);
2848 return tmp.b_blocknr;
2849}
2850EXPORT_SYMBOL(generic_block_bmap);
2851
2852static void end_bio_bh_io_sync(struct bio *bio, int err)
2853{
2854 struct buffer_head *bh = bio->bi_private;
2855
2856 if (err == -EOPNOTSUPP) {
2857 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2858 }
2859
2860 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2861 set_bit(BH_Quiet, &bh->b_state);
2862
2863 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2864 bio_put(bio);
2865}
2866
2867int submit_bh(int rw, struct buffer_head * bh)
2868{
2869 struct bio *bio;
2870 int ret = 0;
2871
2872 BUG_ON(!buffer_locked(bh));
2873 BUG_ON(!buffer_mapped(bh));
2874 BUG_ON(!bh->b_end_io);
2875 BUG_ON(buffer_delay(bh));
2876 BUG_ON(buffer_unwritten(bh));
2877
2878
2879
2880
2881 if (test_set_buffer_req(bh) && (rw & WRITE))
2882 clear_buffer_write_io_error(bh);
2883
2884
2885
2886
2887
2888 bio = bio_alloc(GFP_NOIO, 1);
2889
2890 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2891 bio->bi_bdev = bh->b_bdev;
2892 bio->bi_io_vec[0].bv_page = bh->b_page;
2893 bio->bi_io_vec[0].bv_len = bh->b_size;
2894 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2895
2896 bio->bi_vcnt = 1;
2897 bio->bi_idx = 0;
2898 bio->bi_size = bh->b_size;
2899
2900 bio->bi_end_io = end_bio_bh_io_sync;
2901 bio->bi_private = bh;
2902
2903 bio_get(bio);
2904 submit_bio(rw, bio);
2905
2906 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2907 ret = -EOPNOTSUPP;
2908
2909 bio_put(bio);
2910 return ret;
2911}
2912EXPORT_SYMBOL(submit_bh);
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2940{
2941 int i;
2942
2943 for (i = 0; i < nr; i++) {
2944 struct buffer_head *bh = bhs[i];
2945
2946 if (!trylock_buffer(bh))
2947 continue;
2948 if (rw == WRITE) {
2949 if (test_clear_buffer_dirty(bh)) {
2950 bh->b_end_io = end_buffer_write_sync;
2951 get_bh(bh);
2952 submit_bh(WRITE, bh);
2953 continue;
2954 }
2955 } else {
2956 if (!buffer_uptodate(bh)) {
2957 bh->b_end_io = end_buffer_read_sync;
2958 get_bh(bh);
2959 submit_bh(rw, bh);
2960 continue;
2961 }
2962 }
2963 unlock_buffer(bh);
2964 }
2965}
2966EXPORT_SYMBOL(ll_rw_block);
2967
2968void write_dirty_buffer(struct buffer_head *bh, int rw)
2969{
2970 lock_buffer(bh);
2971 if (!test_clear_buffer_dirty(bh)) {
2972 unlock_buffer(bh);
2973 return;
2974 }
2975 bh->b_end_io = end_buffer_write_sync;
2976 get_bh(bh);
2977 submit_bh(rw, bh);
2978}
2979EXPORT_SYMBOL(write_dirty_buffer);
2980
2981
2982
2983
2984
2985
2986int __sync_dirty_buffer(struct buffer_head *bh, int rw)
2987{
2988 int ret = 0;
2989
2990 WARN_ON(atomic_read(&bh->b_count) < 1);
2991 lock_buffer(bh);
2992 if (test_clear_buffer_dirty(bh)) {
2993 get_bh(bh);
2994 bh->b_end_io = end_buffer_write_sync;
2995 ret = submit_bh(rw, bh);
2996 wait_on_buffer(bh);
2997 if (!ret && !buffer_uptodate(bh))
2998 ret = -EIO;
2999 } else {
3000 unlock_buffer(bh);
3001 }
3002 return ret;
3003}
3004EXPORT_SYMBOL(__sync_dirty_buffer);
3005
3006int sync_dirty_buffer(struct buffer_head *bh)
3007{
3008 return __sync_dirty_buffer(bh, WRITE_SYNC);
3009}
3010EXPORT_SYMBOL(sync_dirty_buffer);
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032static inline int buffer_busy(struct buffer_head *bh)
3033{
3034 return atomic_read(&bh->b_count) |
3035 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3036}
3037
3038static int
3039drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3040{
3041 struct buffer_head *head = page_buffers(page);
3042 struct buffer_head *bh;
3043
3044 bh = head;
3045 do {
3046 if (buffer_write_io_error(bh) && page->mapping)
3047 set_bit(AS_EIO, &page->mapping->flags);
3048 if (buffer_busy(bh))
3049 goto failed;
3050 bh = bh->b_this_page;
3051 } while (bh != head);
3052
3053 do {
3054 struct buffer_head *next = bh->b_this_page;
3055
3056 if (bh->b_assoc_map)
3057 __remove_assoc_queue(bh);
3058 bh = next;
3059 } while (bh != head);
3060 *buffers_to_free = head;
3061 __clear_page_buffers(page);
3062 return 1;
3063failed:
3064 return 0;
3065}
3066
3067int try_to_free_buffers(struct page *page)
3068{
3069 struct address_space * const mapping = page->mapping;
3070 struct buffer_head *buffers_to_free = NULL;
3071 int ret = 0;
3072
3073 BUG_ON(!PageLocked(page));
3074 if (PageWriteback(page))
3075 return 0;
3076
3077 if (mapping == NULL) {
3078 ret = drop_buffers(page, &buffers_to_free);
3079 goto out;
3080 }
3081
3082 spin_lock(&mapping->private_lock);
3083 ret = drop_buffers(page, &buffers_to_free);
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099 if (ret)
3100 cancel_dirty_page(page, PAGE_CACHE_SIZE);
3101 spin_unlock(&mapping->private_lock);
3102out:
3103 if (buffers_to_free) {
3104 struct buffer_head *bh = buffers_to_free;
3105
3106 do {
3107 struct buffer_head *next = bh->b_this_page;
3108 free_buffer_head(bh);
3109 bh = next;
3110 } while (bh != buffers_to_free);
3111 }
3112 return ret;
3113}
3114EXPORT_SYMBOL(try_to_free_buffers);
3115
3116
3117
3118
3119
3120
3121
3122
3123SYSCALL_DEFINE2(bdflush, int, func, long, data)
3124{
3125 static int msg_count;
3126
3127 if (!capable(CAP_SYS_ADMIN))
3128 return -EPERM;
3129
3130 if (msg_count < 5) {
3131 msg_count++;
3132 printk(KERN_INFO
3133 "warning: process `%s' used the obsolete bdflush"
3134 " system call\n", current->comm);
3135 printk(KERN_INFO "Fix your initscripts?\n");
3136 }
3137
3138 if (func == 1)
3139 do_exit(0);
3140 return 0;
3141}
3142
3143
3144
3145
/*
 * Slab cache from which buffer heads are allocated; created in
 * buffer_init() and used by alloc_buffer_head()/free_buffer_head().
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Limit on the number of buffer heads, computed once in buffer_init()
 * from 10% of nr_free_buffer_pages().
 */
static int max_buffer_heads;

/*
 * Non-zero when the summed per-cpu buffer head counts exceed
 * max_buffer_heads; refreshed by recalc_bh_state().
 */
int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* buffer heads allocated minus freed on this cpu */
	int ratelimit;		/* recalc_bh_state() calls since the last recount */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3162
3163static void recalc_bh_state(void)
3164{
3165 int i;
3166 int tot = 0;
3167
3168 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3169 return;
3170 __this_cpu_write(bh_accounting.ratelimit, 0);
3171 for_each_online_cpu(i)
3172 tot += per_cpu(bh_accounting, i).nr;
3173 buffer_heads_over_limit = (tot > max_buffer_heads);
3174}
3175
3176struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3177{
3178 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3179 if (ret) {
3180 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3181 preempt_disable();
3182 __this_cpu_inc(bh_accounting.nr);
3183 recalc_bh_state();
3184 preempt_enable();
3185 }
3186 return ret;
3187}
3188EXPORT_SYMBOL(alloc_buffer_head);
3189
/*
 * free_buffer_head - return a buffer head to the slab cache
 * @bh:	buffer head to free; must not be linked on an association list
 *
 * Also decrements this cpu's buffer head count and gives
 * recalc_bh_state() a chance to refresh buffer_heads_over_limit.
 */
void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	/* The per-cpu counters are updated with preemption disabled. */
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);
3200
3201static void buffer_exit_cpu(int cpu)
3202{
3203 int i;
3204 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3205
3206 for (i = 0; i < BH_LRU_SIZE; i++) {
3207 brelse(b->bhs[i]);
3208 b->bhs[i] = NULL;
3209 }
3210 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3211 per_cpu(bh_accounting, cpu).nr = 0;
3212}
3213
3214static int buffer_cpu_notify(struct notifier_block *self,
3215 unsigned long action, void *hcpu)
3216{
3217 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3218 buffer_exit_cpu((unsigned long)hcpu);
3219 return NOTIFY_OK;
3220}
3221
3222
3223
3224
3225
3226
3227
3228
/*
 * bh_uptodate_or_lock - test whether a buffer is uptodate, else lock it
 * @bh:	buffer to examine
 *
 * Returns 1 if the buffer is uptodate (the buffer is not left locked).
 * Returns 0 if it is not uptodate; in that case the buffer is returned
 * locked.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);
	/* Re-check under the lock: it may have become uptodate meanwhile. */
	if (!buffer_uptodate(bh))
		return 0;

	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3240
3241
3242
3243
3244
3245
3246
3247int bh_submit_read(struct buffer_head *bh)
3248{
3249 BUG_ON(!buffer_locked(bh));
3250
3251 if (buffer_uptodate(bh)) {
3252 unlock_buffer(bh);
3253 return 0;
3254 }
3255
3256 get_bh(bh);
3257 bh->b_end_io = end_buffer_read_sync;
3258 submit_bh(READ, bh);
3259 wait_on_buffer(bh);
3260 if (buffer_uptodate(bh))
3261 return 0;
3262 return -EIO;
3263}
3264EXPORT_SYMBOL(bh_submit_read);
3265
3266void __init buffer_init(void)
3267{
3268 int nrpages;
3269
3270 bh_cachep = kmem_cache_create("buffer_head",
3271 sizeof(struct buffer_head), 0,
3272 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3273 SLAB_MEM_SPREAD),
3274 NULL);
3275
3276
3277
3278
3279 nrpages = (nr_free_buffer_pages() * 10) / 100;
3280 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3281 hotcpu_notifier(buffer_cpu_notify, 0);
3282}
3283