1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/export.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46
47#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
48
49inline void
50init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55EXPORT_SYMBOL(init_buffer);
56
/*
 * Wait action handed to wait_on_bit()/wait_on_bit_lock() in this file:
 * sleeps through io_schedule() and always returns 0.  @word is not used.
 */
static int sleep_on_buffer(void *word)
{
	int ret = 0;

	io_schedule();
	return ret;
}
62
63void __lock_buffer(struct buffer_head *bh)
64{
65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
66 TASK_UNINTERRUPTIBLE);
67}
68EXPORT_SYMBOL(__lock_buffer);
69
70void unlock_buffer(struct buffer_head *bh)
71{
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_clear_bit();
74 wake_up_bit(&bh->b_state, BH_Lock);
75}
76EXPORT_SYMBOL(unlock_buffer);
77
78
79
80
81
82
83void __wait_on_buffer(struct buffer_head * bh)
84{
85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
86}
87EXPORT_SYMBOL(__wait_on_buffer);
88
/*
 * Detach private data from @page: clear PagePrivate, zero the private
 * word, then drop one page reference.
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
96
97
98static int quiet_error(struct buffer_head *bh)
99{
100 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
101 return 0;
102 return 1;
103}
104
105
106static void buffer_io_error(struct buffer_head *bh)
107{
108 char b[BDEVNAME_SIZE];
109 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
110 bdevname(bh->b_bdev, b),
111 (unsigned long long)bh->b_blocknr);
112}
113
114
115
116
117
118
119
120
121
/*
 * Common read-completion tail: set or clear the uptodate flag of @bh
 * according to @uptodate, then unlock the buffer.  The reference count is
 * left alone.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
132
133
134
135
136
/*
 * Read-completion handler: update the uptodate flag, unlock @bh and then
 * drop one reference on it.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);

	/* Release the reference only after the buffer has been unlocked. */
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
143
144void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
145{
146 char b[BDEVNAME_SIZE];
147
148 if (uptodate) {
149 set_buffer_uptodate(bh);
150 } else {
151 if (!quiet_error(bh)) {
152 buffer_io_error(bh);
153 printk(KERN_WARNING "lost page write due to "
154 "I/O error on %s\n",
155 bdevname(bh->b_bdev, b));
156 }
157 set_buffer_write_io_error(bh);
158 clear_buffer_uptodate(bh);
159 }
160 unlock_buffer(bh);
161 put_bh(bh);
162}
163EXPORT_SYMBOL(end_buffer_write_sync);
164
165
166
167
168
169
170
171
172
173
174
175
176static struct buffer_head *
177__find_get_block_slow(struct block_device *bdev, sector_t block)
178{
179 struct inode *bd_inode = bdev->bd_inode;
180 struct address_space *bd_mapping = bd_inode->i_mapping;
181 struct buffer_head *ret = NULL;
182 pgoff_t index;
183 struct buffer_head *bh;
184 struct buffer_head *head;
185 struct page *page;
186 int all_mapped = 1;
187
188 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
189 page = find_get_page(bd_mapping, index);
190 if (!page)
191 goto out;
192
193 spin_lock(&bd_mapping->private_lock);
194 if (!page_has_buffers(page))
195 goto out_unlock;
196 head = page_buffers(page);
197 bh = head;
198 do {
199 if (!buffer_mapped(bh))
200 all_mapped = 0;
201 else if (bh->b_blocknr == block) {
202 ret = bh;
203 get_bh(bh);
204 goto out_unlock;
205 }
206 bh = bh->b_this_page;
207 } while (bh != head);
208
209
210
211
212
213
214 if (all_mapped) {
215 char b[BDEVNAME_SIZE];
216
217 printk("__find_get_block_slow() failed. "
218 "block=%llu, b_blocknr=%llu\n",
219 (unsigned long long)block,
220 (unsigned long long)bh->b_blocknr);
221 printk("b_state=0x%08lx, b_size=%zu\n",
222 bh->b_state, bh->b_size);
223 printk("device %s blocksize: %d\n", bdevname(bdev, b),
224 1 << bd_inode->i_blkbits);
225 }
226out_unlock:
227 spin_unlock(&bd_mapping->private_lock);
228 page_cache_release(page);
229out:
230 return ret;
231}
232
233
234
235
236static void free_more_memory(void)
237{
238 struct zone *zone;
239 int nid;
240
241 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
242 yield();
243
244 for_each_online_node(nid) {
245 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
246 gfp_zone(GFP_NOFS), NULL,
247 &zone);
248 if (zone)
249 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
250 GFP_NOFS, NULL);
251 }
252}
253
254
255
256
257
258static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
259{
260 unsigned long flags;
261 struct buffer_head *first;
262 struct buffer_head *tmp;
263 struct page *page;
264 int page_uptodate = 1;
265
266 BUG_ON(!buffer_async_read(bh));
267
268 page = bh->b_page;
269 if (uptodate) {
270 set_buffer_uptodate(bh);
271 } else {
272 clear_buffer_uptodate(bh);
273 if (!quiet_error(bh))
274 buffer_io_error(bh);
275 SetPageError(page);
276 }
277
278
279
280
281
282
283 first = page_buffers(page);
284 local_irq_save(flags);
285 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
286 clear_buffer_async_read(bh);
287 unlock_buffer(bh);
288 tmp = bh;
289 do {
290 if (!buffer_uptodate(tmp))
291 page_uptodate = 0;
292 if (buffer_async_read(tmp)) {
293 BUG_ON(!buffer_locked(tmp));
294 goto still_busy;
295 }
296 tmp = tmp->b_this_page;
297 } while (tmp != bh);
298 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
299 local_irq_restore(flags);
300
301
302
303
304
305 if (page_uptodate && !PageError(page))
306 SetPageUptodate(page);
307 unlock_page(page);
308 return;
309
310still_busy:
311 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
312 local_irq_restore(flags);
313 return;
314}
315
316
317
318
319
320void end_buffer_async_write(struct buffer_head *bh, int uptodate)
321{
322 char b[BDEVNAME_SIZE];
323 unsigned long flags;
324 struct buffer_head *first;
325 struct buffer_head *tmp;
326 struct page *page;
327
328 BUG_ON(!buffer_async_write(bh));
329
330 page = bh->b_page;
331 if (uptodate) {
332 set_buffer_uptodate(bh);
333 } else {
334 if (!quiet_error(bh)) {
335 buffer_io_error(bh);
336 printk(KERN_WARNING "lost page write due to "
337 "I/O error on %s\n",
338 bdevname(bh->b_bdev, b));
339 }
340 set_bit(AS_EIO, &page->mapping->flags);
341 set_buffer_write_io_error(bh);
342 clear_buffer_uptodate(bh);
343 SetPageError(page);
344 }
345
346 first = page_buffers(page);
347 local_irq_save(flags);
348 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
349
350 clear_buffer_async_write(bh);
351 unlock_buffer(bh);
352 tmp = bh->b_this_page;
353 while (tmp != bh) {
354 if (buffer_async_write(tmp)) {
355 BUG_ON(!buffer_locked(tmp));
356 goto still_busy;
357 }
358 tmp = tmp->b_this_page;
359 }
360 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
361 local_irq_restore(flags);
362 end_page_writeback(page);
363 return;
364
365still_busy:
366 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
367 local_irq_restore(flags);
368 return;
369}
370EXPORT_SYMBOL(end_buffer_async_write);
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393static void mark_buffer_async_read(struct buffer_head *bh)
394{
395 bh->b_end_io = end_buffer_async_read;
396 set_buffer_async_read(bh);
397}
398
399static void mark_buffer_async_write_endio(struct buffer_head *bh,
400 bh_end_io_t *handler)
401{
402 bh->b_end_io = handler;
403 set_buffer_async_write(bh);
404}
405
406void mark_buffer_async_write(struct buffer_head *bh)
407{
408 mark_buffer_async_write_endio(bh, end_buffer_async_write);
409}
410EXPORT_SYMBOL(mark_buffer_async_write);
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465static void __remove_assoc_queue(struct buffer_head *bh)
466{
467 list_del_init(&bh->b_assoc_buffers);
468 WARN_ON(!bh->b_assoc_map);
469 if (buffer_write_io_error(bh))
470 set_bit(AS_EIO, &bh->b_assoc_map->flags);
471 bh->b_assoc_map = NULL;
472}
473
474int inode_has_buffers(struct inode *inode)
475{
476 return !list_empty(&inode->i_data.private_list);
477}
478
479
480
481
482
483
484
485
486
487
488
489static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
490{
491 struct buffer_head *bh;
492 struct list_head *p;
493 int err = 0;
494
495 spin_lock(lock);
496repeat:
497 list_for_each_prev(p, list) {
498 bh = BH_ENTRY(p);
499 if (buffer_locked(bh)) {
500 get_bh(bh);
501 spin_unlock(lock);
502 wait_on_buffer(bh);
503 if (!buffer_uptodate(bh))
504 err = -EIO;
505 brelse(bh);
506 spin_lock(lock);
507 goto repeat;
508 }
509 }
510 spin_unlock(lock);
511 return err;
512}
513
514static void do_thaw_one(struct super_block *sb, void *unused)
515{
516 char b[BDEVNAME_SIZE];
517 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
518 printk(KERN_WARNING "Emergency Thaw on %s\n",
519 bdevname(sb->s_bdev, b));
520}
521
522static void do_thaw_all(struct work_struct *work)
523{
524 iterate_supers(do_thaw_one, NULL);
525 kfree(work);
526 printk(KERN_WARNING "Emergency Thaw complete\n");
527}
528
529
530
531
532
533
534void emergency_thaw_all(void)
535{
536 struct work_struct *work;
537
538 work = kmalloc(sizeof(*work), GFP_ATOMIC);
539 if (work) {
540 INIT_WORK(work, do_thaw_all);
541 schedule_work(work);
542 }
543}
544
545
546
547
548
549
550
551
552
553
554
555
556int sync_mapping_buffers(struct address_space *mapping)
557{
558 struct address_space *buffer_mapping = mapping->assoc_mapping;
559
560 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
561 return 0;
562
563 return fsync_buffers_list(&buffer_mapping->private_lock,
564 &mapping->private_list);
565}
566EXPORT_SYMBOL(sync_mapping_buffers);
567
568
569
570
571
572
573
574void write_boundary_block(struct block_device *bdev,
575 sector_t bblock, unsigned blocksize)
576{
577 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
578 if (bh) {
579 if (buffer_dirty(bh))
580 ll_rw_block(WRITE, 1, &bh);
581 put_bh(bh);
582 }
583}
584
585void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
586{
587 struct address_space *mapping = inode->i_mapping;
588 struct address_space *buffer_mapping = bh->b_page->mapping;
589
590 mark_buffer_dirty(bh);
591 if (!mapping->assoc_mapping) {
592 mapping->assoc_mapping = buffer_mapping;
593 } else {
594 BUG_ON(mapping->assoc_mapping != buffer_mapping);
595 }
596 if (!bh->b_assoc_map) {
597 spin_lock(&buffer_mapping->private_lock);
598 list_move_tail(&bh->b_assoc_buffers,
599 &mapping->private_list);
600 bh->b_assoc_map = mapping;
601 spin_unlock(&buffer_mapping->private_lock);
602 }
603}
604EXPORT_SYMBOL(mark_buffer_dirty_inode);
605
606
607
608
609
610
611
612
613static void __set_page_dirty(struct page *page,
614 struct address_space *mapping, int warn)
615{
616 spin_lock_irq(&mapping->tree_lock);
617 if (page->mapping) {
618 WARN_ON_ONCE(warn && !PageUptodate(page));
619 account_page_dirtied(page, mapping);
620 radix_tree_tag_set(&mapping->page_tree,
621 page_index(page), PAGECACHE_TAG_DIRTY);
622 }
623 spin_unlock_irq(&mapping->tree_lock);
624 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
625}
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652int __set_page_dirty_buffers(struct page *page)
653{
654 int newly_dirty;
655 struct address_space *mapping = page_mapping(page);
656
657 if (unlikely(!mapping))
658 return !TestSetPageDirty(page);
659
660 spin_lock(&mapping->private_lock);
661 if (page_has_buffers(page)) {
662 struct buffer_head *head = page_buffers(page);
663 struct buffer_head *bh = head;
664
665 do {
666 set_buffer_dirty(bh);
667 bh = bh->b_this_page;
668 } while (bh != head);
669 }
670 newly_dirty = !TestSetPageDirty(page);
671 spin_unlock(&mapping->private_lock);
672
673 if (newly_dirty)
674 __set_page_dirty(page, mapping, 1);
675 return newly_dirty;
676}
677EXPORT_SYMBOL(__set_page_dirty_buffers);
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
699{
700 struct buffer_head *bh;
701 struct list_head tmp;
702 struct address_space *mapping;
703 int err = 0, err2;
704 struct blk_plug plug;
705
706 INIT_LIST_HEAD(&tmp);
707 blk_start_plug(&plug);
708
709 spin_lock(lock);
710 while (!list_empty(list)) {
711 bh = BH_ENTRY(list->next);
712 mapping = bh->b_assoc_map;
713 __remove_assoc_queue(bh);
714
715
716 smp_mb();
717 if (buffer_dirty(bh) || buffer_locked(bh)) {
718 list_add(&bh->b_assoc_buffers, &tmp);
719 bh->b_assoc_map = mapping;
720 if (buffer_dirty(bh)) {
721 get_bh(bh);
722 spin_unlock(lock);
723
724
725
726
727
728
729
730 write_dirty_buffer(bh, WRITE_SYNC);
731
732
733
734
735
736
737
738 brelse(bh);
739 spin_lock(lock);
740 }
741 }
742 }
743
744 spin_unlock(lock);
745 blk_finish_plug(&plug);
746 spin_lock(lock);
747
748 while (!list_empty(&tmp)) {
749 bh = BH_ENTRY(tmp.prev);
750 get_bh(bh);
751 mapping = bh->b_assoc_map;
752 __remove_assoc_queue(bh);
753
754
755 smp_mb();
756 if (buffer_dirty(bh)) {
757 list_add(&bh->b_assoc_buffers,
758 &mapping->private_list);
759 bh->b_assoc_map = mapping;
760 }
761 spin_unlock(lock);
762 wait_on_buffer(bh);
763 if (!buffer_uptodate(bh))
764 err = -EIO;
765 brelse(bh);
766 spin_lock(lock);
767 }
768
769 spin_unlock(lock);
770 err2 = osync_buffers_list(lock, list);
771 if (err)
772 return err;
773 else
774 return err2;
775}
776
777
778
779
780
781
782
783
784
785
786void invalidate_inode_buffers(struct inode *inode)
787{
788 if (inode_has_buffers(inode)) {
789 struct address_space *mapping = &inode->i_data;
790 struct list_head *list = &mapping->private_list;
791 struct address_space *buffer_mapping = mapping->assoc_mapping;
792
793 spin_lock(&buffer_mapping->private_lock);
794 while (!list_empty(list))
795 __remove_assoc_queue(BH_ENTRY(list->next));
796 spin_unlock(&buffer_mapping->private_lock);
797 }
798}
799EXPORT_SYMBOL(invalidate_inode_buffers);
800
801
802
803
804
805
806
807int remove_inode_buffers(struct inode *inode)
808{
809 int ret = 1;
810
811 if (inode_has_buffers(inode)) {
812 struct address_space *mapping = &inode->i_data;
813 struct list_head *list = &mapping->private_list;
814 struct address_space *buffer_mapping = mapping->assoc_mapping;
815
816 spin_lock(&buffer_mapping->private_lock);
817 while (!list_empty(list)) {
818 struct buffer_head *bh = BH_ENTRY(list->next);
819 if (buffer_dirty(bh)) {
820 ret = 0;
821 break;
822 }
823 __remove_assoc_queue(bh);
824 }
825 spin_unlock(&buffer_mapping->private_lock);
826 }
827 return ret;
828}
829
830
831
832
833
834
835
836
837
838
839struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
840 int retry)
841{
842 struct buffer_head *bh, *head;
843 long offset;
844
845try_again:
846 head = NULL;
847 offset = PAGE_SIZE;
848 while ((offset -= size) >= 0) {
849 bh = alloc_buffer_head(GFP_NOFS);
850 if (!bh)
851 goto no_grow;
852
853 bh->b_bdev = NULL;
854 bh->b_this_page = head;
855 bh->b_blocknr = -1;
856 head = bh;
857
858 bh->b_state = 0;
859 atomic_set(&bh->b_count, 0);
860 bh->b_size = size;
861
862
863 set_bh_page(bh, page, offset);
864
865 init_buffer(bh, NULL, NULL);
866 }
867 return head;
868
869
870
871no_grow:
872 if (head) {
873 do {
874 bh = head;
875 head = head->b_this_page;
876 free_buffer_head(bh);
877 } while (head);
878 }
879
880
881
882
883
884
885
886 if (!retry)
887 return NULL;
888
889
890
891
892
893
894
895 free_more_memory();
896 goto try_again;
897}
898EXPORT_SYMBOL_GPL(alloc_page_buffers);
899
900static inline void
901link_dev_buffers(struct page *page, struct buffer_head *head)
902{
903 struct buffer_head *bh, *tail;
904
905 bh = head;
906 do {
907 tail = bh;
908 bh = bh->b_this_page;
909 } while (bh);
910 tail->b_this_page = head;
911 attach_page_buffers(page, head);
912}
913
914
915
916
917static sector_t
918init_page_buffers(struct page *page, struct block_device *bdev,
919 sector_t block, int size)
920{
921 struct buffer_head *head = page_buffers(page);
922 struct buffer_head *bh = head;
923 int uptodate = PageUptodate(page);
924 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
925
926 do {
927 if (!buffer_mapped(bh)) {
928 init_buffer(bh, NULL, NULL);
929 bh->b_bdev = bdev;
930 bh->b_blocknr = block;
931 if (uptodate)
932 set_buffer_uptodate(bh);
933 if (block < end_block)
934 set_buffer_mapped(bh);
935 }
936 block++;
937 bh = bh->b_this_page;
938 } while (bh != head);
939
940
941
942
943 return end_block;
944}
945
946
947
948
949
950
951static int
952grow_dev_page(struct block_device *bdev, sector_t block,
953 pgoff_t index, int size, int sizebits)
954{
955 struct inode *inode = bdev->bd_inode;
956 struct page *page;
957 struct buffer_head *bh;
958 sector_t end_block;
959 int ret = 0;
960
961 page = find_or_create_page(inode->i_mapping, index,
962 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
963 if (!page)
964 return ret;
965
966 BUG_ON(!PageLocked(page));
967
968 if (page_has_buffers(page)) {
969 bh = page_buffers(page);
970 if (bh->b_size == size) {
971 end_block = init_page_buffers(page, bdev,
972 index << sizebits, size);
973 goto done;
974 }
975 if (!try_to_free_buffers(page))
976 goto failed;
977 }
978
979
980
981
982 bh = alloc_page_buffers(page, size, 0);
983 if (!bh)
984 goto failed;
985
986
987
988
989
990
991 spin_lock(&inode->i_mapping->private_lock);
992 link_dev_buffers(page, bh);
993 end_block = init_page_buffers(page, bdev, index << sizebits, size);
994 spin_unlock(&inode->i_mapping->private_lock);
995done:
996 ret = (block < end_block) ? 1 : -ENXIO;
997failed:
998 unlock_page(page);
999 page_cache_release(page);
1000 return ret;
1001}
1002
1003
1004
1005
1006
1007static int
1008grow_buffers(struct block_device *bdev, sector_t block, int size)
1009{
1010 pgoff_t index;
1011 int sizebits;
1012
1013 sizebits = -1;
1014 do {
1015 sizebits++;
1016 } while ((size << sizebits) < PAGE_SIZE);
1017
1018 index = block >> sizebits;
1019
1020
1021
1022
1023
1024 if (unlikely(index != block >> sizebits)) {
1025 char b[BDEVNAME_SIZE];
1026
1027 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1028 "device %s\n",
1029 __func__, (unsigned long long)block,
1030 bdevname(bdev, b));
1031 return -EIO;
1032 }
1033
1034
1035 return grow_dev_page(bdev, block, index, size, sizebits);
1036}
1037
1038static struct buffer_head *
1039__getblk_slow(struct block_device *bdev, sector_t block, int size)
1040{
1041
1042 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1043 (size < 512 || size > PAGE_SIZE))) {
1044 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1045 size);
1046 printk(KERN_ERR "logical block size: %d\n",
1047 bdev_logical_block_size(bdev));
1048
1049 dump_stack();
1050 return NULL;
1051 }
1052
1053 for (;;) {
1054 struct buffer_head *bh;
1055 int ret;
1056
1057 bh = __find_get_block(bdev, block, size);
1058 if (bh)
1059 return bh;
1060
1061 ret = grow_buffers(bdev, block, size);
1062 if (ret < 0)
1063 return NULL;
1064 if (ret == 0)
1065 free_more_memory();
1066 }
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104void mark_buffer_dirty(struct buffer_head *bh)
1105{
1106 WARN_ON_ONCE(!buffer_uptodate(bh));
1107
1108
1109
1110
1111
1112
1113
1114 if (buffer_dirty(bh)) {
1115 smp_mb();
1116 if (buffer_dirty(bh))
1117 return;
1118 }
1119
1120 if (!test_set_buffer_dirty(bh)) {
1121 struct page *page = bh->b_page;
1122 if (!TestSetPageDirty(page)) {
1123 struct address_space *mapping = page_mapping(page);
1124 if (mapping)
1125 __set_page_dirty(page, mapping, 0);
1126 }
1127 }
1128}
1129EXPORT_SYMBOL(mark_buffer_dirty);
1130
1131
1132
1133
1134
1135
1136
1137
1138void __brelse(struct buffer_head * buf)
1139{
1140 if (atomic_read(&buf->b_count)) {
1141 put_bh(buf);
1142 return;
1143 }
1144 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1145}
1146EXPORT_SYMBOL(__brelse);
1147
1148
1149
1150
1151
1152void __bforget(struct buffer_head *bh)
1153{
1154 clear_buffer_dirty(bh);
1155 if (bh->b_assoc_map) {
1156 struct address_space *buffer_mapping = bh->b_page->mapping;
1157
1158 spin_lock(&buffer_mapping->private_lock);
1159 list_del_init(&bh->b_assoc_buffers);
1160 bh->b_assoc_map = NULL;
1161 spin_unlock(&buffer_mapping->private_lock);
1162 }
1163 __brelse(bh);
1164}
1165EXPORT_SYMBOL(__bforget);
1166
1167static struct buffer_head *__bread_slow(struct buffer_head *bh)
1168{
1169 lock_buffer(bh);
1170 if (buffer_uptodate(bh)) {
1171 unlock_buffer(bh);
1172 return bh;
1173 } else {
1174 get_bh(bh);
1175 bh->b_end_io = end_buffer_read_sync;
1176 submit_bh(READ, bh);
1177 wait_on_buffer(bh);
1178 if (buffer_uptodate(bh))
1179 return bh;
1180 }
1181 brelse(bh);
1182 return NULL;
1183}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199#define BH_LRU_SIZE 8
1200
1201struct bh_lru {
1202 struct buffer_head *bhs[BH_LRU_SIZE];
1203};
1204
1205static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1206
1207#ifdef CONFIG_SMP
1208#define bh_lru_lock() local_irq_disable()
1209#define bh_lru_unlock() local_irq_enable()
1210#else
1211#define bh_lru_lock() preempt_disable()
1212#define bh_lru_unlock() preempt_enable()
1213#endif
1214
/*
 * Sanity check: BUG if interrupts are disabled.  Compiles to nothing when
 * irqs_disabled is not available as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1221
1222
1223
1224
1225static void bh_lru_install(struct buffer_head *bh)
1226{
1227 struct buffer_head *evictee = NULL;
1228
1229 check_irqs_on();
1230 bh_lru_lock();
1231 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1232 struct buffer_head *bhs[BH_LRU_SIZE];
1233 int in;
1234 int out = 0;
1235
1236 get_bh(bh);
1237 bhs[out++] = bh;
1238 for (in = 0; in < BH_LRU_SIZE; in++) {
1239 struct buffer_head *bh2 =
1240 __this_cpu_read(bh_lrus.bhs[in]);
1241
1242 if (bh2 == bh) {
1243 __brelse(bh2);
1244 } else {
1245 if (out >= BH_LRU_SIZE) {
1246 BUG_ON(evictee != NULL);
1247 evictee = bh2;
1248 } else {
1249 bhs[out++] = bh2;
1250 }
1251 }
1252 }
1253 while (out < BH_LRU_SIZE)
1254 bhs[out++] = NULL;
1255 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1256 }
1257 bh_lru_unlock();
1258
1259 if (evictee)
1260 __brelse(evictee);
1261}
1262
1263
1264
1265
1266static struct buffer_head *
1267lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1268{
1269 struct buffer_head *ret = NULL;
1270 unsigned int i;
1271
1272 check_irqs_on();
1273 bh_lru_lock();
1274 for (i = 0; i < BH_LRU_SIZE; i++) {
1275 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1276
1277 if (bh && bh->b_bdev == bdev &&
1278 bh->b_blocknr == block && bh->b_size == size) {
1279 if (i) {
1280 while (i) {
1281 __this_cpu_write(bh_lrus.bhs[i],
1282 __this_cpu_read(bh_lrus.bhs[i - 1]));
1283 i--;
1284 }
1285 __this_cpu_write(bh_lrus.bhs[0], bh);
1286 }
1287 get_bh(bh);
1288 ret = bh;
1289 break;
1290 }
1291 }
1292 bh_lru_unlock();
1293 return ret;
1294}
1295
1296
1297
1298
1299
1300
1301struct buffer_head *
1302__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1303{
1304 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1305
1306 if (bh == NULL) {
1307 bh = __find_get_block_slow(bdev, block);
1308 if (bh)
1309 bh_lru_install(bh);
1310 }
1311 if (bh)
1312 touch_buffer(bh);
1313 return bh;
1314}
1315EXPORT_SYMBOL(__find_get_block);
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325struct buffer_head *
1326__getblk(struct block_device *bdev, sector_t block, unsigned size)
1327{
1328 struct buffer_head *bh = __find_get_block(bdev, block, size);
1329
1330 might_sleep();
1331 if (bh == NULL)
1332 bh = __getblk_slow(bdev, block, size);
1333 return bh;
1334}
1335EXPORT_SYMBOL(__getblk);
1336
1337
1338
1339
1340void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1341{
1342 struct buffer_head *bh = __getblk(bdev, block, size);
1343 if (likely(bh)) {
1344 ll_rw_block(READA, 1, &bh);
1345 brelse(bh);
1346 }
1347}
1348EXPORT_SYMBOL(__breadahead);
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359struct buffer_head *
1360__bread(struct block_device *bdev, sector_t block, unsigned size)
1361{
1362 struct buffer_head *bh = __getblk(bdev, block, size);
1363
1364 if (likely(bh) && !buffer_uptodate(bh))
1365 bh = __bread_slow(bh);
1366 return bh;
1367}
1368EXPORT_SYMBOL(__bread);
1369
1370
1371
1372
1373
1374
1375static void invalidate_bh_lru(void *arg)
1376{
1377 struct bh_lru *b = &get_cpu_var(bh_lrus);
1378 int i;
1379
1380 for (i = 0; i < BH_LRU_SIZE; i++) {
1381 brelse(b->bhs[i]);
1382 b->bhs[i] = NULL;
1383 }
1384 put_cpu_var(bh_lrus);
1385}
1386
1387static bool has_bh_in_lru(int cpu, void *dummy)
1388{
1389 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1390 int i;
1391
1392 for (i = 0; i < BH_LRU_SIZE; i++) {
1393 if (b->bhs[i])
1394 return 1;
1395 }
1396
1397 return 0;
1398}
1399
1400void invalidate_bh_lrus(void)
1401{
1402 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1403}
1404EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1405
1406void set_bh_page(struct buffer_head *bh,
1407 struct page *page, unsigned long offset)
1408{
1409 bh->b_page = page;
1410 BUG_ON(offset >= PAGE_SIZE);
1411 if (PageHighMem(page))
1412
1413
1414
1415 bh->b_data = (char *)(0 + offset);
1416 else
1417 bh->b_data = page_address(page) + offset;
1418}
1419EXPORT_SYMBOL(set_bh_page);
1420
1421
1422
1423
1424static void discard_buffer(struct buffer_head * bh)
1425{
1426 lock_buffer(bh);
1427 clear_buffer_dirty(bh);
1428 bh->b_bdev = NULL;
1429 clear_buffer_mapped(bh);
1430 clear_buffer_req(bh);
1431 clear_buffer_new(bh);
1432 clear_buffer_delay(bh);
1433 clear_buffer_unwritten(bh);
1434 unlock_buffer(bh);
1435}
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452void block_invalidatepage(struct page *page, unsigned long offset)
1453{
1454 struct buffer_head *head, *bh, *next;
1455 unsigned int curr_off = 0;
1456
1457 BUG_ON(!PageLocked(page));
1458 if (!page_has_buffers(page))
1459 goto out;
1460
1461 head = page_buffers(page);
1462 bh = head;
1463 do {
1464 unsigned int next_off = curr_off + bh->b_size;
1465 next = bh->b_this_page;
1466
1467
1468
1469
1470 if (offset <= curr_off)
1471 discard_buffer(bh);
1472 curr_off = next_off;
1473 bh = next;
1474 } while (bh != head);
1475
1476
1477
1478
1479
1480
1481 if (offset == 0)
1482 try_to_release_page(page, 0);
1483out:
1484 return;
1485}
1486EXPORT_SYMBOL(block_invalidatepage);
1487
1488
1489
1490
1491
1492
1493void create_empty_buffers(struct page *page,
1494 unsigned long blocksize, unsigned long b_state)
1495{
1496 struct buffer_head *bh, *head, *tail;
1497
1498 head = alloc_page_buffers(page, blocksize, 1);
1499 bh = head;
1500 do {
1501 bh->b_state |= b_state;
1502 tail = bh;
1503 bh = bh->b_this_page;
1504 } while (bh);
1505 tail->b_this_page = head;
1506
1507 spin_lock(&page->mapping->private_lock);
1508 if (PageUptodate(page) || PageDirty(page)) {
1509 bh = head;
1510 do {
1511 if (PageDirty(page))
1512 set_buffer_dirty(bh);
1513 if (PageUptodate(page))
1514 set_buffer_uptodate(bh);
1515 bh = bh->b_this_page;
1516 } while (bh != head);
1517 }
1518 attach_page_buffers(page, head);
1519 spin_unlock(&page->mapping->private_lock);
1520}
1521EXPORT_SYMBOL(create_empty_buffers);
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1540{
1541 struct buffer_head *old_bh;
1542
1543 might_sleep();
1544
1545 old_bh = __find_get_block_slow(bdev, block);
1546 if (old_bh) {
1547 clear_buffer_dirty(old_bh);
1548 wait_on_buffer(old_bh);
1549 clear_buffer_req(old_bh);
1550 __brelse(old_bh);
1551 }
1552}
1553EXPORT_SYMBOL(unmap_underlying_metadata);
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584static int __block_write_full_page(struct inode *inode, struct page *page,
1585 get_block_t *get_block, struct writeback_control *wbc,
1586 bh_end_io_t *handler)
1587{
1588 int err;
1589 sector_t block;
1590 sector_t last_block;
1591 struct buffer_head *bh, *head;
1592 const unsigned blocksize = 1 << inode->i_blkbits;
1593 int nr_underway = 0;
1594 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1595 WRITE_SYNC : WRITE);
1596
1597 BUG_ON(!PageLocked(page));
1598
1599 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1600
1601 if (!page_has_buffers(page)) {
1602 create_empty_buffers(page, blocksize,
1603 (1 << BH_Dirty)|(1 << BH_Uptodate));
1604 }
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1617 head = page_buffers(page);
1618 bh = head;
1619
1620
1621
1622
1623
1624 do {
1625 if (block > last_block) {
1626
1627
1628
1629
1630
1631
1632
1633
1634 clear_buffer_dirty(bh);
1635 set_buffer_uptodate(bh);
1636 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1637 buffer_dirty(bh)) {
1638 WARN_ON(bh->b_size != blocksize);
1639 err = get_block(inode, block, bh, 1);
1640 if (err)
1641 goto recover;
1642 clear_buffer_delay(bh);
1643 if (buffer_new(bh)) {
1644
1645 clear_buffer_new(bh);
1646 unmap_underlying_metadata(bh->b_bdev,
1647 bh->b_blocknr);
1648 }
1649 }
1650 bh = bh->b_this_page;
1651 block++;
1652 } while (bh != head);
1653
1654 do {
1655 if (!buffer_mapped(bh))
1656 continue;
1657
1658
1659
1660
1661
1662
1663
1664 if (wbc->sync_mode != WB_SYNC_NONE) {
1665 lock_buffer(bh);
1666 } else if (!trylock_buffer(bh)) {
1667 redirty_page_for_writepage(wbc, page);
1668 continue;
1669 }
1670 if (test_clear_buffer_dirty(bh)) {
1671 mark_buffer_async_write_endio(bh, handler);
1672 } else {
1673 unlock_buffer(bh);
1674 }
1675 } while ((bh = bh->b_this_page) != head);
1676
1677
1678
1679
1680
1681 BUG_ON(PageWriteback(page));
1682 set_page_writeback(page);
1683
1684 do {
1685 struct buffer_head *next = bh->b_this_page;
1686 if (buffer_async_write(bh)) {
1687 submit_bh(write_op, bh);
1688 nr_underway++;
1689 }
1690 bh = next;
1691 } while (bh != head);
1692 unlock_page(page);
1693
1694 err = 0;
1695done:
1696 if (nr_underway == 0) {
1697
1698
1699
1700
1701
1702 end_page_writeback(page);
1703
1704
1705
1706
1707
1708 }
1709 return err;
1710
1711recover:
1712
1713
1714
1715
1716
1717
1718 bh = head;
1719
1720 do {
1721 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1722 !buffer_delay(bh)) {
1723 lock_buffer(bh);
1724 mark_buffer_async_write_endio(bh, handler);
1725 } else {
1726
1727
1728
1729
1730 clear_buffer_dirty(bh);
1731 }
1732 } while ((bh = bh->b_this_page) != head);
1733 SetPageError(page);
1734 BUG_ON(PageWriteback(page));
1735 mapping_set_error(page->mapping, err);
1736 set_page_writeback(page);
1737 do {
1738 struct buffer_head *next = bh->b_this_page;
1739 if (buffer_async_write(bh)) {
1740 clear_buffer_dirty(bh);
1741 submit_bh(write_op, bh);
1742 nr_underway++;
1743 }
1744 bh = next;
1745 } while (bh != head);
1746 unlock_page(page);
1747 goto done;
1748}
1749
1750
1751
1752
1753
1754
1755void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1756{
1757 unsigned int block_start, block_end;
1758 struct buffer_head *head, *bh;
1759
1760 BUG_ON(!PageLocked(page));
1761 if (!page_has_buffers(page))
1762 return;
1763
1764 bh = head = page_buffers(page);
1765 block_start = 0;
1766 do {
1767 block_end = block_start + bh->b_size;
1768
1769 if (buffer_new(bh)) {
1770 if (block_end > from && block_start < to) {
1771 if (!PageUptodate(page)) {
1772 unsigned start, size;
1773
1774 start = max(from, block_start);
1775 size = min(to, block_end) - start;
1776
1777 zero_user(page, start, size);
1778 set_buffer_uptodate(bh);
1779 }
1780
1781 clear_buffer_new(bh);
1782 mark_buffer_dirty(bh);
1783 }
1784 }
1785
1786 block_start = block_end;
1787 bh = bh->b_this_page;
1788 } while (bh != head);
1789}
1790EXPORT_SYMBOL(page_zero_new_buffers);
1791
/*
 * Prepare the buffers of a locked page for a write of @len bytes starting
 * at file position @pos.
 *
 * Buffers are created if the page has none.  Every block overlapping the
 * write range is mapped through @get_block (with create == 1) when it is
 * not mapped yet; freshly allocated blocks get the parts outside the write
 * range zeroed, and blocks that are only partially covered by the write and
 * are not uptodate are read in first.
 *
 * Returns 0 on success, the error from @get_block, or -EIO when one of the
 * reads did not leave its buffer uptodate.  On error, page_zero_new_buffers()
 * is run over the write range.
 */
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
{
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize, bbits;
	/*
	 * Only a block that contains 'from' or 'to' can be partially covered,
	 * so at most two reads are ever queued.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	bbits = inode->i_blkbits;
	/* file block number of the first buffer in this page */
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	/*
	 * The buffer list is circular; "!block_start" lets the first
	 * iteration through even though bh == head at that point.
	 */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start=block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			/* Block lies outside the write: just sync its flag with the page. */
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				/* get_block flagged the block as newly allocated */
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				/* Zero the parts of the new block the write will not cover. */
				if (block_end > to || block_start < from)
					zero_user_segments(page,
						to, block_end,
						block_start, from);
				continue;
			}
		}
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		/* Partially overwritten, not uptodate, and backed by real data: read it. */
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		     (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}
	}
	/*
	 * Wait for every read queued above; a buffer that is still not
	 * uptodate afterwards turns the result into -EIO.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		page_zero_new_buffers(page, from, to);
	return err;
}
EXPORT_SYMBOL(__block_write_begin);
1875
1876static int __block_commit_write(struct inode *inode, struct page *page,
1877 unsigned from, unsigned to)
1878{
1879 unsigned block_start, block_end;
1880 int partial = 0;
1881 unsigned blocksize;
1882 struct buffer_head *bh, *head;
1883
1884 blocksize = 1 << inode->i_blkbits;
1885
1886 for(bh = head = page_buffers(page), block_start = 0;
1887 bh != head || !block_start;
1888 block_start=block_end, bh = bh->b_this_page) {
1889 block_end = block_start + blocksize;
1890 if (block_end <= from || block_start >= to) {
1891 if (!buffer_uptodate(bh))
1892 partial = 1;
1893 } else {
1894 set_buffer_uptodate(bh);
1895 mark_buffer_dirty(bh);
1896 }
1897 clear_buffer_new(bh);
1898 }
1899
1900
1901
1902
1903
1904
1905
1906 if (!partial)
1907 SetPageUptodate(page);
1908 return 0;
1909}
1910
1911
1912
1913
1914
1915
1916
1917int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
1918 unsigned flags, struct page **pagep, get_block_t *get_block)
1919{
1920 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1921 struct page *page;
1922 int status;
1923
1924 page = grab_cache_page_write_begin(mapping, index, flags);
1925 if (!page)
1926 return -ENOMEM;
1927
1928 status = __block_write_begin(page, pos, len, get_block);
1929 if (unlikely(status)) {
1930 unlock_page(page);
1931 page_cache_release(page);
1932 page = NULL;
1933 }
1934
1935 *pagep = page;
1936 return status;
1937}
1938EXPORT_SYMBOL(block_write_begin);
1939
1940int block_write_end(struct file *file, struct address_space *mapping,
1941 loff_t pos, unsigned len, unsigned copied,
1942 struct page *page, void *fsdata)
1943{
1944 struct inode *inode = mapping->host;
1945 unsigned start;
1946
1947 start = pos & (PAGE_CACHE_SIZE - 1);
1948
1949 if (unlikely(copied < len)) {
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962 if (!PageUptodate(page))
1963 copied = 0;
1964
1965 page_zero_new_buffers(page, start+copied, start+len);
1966 }
1967 flush_dcache_page(page);
1968
1969
1970 __block_commit_write(inode, page, start, start+copied);
1971
1972 return copied;
1973}
1974EXPORT_SYMBOL(block_write_end);
1975
1976int generic_write_end(struct file *file, struct address_space *mapping,
1977 loff_t pos, unsigned len, unsigned copied,
1978 struct page *page, void *fsdata)
1979{
1980 struct inode *inode = mapping->host;
1981 int i_size_changed = 0;
1982
1983 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1984
1985
1986
1987
1988
1989
1990
1991
1992 if (pos+copied > inode->i_size) {
1993 i_size_write(inode, pos+copied);
1994 i_size_changed = 1;
1995 }
1996
1997 unlock_page(page);
1998 page_cache_release(page);
1999
2000
2001
2002
2003
2004
2005
2006 if (i_size_changed)
2007 mark_inode_dirty(inode);
2008
2009 return copied;
2010}
2011EXPORT_SYMBOL(generic_write_end);
2012
2013
2014
2015
2016
2017
2018
2019
2020int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2021 unsigned long from)
2022{
2023 struct inode *inode = page->mapping->host;
2024 unsigned block_start, block_end, blocksize;
2025 unsigned to;
2026 struct buffer_head *bh, *head;
2027 int ret = 1;
2028
2029 if (!page_has_buffers(page))
2030 return 0;
2031
2032 blocksize = 1 << inode->i_blkbits;
2033 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2034 to = from + to;
2035 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2036 return 0;
2037
2038 head = page_buffers(page);
2039 bh = head;
2040 block_start = 0;
2041 do {
2042 block_end = block_start + blocksize;
2043 if (block_end > from && block_start < to) {
2044 if (!buffer_uptodate(bh)) {
2045 ret = 0;
2046 break;
2047 }
2048 if (block_end >= to)
2049 break;
2050 }
2051 block_start = block_end;
2052 bh = bh->b_this_page;
2053 } while (bh != head);
2054
2055 return ret;
2056}
2057EXPORT_SYMBOL(block_is_partially_uptodate);
2058
2059
2060
2061
2062
2063
2064
2065
/*
 * Read a whole locked page through its buffers.
 *
 * Buffers are created if needed.  Each buffer that is not uptodate is
 * mapped with @get_block (create == 0) when it starts before the block
 * that covers i_size; buffers that stay unmapped are zero-filled, the rest
 * are collected and submitted for asynchronous reads.
 *
 * Always returns 0.  A @get_block failure is recorded with SetPageError().
 * If no read had to be submitted, the page is unlocked here (and marked
 * uptodate unless an error was recorded); otherwise completion is left to
 * end_buffer_async_read().
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize;
	int nr, i;
	int fully_mapped = 1;

	BUG_ON(!PageLocked(page));
	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);

	/* iblock: first file block of this page; lblock: i_size rounded up to blocks */
	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	/*
	 * Stage 1: decide per buffer whether it needs I/O.  Note that
	 * "continue" still evaluates the loop condition, so i, iblock and bh
	 * advance on every path.
	 */
	do {
		if (buffer_uptodate(bh))
			continue;

		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
			}
			if (!buffer_mapped(bh)) {
				/* Still unmapped: present zeroes instead of reading. */
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * get_block() may itself have left the buffer uptodate;
			 * no read is needed then.
			 */
			if (buffer_uptodate(bh))
				continue;
		}
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	if (!nr) {
		/*
		 * Nothing to read: finish the page right here instead of
		 * from an I/O completion.
		 */
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	/* Stage 2: lock all collected buffers and flag them for async read. */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the I/O.  A buffer may have become uptodate while we
	 * waited for its lock; such a buffer is completed directly instead of
	 * being read again.
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			submit_bh(READ, bh);
	}
	return 0;
}
EXPORT_SYMBOL(block_read_full_page);
2153
2154
2155
2156
2157
2158int generic_cont_expand_simple(struct inode *inode, loff_t size)
2159{
2160 struct address_space *mapping = inode->i_mapping;
2161 struct page *page;
2162 void *fsdata;
2163 int err;
2164
2165 err = inode_newsize_ok(inode, size);
2166 if (err)
2167 goto out;
2168
2169 err = pagecache_write_begin(NULL, mapping, size, 0,
2170 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2171 &page, &fsdata);
2172 if (err)
2173 goto out;
2174
2175 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2176 BUG_ON(err > 0);
2177
2178out:
2179 return err;
2180}
2181EXPORT_SYMBOL(generic_cont_expand_simple);
2182
/*
 * Zero-fill the file between the position stored in *@bytes and @pos by
 * writing zeroes through the pagecache, one page at a time.
 *
 * Whole pages up to (but not including) the page that holds @pos are
 * handled in the loop; the remainder inside @pos's own page is handled
 * afterwards.  Whenever the zeroing starts in the middle of a block,
 * *@bytes is first rounded up to the next block boundary.
 *
 * Returns 0 on success or a negative error from the pagecache write calls.
 *
 * NOTE(review): the loop re-reads *@bytes on every pass and only rounds it
 * up to a block boundary itself, so progress presumably depends on the
 * filesystem's write_end advancing *@bytes - confirm against the callers.
 */
static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
	void *fsdata;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
	int err = 0;

	/* page index and in-page offset of the target position */
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ~PAGE_CACHE_MASK;

	/* Zero from the current position to the end of each page before @pos's page. */
	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		if (zerofrom & (blocksize-1)) {
			/* round *bytes up to the next block boundary */
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = PAGE_CACHE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		/* a successful write_end must report the full length */
		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);
	}

	/* The current position is now in the same page as @pos. */
	if (index == curidx) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		/* nothing to zero when @pos is not beyond the current position */
		if (offset <= zerofrom) {
			goto out;
		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
						AOP_FLAG_UNINTERRUPTIBLE,
						&page, &fsdata);
		if (err)
			goto out;
		zero_user(page, zerofrom, len);
		err = pagecache_write_end(file, mapping, curpos, len, len,
						page, fsdata);
		if (err < 0)
			goto out;
		BUG_ON(err != len);
		err = 0;
	}
out:
	return err;
}
2251
2252
2253
2254
2255
2256int cont_write_begin(struct file *file, struct address_space *mapping,
2257 loff_t pos, unsigned len, unsigned flags,
2258 struct page **pagep, void **fsdata,
2259 get_block_t *get_block, loff_t *bytes)
2260{
2261 struct inode *inode = mapping->host;
2262 unsigned blocksize = 1 << inode->i_blkbits;
2263 unsigned zerofrom;
2264 int err;
2265
2266 err = cont_expand_zero(file, mapping, pos, bytes);
2267 if (err)
2268 return err;
2269
2270 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2271 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2272 *bytes |= (blocksize-1);
2273 (*bytes)++;
2274 }
2275
2276 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2277}
2278EXPORT_SYMBOL(cont_write_begin);
2279
2280int block_commit_write(struct page *page, unsigned from, unsigned to)
2281{
2282 struct inode *inode = page->mapping->host;
2283 __block_commit_write(inode,page,from,to);
2284 return 0;
2285}
2286EXPORT_SYMBOL(block_commit_write);
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2307 get_block_t get_block)
2308{
2309 struct page *page = vmf->page;
2310 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2311 unsigned long end;
2312 loff_t size;
2313 int ret;
2314
2315 lock_page(page);
2316 size = i_size_read(inode);
2317 if ((page->mapping != inode->i_mapping) ||
2318 (page_offset(page) > size)) {
2319
2320 ret = -EFAULT;
2321 goto out_unlock;
2322 }
2323
2324
2325 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2326 end = size & ~PAGE_CACHE_MASK;
2327 else
2328 end = PAGE_CACHE_SIZE;
2329
2330 ret = __block_write_begin(page, 0, end, get_block);
2331 if (!ret)
2332 ret = block_commit_write(page, 0, end);
2333
2334 if (unlikely(ret < 0))
2335 goto out_unlock;
2336
2337
2338
2339
2340
2341
2342
2343 set_page_dirty(page);
2344 if (inode->i_sb->s_frozen != SB_UNFROZEN) {
2345 ret = -EAGAIN;
2346 goto out_unlock;
2347 }
2348 wait_on_page_writeback(page);
2349 return 0;
2350out_unlock:
2351 unlock_page(page);
2352 return ret;
2353}
2354EXPORT_SYMBOL(__block_page_mkwrite);
2355
2356int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2357 get_block_t get_block)
2358{
2359 int ret;
2360 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2361
2362
2363
2364
2365
2366 vfs_check_frozen(sb, SB_FREEZE_WRITE);
2367 ret = __block_page_mkwrite(vma, vmf, get_block);
2368 return block_page_mkwrite_return(ret);
2369}
2370EXPORT_SYMBOL(block_page_mkwrite);
2371
2372
2373
2374
2375
2376
/*
 * Read-completion handler installed by nobh_write_begin() on its
 * temporary, not-yet-attached buffers.  It simply forwards to
 * __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2381
2382
2383
2384
2385
2386
2387static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2388{
2389 struct buffer_head *bh;
2390
2391 BUG_ON(!PageLocked(page));
2392
2393 spin_lock(&page->mapping->private_lock);
2394 bh = head;
2395 do {
2396 if (PageDirty(page))
2397 set_buffer_dirty(bh);
2398 if (!bh->b_this_page)
2399 bh->b_this_page = head;
2400 bh = bh->b_this_page;
2401 } while (bh != head);
2402 attach_page_buffers(page, head);
2403 spin_unlock(&page->mapping->private_lock);
2404}
2405
2406
2407
2408
2409
2410
/*
 * write_begin variant that avoids leaving buffer heads attached to the
 * page.
 *
 * If the page already has buffers, the work is delegated to
 * __block_write_begin().  If the page is already flagged as mapped to
 * disk, nothing needs to be done.  Otherwise a temporary, unattached list
 * of buffer heads is allocated, every block of the page is mapped with
 * @get_block, partially overwritten blocks are read in, and the list is
 * handed to the caller through *@fsdata for nobh_write_end().
 *
 * Returns 0 on success with the locked page in *@pagep.  On failure the
 * page is unlocked and released, *@pagep is set to NULL and a negative
 * error is returned (-ENOMEM without touching *@pagep if no page could be
 * obtained at all).
 */
int nobh_write_begin(struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
	int nr_reads = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	/* The page already carries buffers: use the regular buffered path. */
	if (page_has_buffers(page)) {
		ret = __block_write_begin(page, pos, len, get_block);
		if (unlikely(ret))
			goto out_release;
		return ret;
	}

	if (PageMappedToDisk(page))
		return 0;

	/*
	 * Allocate buffers that are used only for the duration of this
	 * write; they are not attached to the page unless something fails
	 * (see the "failed" label) or nobh_write_end() decides to attach
	 * them.
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
		ret = -ENOMEM;
		goto out_release;
	}

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);

	/*
	 * Map every block of the page.  The list is not circular here, so the
	 * loop is bounded by the byte offset rather than by the list itself.
	 */
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		/* only allocate blocks that start before the end of the write */
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		/* New or unmapped block: zero what the write will not cover. */
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
		}
		if (buffer_uptodate(bh))
			continue;	/* reiserfs does this */
		/* Partially overwritten block with existing data: read it. */
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
	}

	if (nr_reads) {
		/*
		 * Wait on every buffer of the (NULL-terminated) list; any
		 * buffer that is not uptodate afterwards fails the write
		 * with -EIO.
		 */
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				ret = -EIO;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);

	*fsdata = head; /* to be released by nobh_write_end */

	return 0;

failed:
	BUG_ON(!ret);
	/*
	 * Error recovery: attach the temporary buffers to the page so that
	 * page_zero_new_buffers() can zero any newly allocated blocks in the
	 * write range.
	 */
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

out_release:
	unlock_page(page);
	page_cache_release(page);
	*pagep = NULL;

	return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
2551
2552int nobh_write_end(struct file *file, struct address_space *mapping,
2553 loff_t pos, unsigned len, unsigned copied,
2554 struct page *page, void *fsdata)
2555{
2556 struct inode *inode = page->mapping->host;
2557 struct buffer_head *head = fsdata;
2558 struct buffer_head *bh;
2559 BUG_ON(fsdata != NULL && page_has_buffers(page));
2560
2561 if (unlikely(copied < len) && head)
2562 attach_nobh_buffers(page, head);
2563 if (page_has_buffers(page))
2564 return generic_write_end(file, mapping, pos, len,
2565 copied, page, fsdata);
2566
2567 SetPageUptodate(page);
2568 set_page_dirty(page);
2569 if (pos+copied > inode->i_size) {
2570 i_size_write(inode, pos+copied);
2571 mark_inode_dirty(inode);
2572 }
2573
2574 unlock_page(page);
2575 page_cache_release(page);
2576
2577 while (head) {
2578 bh = head;
2579 head = head->b_this_page;
2580 free_buffer_head(bh);
2581 }
2582
2583 return copied;
2584}
2585EXPORT_SYMBOL(nobh_write_end);
2586
2587
2588
2589
2590
2591
2592int nobh_writepage(struct page *page, get_block_t *get_block,
2593 struct writeback_control *wbc)
2594{
2595 struct inode * const inode = page->mapping->host;
2596 loff_t i_size = i_size_read(inode);
2597 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2598 unsigned offset;
2599 int ret;
2600
2601
2602 if (page->index < end_index)
2603 goto out;
2604
2605
2606 offset = i_size & (PAGE_CACHE_SIZE-1);
2607 if (page->index >= end_index+1 || !offset) {
2608
2609
2610
2611
2612
2613#if 0
2614
2615 if (page->mapping->a_ops->invalidatepage)
2616 page->mapping->a_ops->invalidatepage(page, offset);
2617#endif
2618 unlock_page(page);
2619 return 0;
2620 }
2621
2622
2623
2624
2625
2626
2627
2628
2629 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2630out:
2631 ret = mpage_writepage(page, get_block, wbc);
2632 if (ret == -EAGAIN)
2633 ret = __block_write_full_page(inode, page, get_block, wbc,
2634 end_buffer_async_write);
2635 return ret;
2636}
2637EXPORT_SYMBOL(nobh_writepage);
2638
/*
 * Zero the tail of the block that contains file offset @from, for mappings
 * using the nobh scheme.
 *
 * Nothing is done when @from is block aligned or when the block is not
 * mapped.  If the page has (or acquires) buffers, the work is handed to
 * block_truncate_page().
 *
 * Returns 0 on success, -ENOMEM if the page cannot be obtained, -EIO if the
 * page could not be read, or the error from @get_block / ->readpage.
 */
int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	/* number of bytes from @from to the end of its block */
	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (page_has_buffers(page)) {
has_buffers:
		/* Also reached by goto after ->readpage() attached buffers. */
		unlock_page(page);
		page_cache_release(page);
		return block_truncate_page(mapping, from, get_block);
	}

	/* Find the block that contains the given offset */
	pos = blocksize;
	while (offset >= pos) {
		iblock++;
		pos += blocksize;
	}

	/* Probe the mapping with an on-stack buffer head (no allocation). */
	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			/*
			 * Only the reference is dropped here, not the lock.
			 * NOTE(review): presumably ->readpage() has already
			 * unlocked the page on this path - confirm.
			 */
			page_cache_release(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(nobh_truncate_page);
2716
/*
 * Zero the tail of the block that contains file offset @from, using the
 * page's buffer heads.
 *
 * Nothing is done when @from is block aligned or when the block is not
 * mapped.  Otherwise the block is read in if necessary, the bytes from
 * @from to the end of the block are zeroed, and the buffer is marked
 * dirty.
 *
 * Returns 0 on success, -ENOMEM if the page cannot be obtained, -EIO if
 * the block could not be read, or the error from @get_block.
 */
int block_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head *bh;
	int err;

	blocksize = 1 << inode->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	/* number of bytes from @from to the end of its block */
	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
		goto out;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		err = get_block(inode, iblock, bh, 0);
		if (err)
			goto unlock;
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
		/* err is preset so a failed read can simply jump to unlock */
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
	err = 0;

unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return err;
}
EXPORT_SYMBOL(block_truncate_page);
2792
2793
2794
2795
2796
2797int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2798 struct writeback_control *wbc, bh_end_io_t *handler)
2799{
2800 struct inode * const inode = page->mapping->host;
2801 loff_t i_size = i_size_read(inode);
2802 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2803 unsigned offset;
2804
2805
2806 if (page->index < end_index)
2807 return __block_write_full_page(inode, page, get_block, wbc,
2808 handler);
2809
2810
2811 offset = i_size & (PAGE_CACHE_SIZE-1);
2812 if (page->index >= end_index+1 || !offset) {
2813
2814
2815
2816
2817
2818 do_invalidatepage(page, 0);
2819 unlock_page(page);
2820 return 0;
2821 }
2822
2823
2824
2825
2826
2827
2828
2829
2830 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2831 return __block_write_full_page(inode, page, get_block, wbc, handler);
2832}
2833EXPORT_SYMBOL(block_write_full_page_endio);
2834
2835
2836
2837
/*
 * The generic ->writepage helper for buffer-backed pages:
 * block_write_full_page_endio() with end_buffer_async_write as the
 * completion handler.
 */
int block_write_full_page(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, get_block, wbc,
					   end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
2845
2846sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2847 get_block_t *get_block)
2848{
2849 struct buffer_head tmp;
2850 struct inode *inode = mapping->host;
2851 tmp.b_state = 0;
2852 tmp.b_blocknr = 0;
2853 tmp.b_size = 1 << inode->i_blkbits;
2854 get_block(inode, block, &tmp, 0);
2855 return tmp.b_blocknr;
2856}
2857EXPORT_SYMBOL(generic_block_bmap);
2858
2859static void end_bio_bh_io_sync(struct bio *bio, int err)
2860{
2861 struct buffer_head *bh = bio->bi_private;
2862
2863 if (err == -EOPNOTSUPP) {
2864 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2865 }
2866
2867 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2868 set_bit(BH_Quiet, &bh->b_state);
2869
2870 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2871 bio_put(bio);
2872}
2873
/*
 * Submit a single-segment bio that transfers the data of @bh in direction
 * @rw.
 *
 * The buffer must be locked, mapped, have a completion handler set, and be
 * neither delayed nor unwritten (all enforced with BUG_ON).  Completion is
 * reported through bh->b_end_io via end_bio_bh_io_sync().
 *
 * Returns 0, or -EOPNOTSUPP if the bio is flagged BIO_EOPNOTSUPP once
 * submit_bio() returns.
 */
int submit_bh(int rw, struct buffer_head * bh)
{
	struct bio *bio;
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting: the "req" flag was
	 * already set, i.e. the buffer has been submitted before.
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);

	/*
	 * From here on the request is described by a bio with exactly one
	 * segment covering the buffer's data.
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	/* b_blocknr counts b_size units; bi_sector counts 512-byte sectors */
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/*
	 * Take an extra reference so the bio can still be inspected after
	 * submit_bio(), even if it has already completed by then.
	 */
	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(submit_bh);
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2947{
2948 int i;
2949
2950 for (i = 0; i < nr; i++) {
2951 struct buffer_head *bh = bhs[i];
2952
2953 if (!trylock_buffer(bh))
2954 continue;
2955 if (rw == WRITE) {
2956 if (test_clear_buffer_dirty(bh)) {
2957 bh->b_end_io = end_buffer_write_sync;
2958 get_bh(bh);
2959 submit_bh(WRITE, bh);
2960 continue;
2961 }
2962 } else {
2963 if (!buffer_uptodate(bh)) {
2964 bh->b_end_io = end_buffer_read_sync;
2965 get_bh(bh);
2966 submit_bh(rw, bh);
2967 continue;
2968 }
2969 }
2970 unlock_buffer(bh);
2971 }
2972}
2973EXPORT_SYMBOL(ll_rw_block);
2974
2975void write_dirty_buffer(struct buffer_head *bh, int rw)
2976{
2977 lock_buffer(bh);
2978 if (!test_clear_buffer_dirty(bh)) {
2979 unlock_buffer(bh);
2980 return;
2981 }
2982 bh->b_end_io = end_buffer_write_sync;
2983 get_bh(bh);
2984 submit_bh(rw, bh);
2985}
2986EXPORT_SYMBOL(write_dirty_buffer);
2987
2988
2989
2990
2991
2992
2993int __sync_dirty_buffer(struct buffer_head *bh, int rw)
2994{
2995 int ret = 0;
2996
2997 WARN_ON(atomic_read(&bh->b_count) < 1);
2998 lock_buffer(bh);
2999 if (test_clear_buffer_dirty(bh)) {
3000 get_bh(bh);
3001 bh->b_end_io = end_buffer_write_sync;
3002 ret = submit_bh(rw, bh);
3003 wait_on_buffer(bh);
3004 if (!ret && !buffer_uptodate(bh))
3005 ret = -EIO;
3006 } else {
3007 unlock_buffer(bh);
3008 }
3009 return ret;
3010}
3011EXPORT_SYMBOL(__sync_dirty_buffer);
3012
/*
 * Synchronously write out @bh if it is dirty, using WRITE_SYNC.
 * See __sync_dirty_buffer() for the return values.
 */
int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039static inline int buffer_busy(struct buffer_head *bh)
3040{
3041 return atomic_read(&bh->b_count) |
3042 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3043}
3044
3045static int
3046drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3047{
3048 struct buffer_head *head = page_buffers(page);
3049 struct buffer_head *bh;
3050
3051 bh = head;
3052 do {
3053 if (buffer_write_io_error(bh) && page->mapping)
3054 set_bit(AS_EIO, &page->mapping->flags);
3055 if (buffer_busy(bh))
3056 goto failed;
3057 bh = bh->b_this_page;
3058 } while (bh != head);
3059
3060 do {
3061 struct buffer_head *next = bh->b_this_page;
3062
3063 if (bh->b_assoc_map)
3064 __remove_assoc_queue(bh);
3065 bh = next;
3066 } while (bh != head);
3067 *buffers_to_free = head;
3068 __clear_page_buffers(page);
3069 return 1;
3070failed:
3071 return 0;
3072}
3073
3074int try_to_free_buffers(struct page *page)
3075{
3076 struct address_space * const mapping = page->mapping;
3077 struct buffer_head *buffers_to_free = NULL;
3078 int ret = 0;
3079
3080 BUG_ON(!PageLocked(page));
3081 if (PageWriteback(page))
3082 return 0;
3083
3084 if (mapping == NULL) {
3085 ret = drop_buffers(page, &buffers_to_free);
3086 goto out;
3087 }
3088
3089 spin_lock(&mapping->private_lock);
3090 ret = drop_buffers(page, &buffers_to_free);
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106 if (ret)
3107 cancel_dirty_page(page, PAGE_CACHE_SIZE);
3108 spin_unlock(&mapping->private_lock);
3109out:
3110 if (buffers_to_free) {
3111 struct buffer_head *bh = buffers_to_free;
3112
3113 do {
3114 struct buffer_head *next = bh->b_this_page;
3115 free_buffer_head(bh);
3116 bh = next;
3117 } while (bh != buffers_to_free);
3118 }
3119 return ret;
3120}
3121EXPORT_SYMBOL(try_to_free_buffers);
3122
3123
3124
3125
3126
3127
3128
3129
3130SYSCALL_DEFINE2(bdflush, int, func, long, data)
3131{
3132 static int msg_count;
3133
3134 if (!capable(CAP_SYS_ADMIN))
3135 return -EPERM;
3136
3137 if (msg_count < 5) {
3138 msg_count++;
3139 printk(KERN_INFO
3140 "warning: process `%s' used the obsolete bdflush"
3141 " system call\n", current->comm);
3142 printk(KERN_INFO "Fix your initscripts?\n");
3143 }
3144
3145 if (func == 1)
3146 do_exit(0);
3147 return 0;
3148}
3149
3150
3151
3152
/*
 * Slab cache that all buffer heads are allocated from; created in
 * buffer_init().
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Upper bound on the number of buffer heads; computed once in
 * buffer_init() and compared against the running total in
 * recalc_bh_state().
 */
static int max_buffer_heads;

/* Set by recalc_bh_state() when the total exceeds max_buffer_heads. */
int buffer_heads_over_limit;

struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

/*
 * Per-cpu counters: nr is adjusted on every alloc/free, ratelimit counts
 * calls to recalc_bh_state() so that the cross-cpu sum is only taken
 * occasionally.
 */
static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3169
3170static void recalc_bh_state(void)
3171{
3172 int i;
3173 int tot = 0;
3174
3175 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3176 return;
3177 __this_cpu_write(bh_accounting.ratelimit, 0);
3178 for_each_online_cpu(i)
3179 tot += per_cpu(bh_accounting, i).nr;
3180 buffer_heads_over_limit = (tot > max_buffer_heads);
3181}
3182
3183struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3184{
3185 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3186 if (ret) {
3187 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3188 preempt_disable();
3189 __this_cpu_inc(bh_accounting.nr);
3190 recalc_bh_state();
3191 preempt_enable();
3192 }
3193 return ret;
3194}
3195EXPORT_SYMBOL(alloc_buffer_head);
3196
/*
 * Return @bh to the slab cache and drop it from this cpu's live-buffer
 * count.  The buffer must no longer be on an association list.
 */
void free_buffer_head(struct buffer_head *bh)
{
	BUG_ON(!list_empty(&bh->b_assoc_buffers));
	kmem_cache_free(bh_cachep, bh);
	/* the __this_cpu operations below need a stable cpu */
	preempt_disable();
	__this_cpu_dec(bh_accounting.nr);
	recalc_bh_state();
	preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);
3207
3208static void buffer_exit_cpu(int cpu)
3209{
3210 int i;
3211 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3212
3213 for (i = 0; i < BH_LRU_SIZE; i++) {
3214 brelse(b->bhs[i]);
3215 b->bhs[i] = NULL;
3216 }
3217 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3218 per_cpu(bh_accounting, cpu).nr = 0;
3219}
3220
3221static int buffer_cpu_notify(struct notifier_block *self,
3222 unsigned long action, void *hcpu)
3223{
3224 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3225 buffer_exit_cpu((unsigned long)hcpu);
3226 return NOTIFY_OK;
3227}
3228
3229
3230
3231
3232
3233
3234
3235
/*
 * Check whether @bh is uptodate, locking it if it is not.
 *
 * Returns 1 if the buffer is uptodate (and unlocked by this function),
 * or 0 if it is not uptodate, in which case the buffer is returned
 * locked.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);
	/* Still stale now that we hold the lock: the caller keeps the lock. */
	if (!buffer_uptodate(bh))
		return 0;

	/* It became uptodate while we waited for the lock. */
	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3247
3248
3249
3250
3251
3252
3253
3254int bh_submit_read(struct buffer_head *bh)
3255{
3256 BUG_ON(!buffer_locked(bh));
3257
3258 if (buffer_uptodate(bh)) {
3259 unlock_buffer(bh);
3260 return 0;
3261 }
3262
3263 get_bh(bh);
3264 bh->b_end_io = end_buffer_read_sync;
3265 submit_bh(READ, bh);
3266 wait_on_buffer(bh);
3267 if (buffer_uptodate(bh))
3268 return 0;
3269 return -EIO;
3270}
3271EXPORT_SYMBOL(bh_submit_read);
3272
3273void __init buffer_init(void)
3274{
3275 int nrpages;
3276
3277 bh_cachep = kmem_cache_create("buffer_head",
3278 sizeof(struct buffer_head), 0,
3279 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3280 SLAB_MEM_SPREAD),
3281 NULL);
3282
3283
3284
3285
3286 nrpages = (nr_free_buffer_pages() * 10) / 100;
3287 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3288 hotcpu_notifier(buffer_cpu_notify, 0);
3289}
3290