1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/export.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44
/* Forward declaration; the definition appears further down in this file. */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);

/* Convert a b_assoc_buffers list node into its enclosing struct buffer_head. */
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
48
49void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
50{
51 bh->b_end_io = handler;
52 bh->b_private = private;
53}
54EXPORT_SYMBOL(init_buffer);
55
/*
 * Wait action passed to wait_on_bit()/wait_on_bit_lock() by the helpers
 * below: sleep through io_schedule() and always return 0.
 * @word is not used.
 */
static int sleep_on_buffer(void *word)
{
	io_schedule();
	return 0;
}
61
62void __lock_buffer(struct buffer_head *bh)
63{
64 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
65 TASK_UNINTERRUPTIBLE);
66}
67EXPORT_SYMBOL(__lock_buffer);
68
/*
 * unlock_buffer - release the BH_Lock bit of a buffer and wake its waiters.
 * @bh: buffer to unlock
 *
 * The three steps are order-sensitive: the bit is cleared first, then
 * smp_mb__after_clear_bit() is issued, and only then does wake_up_bit()
 * look for sleepers waiting on the same bit.
 */
void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
76
77
78
79
80
81
82void __wait_on_buffer(struct buffer_head * bh)
83{
84 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
85}
86EXPORT_SYMBOL(__wait_on_buffer);
87
/*
 * Detach the private data from @page: clear PG_private, zero the page's
 * private word, and drop one page reference with page_cache_release().
 * NOTE(review): the dropped reference is presumably the one taken when the
 * buffers were attached - confirm against attach_page_buffers().
 */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
95
96
97static int quiet_error(struct buffer_head *bh)
98{
99 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
100 return 0;
101 return 1;
102}
103
104
105static void buffer_io_error(struct buffer_head *bh)
106{
107 char b[BDEVNAME_SIZE];
108 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
109 bdevname(bh->b_bdev, b),
110 (unsigned long long)bh->b_blocknr);
111}
112
113
114
115
116
117
118
119
120
/*
 * Common tail of read completion: reflect @uptodate in the buffer's
 * uptodate flag and unlock it.  The buffer reference count is left alone.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
131
132
133
134
135
/*
 * end_buffer_read_sync - completion handler for a synchronous read.
 * @bh:       buffer whose I/O finished
 * @uptodate: non-zero when the read succeeded
 *
 * Updates the uptodate flag, unlocks the buffer and then drops one
 * reference with put_bh() (__bread_slow() takes one before submitting).
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
142
143void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
144{
145 char b[BDEVNAME_SIZE];
146
147 if (uptodate) {
148 set_buffer_uptodate(bh);
149 } else {
150 if (!quiet_error(bh)) {
151 buffer_io_error(bh);
152 printk(KERN_WARNING "lost page write due to "
153 "I/O error on %s\n",
154 bdevname(bh->b_bdev, b));
155 }
156 set_buffer_write_io_error(bh);
157 clear_buffer_uptodate(bh);
158 }
159 unlock_buffer(bh);
160 put_bh(bh);
161}
162EXPORT_SYMBOL(end_buffer_write_sync);
163
164
165
166
167
168
169
170
171
172
173
174
175static struct buffer_head *
176__find_get_block_slow(struct block_device *bdev, sector_t block)
177{
178 struct inode *bd_inode = bdev->bd_inode;
179 struct address_space *bd_mapping = bd_inode->i_mapping;
180 struct buffer_head *ret = NULL;
181 pgoff_t index;
182 struct buffer_head *bh;
183 struct buffer_head *head;
184 struct page *page;
185 int all_mapped = 1;
186
187 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
188 page = find_get_page(bd_mapping, index);
189 if (!page)
190 goto out;
191
192 spin_lock(&bd_mapping->private_lock);
193 if (!page_has_buffers(page))
194 goto out_unlock;
195 head = page_buffers(page);
196 bh = head;
197 do {
198 if (!buffer_mapped(bh))
199 all_mapped = 0;
200 else if (bh->b_blocknr == block) {
201 ret = bh;
202 get_bh(bh);
203 goto out_unlock;
204 }
205 bh = bh->b_this_page;
206 } while (bh != head);
207
208
209
210
211
212
213 if (all_mapped) {
214 char b[BDEVNAME_SIZE];
215
216 printk("__find_get_block_slow() failed. "
217 "block=%llu, b_blocknr=%llu\n",
218 (unsigned long long)block,
219 (unsigned long long)bh->b_blocknr);
220 printk("b_state=0x%08lx, b_size=%zu\n",
221 bh->b_state, bh->b_size);
222 printk("device %s blocksize: %d\n", bdevname(bdev, b),
223 1 << bd_inode->i_blkbits);
224 }
225out_unlock:
226 spin_unlock(&bd_mapping->private_lock);
227 page_cache_release(page);
228out:
229 return ret;
230}
231
232
233
234
235static void free_more_memory(void)
236{
237 struct zone *zone;
238 int nid;
239
240 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
241 yield();
242
243 for_each_online_node(nid) {
244 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
245 gfp_zone(GFP_NOFS), NULL,
246 &zone);
247 if (zone)
248 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
249 GFP_NOFS, NULL);
250 }
251}
252
253
254
255
256
/*
 * I/O completion handler installed by mark_buffer_async_read().
 *
 * Records the outcome of the read on @bh, then checks whether any other
 * buffer on the same page still has its async_read flag set.  Only when
 * none does is the page finished: it is marked uptodate (if every buffer
 * is uptodate and the page carries no error) and unlocked.
 *
 * @bh:       buffer whose read finished; BUGs if async_read is not set
 * @uptodate: non-zero when the read succeeded
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		if (!quiet_error(bh))
			buffer_io_error(bh);
		SetPageError(page);
	}

	/*
	 * The scan below runs with local interrupts disabled and the
	 * BH_Uptodate_Lock bit spinlock held in the FIRST buffer's b_state,
	 * so all completions for this page serialise on the same bit.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	/* Walk the whole ring, starting with bh itself (flag now clear). */
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			/* Another read is still in flight; it must hold the buffer lock. */
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);

	/* No async read left on the page: finish it. */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	/* Leave the page locked; a later completion will finish it. */
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
314
315
316
317
318
/*
 * end_buffer_async_write - completion handler for page-based async writes.
 * @bh:       buffer whose write finished; BUGs if async_write is not set
 * @uptodate: non-zero when the write succeeded
 *
 * On failure the error is logged (subject to quiet_error()), AS_EIO is set
 * on the page's mapping, the buffer is flagged write_io_error and loses its
 * uptodate flag, and the page gets its error flag.  Afterwards, if no other
 * buffer on the page still has async_write set, page writeback is ended.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!quiet_error(bh)) {
			buffer_io_error(bh);
			printk(KERN_WARNING "lost page write due to "
					"I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		}
		set_bit(AS_EIO, &page->mapping->flags);
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	/*
	 * Same serialisation as the async read path: interrupts off and the
	 * BH_Uptodate_Lock bit spinlock in the first buffer's b_state.
	 */
	first = page_buffers(page);
	local_irq_save(flags);
	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	/* Check every OTHER buffer on the page for a write still in flight. */
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	end_page_writeback(page);
	return;

still_busy:
	/* A later completion on this page will end writeback. */
	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
	local_irq_restore(flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/*
 * Prepare @bh for a page-based asynchronous read: install
 * end_buffer_async_read() as the completion handler, then set the
 * async_read flag that the handler asserts on.
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}
397
/*
 * Prepare @bh for a page-based asynchronous write using a caller-chosen
 * completion @handler: store the handler in b_end_io, then set the
 * async_write flag.
 */
static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}
404
/*
 * mark_buffer_async_write - prepare @bh for an asynchronous write that
 * completes through end_buffer_async_write().
 */
void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464static void __remove_assoc_queue(struct buffer_head *bh)
465{
466 list_del_init(&bh->b_assoc_buffers);
467 WARN_ON(!bh->b_assoc_map);
468 if (buffer_write_io_error(bh))
469 set_bit(AS_EIO, &bh->b_assoc_map->flags);
470 bh->b_assoc_map = NULL;
471}
472
473int inode_has_buffers(struct inode *inode)
474{
475 return !list_empty(&inode->i_data.private_list);
476}
477
478
479
480
481
482
483
484
485
486
487
/*
 * Wait until no buffer on @list is locked.
 *
 * The list is walked from its tail under @lock.  When a locked buffer is
 * found, a reference is taken, @lock is dropped for the duration of
 * wait_on_buffer(), and the walk restarts from the beginning (the list is
 * not protected while @lock is released).
 *
 * Returns 0, or -EIO if any buffer that was waited on turned out not to be
 * uptodate afterwards.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			/* Pin the buffer so it survives while the lock is dropped. */
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}
512
513static void do_thaw_one(struct super_block *sb, void *unused)
514{
515 char b[BDEVNAME_SIZE];
516 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
517 printk(KERN_WARNING "Emergency Thaw on %s\n",
518 bdevname(sb->s_bdev, b));
519}
520
521static void do_thaw_all(struct work_struct *work)
522{
523 iterate_supers(do_thaw_one, NULL);
524 kfree(work);
525 printk(KERN_WARNING "Emergency Thaw complete\n");
526}
527
528
529
530
531
532
533void emergency_thaw_all(void)
534{
535 struct work_struct *work;
536
537 work = kmalloc(sizeof(*work), GFP_ATOMIC);
538 if (work) {
539 INIT_WORK(work, do_thaw_all);
540 schedule_work(work);
541 }
542}
543
544
545
546
547
548
549
550
551
552
553
554
555int sync_mapping_buffers(struct address_space *mapping)
556{
557 struct address_space *buffer_mapping = mapping->private_data;
558
559 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
560 return 0;
561
562 return fsync_buffers_list(&buffer_mapping->private_lock,
563 &mapping->private_list);
564}
565EXPORT_SYMBOL(sync_mapping_buffers);
566
567
568
569
570
571
572
573void write_boundary_block(struct block_device *bdev,
574 sector_t bblock, unsigned blocksize)
575{
576 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
577 if (bh) {
578 if (buffer_dirty(bh))
579 ll_rw_block(WRITE, 1, &bh);
580 put_bh(bh);
581 }
582}
583
584void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
585{
586 struct address_space *mapping = inode->i_mapping;
587 struct address_space *buffer_mapping = bh->b_page->mapping;
588
589 mark_buffer_dirty(bh);
590 if (!mapping->private_data) {
591 mapping->private_data = buffer_mapping;
592 } else {
593 BUG_ON(mapping->private_data != buffer_mapping);
594 }
595 if (!bh->b_assoc_map) {
596 spin_lock(&buffer_mapping->private_lock);
597 list_move_tail(&bh->b_assoc_buffers,
598 &mapping->private_list);
599 bh->b_assoc_map = mapping;
600 spin_unlock(&buffer_mapping->private_lock);
601 }
602}
603EXPORT_SYMBOL(mark_buffer_dirty_inode);
604
605
606
607
608
609
610
611
/*
 * Finish dirtying @page after its dirty flag has been newly set.
 *
 * Under @mapping->tree_lock (taken with spin_lock_irq), and only if the
 * page still has a mapping, the dirtying is accounted and the page is
 * tagged PAGECACHE_TAG_DIRTY in the mapping's radix tree.  The owning inode
 * is then marked I_DIRTY_PAGES outside the lock.
 *
 * @warn: when non-zero, warn once if the page is not uptodate.
 *
 * NOTE(review): spin_lock_irq()/spin_unlock_irq() unconditionally re-enable
 * interrupts, so this presumably must not be called with interrupts already
 * disabled - confirm against callers.
 */
static void __set_page_dirty(struct page *page,
		struct address_space *mapping, int warn)
{
	spin_lock_irq(&mapping->tree_lock);
	if (page->mapping) {
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
	}
	spin_unlock_irq(&mapping->tree_lock);
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
/*
 * __set_page_dirty_buffers - dirty a page together with all of its buffers.
 *
 * Returns non-zero when the page's dirty flag was newly set by this call.
 *
 * A page without a mapping only gets its dirty flag set.  Otherwise every
 * buffer on the page is marked dirty and the page flag is test-and-set
 * while holding the mapping's private_lock; the radix-tree tagging and
 * inode dirtying (__set_page_dirty) happen after the lock is released and
 * only for a newly dirtied page.
 */
int __set_page_dirty_buffers(struct page *page)
{
	int newly_dirty;
	struct address_space *mapping = page_mapping(page);

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	/* Buffers are dirtied before the page flag, both under private_lock. */
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);
	return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
/*
 * Write out and wait upon the buffers on @list.
 *
 * @lock: spinlock protecting @list (dropped around anything that may block)
 * @list: association list to flush
 *
 * Works in three phases:
 *  1. Every buffer is taken off @list; those that are dirty or locked are
 *     parked on a private list, and the dirty ones are submitted with
 *     write_dirty_buffer(WRITE_SYNC) under a block plug.
 *  2. The parked buffers are waited on one by one; a buffer that is dirty
 *     again at that point is put back on its mapping's private_list.
 *  3. osync_buffers_list() waits for anything still locked on @list.
 *
 * Returns -EIO if a waited-on buffer was not uptodate, otherwise the result
 * of osync_buffers_list().
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	struct address_space *mapping;
	int err = 0, err2;
	struct blk_plug plug;

	INIT_LIST_HEAD(&tmp);
	blk_start_plug(&plug);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		/* __remove_assoc_queue() clears b_assoc_map, so save it first. */
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		/* Barrier between dropping the association and sampling the state bits. */
		smp_mb();
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			bh->b_assoc_map = mapping;
			if (buffer_dirty(bh)) {
				/* Hold a reference while the lock is dropped for submission. */
				get_bh(bh);
				spin_unlock(lock);
				write_dirty_buffer(bh, WRITE_SYNC);
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	/* Finish the plug without holding the spinlock. */
	spin_unlock(lock);
	blk_finish_plug(&plug);
	spin_lock(lock);

	while (!list_empty(&tmp)) {
		/* Buffers were added at the head of tmp; take them from the tail. */
		bh = BH_ENTRY(tmp.prev);
		get_bh(bh);
		mapping = bh->b_assoc_map;
		__remove_assoc_queue(bh);
		smp_mb();
		if (buffer_dirty(bh)) {
			/* Dirtied again in the meantime: keep it associated. */
			list_add(&bh->b_assoc_buffers,
				 &mapping->private_list);
			bh->b_assoc_map = mapping;
		}
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}

	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}
775
776
777
778
779
780
781
782
783
784
785void invalidate_inode_buffers(struct inode *inode)
786{
787 if (inode_has_buffers(inode)) {
788 struct address_space *mapping = &inode->i_data;
789 struct list_head *list = &mapping->private_list;
790 struct address_space *buffer_mapping = mapping->private_data;
791
792 spin_lock(&buffer_mapping->private_lock);
793 while (!list_empty(list))
794 __remove_assoc_queue(BH_ENTRY(list->next));
795 spin_unlock(&buffer_mapping->private_lock);
796 }
797}
798EXPORT_SYMBOL(invalidate_inode_buffers);
799
800
801
802
803
804
805
806int remove_inode_buffers(struct inode *inode)
807{
808 int ret = 1;
809
810 if (inode_has_buffers(inode)) {
811 struct address_space *mapping = &inode->i_data;
812 struct list_head *list = &mapping->private_list;
813 struct address_space *buffer_mapping = mapping->private_data;
814
815 spin_lock(&buffer_mapping->private_lock);
816 while (!list_empty(list)) {
817 struct buffer_head *bh = BH_ENTRY(list->next);
818 if (buffer_dirty(bh)) {
819 ret = 0;
820 break;
821 }
822 __remove_assoc_queue(bh);
823 }
824 spin_unlock(&buffer_mapping->private_lock);
825 }
826 return ret;
827}
828
829
830
831
832
833
834
835
836
837
838struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
839 int retry)
840{
841 struct buffer_head *bh, *head;
842 long offset;
843
844try_again:
845 head = NULL;
846 offset = PAGE_SIZE;
847 while ((offset -= size) >= 0) {
848 bh = alloc_buffer_head(GFP_NOFS);
849 if (!bh)
850 goto no_grow;
851
852 bh->b_this_page = head;
853 bh->b_blocknr = -1;
854 head = bh;
855
856 bh->b_size = size;
857
858
859 set_bh_page(bh, page, offset);
860
861 init_buffer(bh, NULL, NULL);
862 }
863 return head;
864
865
866
867no_grow:
868 if (head) {
869 do {
870 bh = head;
871 head = head->b_this_page;
872 free_buffer_head(bh);
873 } while (head);
874 }
875
876
877
878
879
880
881
882 if (!retry)
883 return NULL;
884
885
886
887
888
889
890
891 free_more_memory();
892 goto try_again;
893}
894EXPORT_SYMBOL_GPL(alloc_page_buffers);
895
896static inline void
897link_dev_buffers(struct page *page, struct buffer_head *head)
898{
899 struct buffer_head *bh, *tail;
900
901 bh = head;
902 do {
903 tail = bh;
904 bh = bh->b_this_page;
905 } while (bh);
906 tail->b_this_page = head;
907 attach_page_buffers(page, head);
908}
909
910static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
911{
912 sector_t retval = ~((sector_t)0);
913 loff_t sz = i_size_read(bdev->bd_inode);
914
915 if (sz) {
916 unsigned int sizebits = blksize_bits(size);
917 retval = (sz >> sizebits);
918 }
919 return retval;
920}
921
922
923
924
925static sector_t
926init_page_buffers(struct page *page, struct block_device *bdev,
927 sector_t block, int size)
928{
929 struct buffer_head *head = page_buffers(page);
930 struct buffer_head *bh = head;
931 int uptodate = PageUptodate(page);
932 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
933
934 do {
935 if (!buffer_mapped(bh)) {
936 init_buffer(bh, NULL, NULL);
937 bh->b_bdev = bdev;
938 bh->b_blocknr = block;
939 if (uptodate)
940 set_buffer_uptodate(bh);
941 if (block < end_block)
942 set_buffer_mapped(bh);
943 }
944 block++;
945 bh = bh->b_this_page;
946 } while (bh != head);
947
948
949
950
951 return end_block;
952}
953
954
955
956
957
958
959static int
960grow_dev_page(struct block_device *bdev, sector_t block,
961 pgoff_t index, int size, int sizebits)
962{
963 struct inode *inode = bdev->bd_inode;
964 struct page *page;
965 struct buffer_head *bh;
966 sector_t end_block;
967 int ret = 0;
968
969 page = find_or_create_page(inode->i_mapping, index,
970 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
971 if (!page)
972 return ret;
973
974 BUG_ON(!PageLocked(page));
975
976 if (page_has_buffers(page)) {
977 bh = page_buffers(page);
978 if (bh->b_size == size) {
979 end_block = init_page_buffers(page, bdev,
980 index << sizebits, size);
981 goto done;
982 }
983 if (!try_to_free_buffers(page))
984 goto failed;
985 }
986
987
988
989
990 bh = alloc_page_buffers(page, size, 0);
991 if (!bh)
992 goto failed;
993
994
995
996
997
998
999 spin_lock(&inode->i_mapping->private_lock);
1000 link_dev_buffers(page, bh);
1001 end_block = init_page_buffers(page, bdev, index << sizebits, size);
1002 spin_unlock(&inode->i_mapping->private_lock);
1003done:
1004 ret = (block < end_block) ? 1 : -ENXIO;
1005failed:
1006 unlock_page(page);
1007 page_cache_release(page);
1008 return ret;
1009}
1010
1011
1012
1013
1014
1015static int
1016grow_buffers(struct block_device *bdev, sector_t block, int size)
1017{
1018 pgoff_t index;
1019 int sizebits;
1020
1021 sizebits = -1;
1022 do {
1023 sizebits++;
1024 } while ((size << sizebits) < PAGE_SIZE);
1025
1026 index = block >> sizebits;
1027
1028
1029
1030
1031
1032 if (unlikely(index != block >> sizebits)) {
1033 char b[BDEVNAME_SIZE];
1034
1035 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1036 "device %s\n",
1037 __func__, (unsigned long long)block,
1038 bdevname(bdev, b));
1039 return -EIO;
1040 }
1041
1042
1043 return grow_dev_page(bdev, block, index, size, sizebits);
1044}
1045
1046static struct buffer_head *
1047__getblk_slow(struct block_device *bdev, sector_t block, int size)
1048{
1049
1050 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1051 (size < 512 || size > PAGE_SIZE))) {
1052 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1053 size);
1054 printk(KERN_ERR "logical block size: %d\n",
1055 bdev_logical_block_size(bdev));
1056
1057 dump_stack();
1058 return NULL;
1059 }
1060
1061 for (;;) {
1062 struct buffer_head *bh;
1063 int ret;
1064
1065 bh = __find_get_block(bdev, block, size);
1066 if (bh)
1067 return bh;
1068
1069 ret = grow_buffers(bdev, block, size);
1070 if (ret < 0)
1071 return NULL;
1072 if (ret == 0)
1073 free_more_memory();
1074 }
1075}
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
/*
 * mark_buffer_dirty - mark a buffer, and its page, dirty.
 * @bh: buffer to dirty; warns once if it is not uptodate
 *
 * If the buffer's dirty bit is newly set here, the page's dirty flag is
 * test-and-set as well, and a newly dirtied page that has a mapping is
 * passed to __set_page_dirty() (without the uptodate warning).
 */
void mark_buffer_dirty(struct buffer_head *bh)
{
	WARN_ON_ONCE(!buffer_uptodate(bh));

	/*
	 * Fast path: the buffer already looks dirty.  The bit is re-read
	 * after a full barrier, and only if it is still set do we return
	 * without touching the atomic test-and-set below.
	 */
	if (buffer_dirty(bh)) {
		smp_mb();
		if (buffer_dirty(bh))
			return;
	}

	if (!test_set_buffer_dirty(bh)) {
		struct page *page = bh->b_page;
		if (!TestSetPageDirty(page)) {
			struct address_space *mapping = page_mapping(page);
			if (mapping)
				__set_page_dirty(page, mapping, 0);
		}
	}
}
EXPORT_SYMBOL(mark_buffer_dirty);
1138
1139
1140
1141
1142
1143
1144
1145
1146void __brelse(struct buffer_head * buf)
1147{
1148 if (atomic_read(&buf->b_count)) {
1149 put_bh(buf);
1150 return;
1151 }
1152 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1153}
1154EXPORT_SYMBOL(__brelse);
1155
1156
1157
1158
1159
1160void __bforget(struct buffer_head *bh)
1161{
1162 clear_buffer_dirty(bh);
1163 if (bh->b_assoc_map) {
1164 struct address_space *buffer_mapping = bh->b_page->mapping;
1165
1166 spin_lock(&buffer_mapping->private_lock);
1167 list_del_init(&bh->b_assoc_buffers);
1168 bh->b_assoc_map = NULL;
1169 spin_unlock(&buffer_mapping->private_lock);
1170 }
1171 __brelse(bh);
1172}
1173EXPORT_SYMBOL(__bforget);
1174
1175static struct buffer_head *__bread_slow(struct buffer_head *bh)
1176{
1177 lock_buffer(bh);
1178 if (buffer_uptodate(bh)) {
1179 unlock_buffer(bh);
1180 return bh;
1181 } else {
1182 get_bh(bh);
1183 bh->b_end_io = end_buffer_read_sync;
1184 submit_bh(READ, bh);
1185 wait_on_buffer(bh);
1186 if (buffer_uptodate(bh))
1187 return bh;
1188 }
1189 brelse(bh);
1190 return NULL;
1191}
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
/* Number of buffer_head slots in each per-CPU LRU array. */
#define BH_LRU_SIZE 8

/* Small per-CPU array of recently used buffers; slot 0 is the most recent. */
struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

/*
 * "Locking" for the per-CPU LRU: with CONFIG_SMP local interrupts are
 * disabled around accesses, otherwise only preemption is disabled.
 */
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
#define bh_lru_unlock() local_irq_enable()
#else
#define bh_lru_lock() preempt_disable()
#define bh_lru_unlock() preempt_enable()
#endif
1222
/*
 * Assert that interrupts are enabled.  The check is compiled in only when
 * irqs_disabled is defined as a macro.  The LRU helpers call this before
 * bh_lru_lock(), whose SMP variant unconditionally re-enables interrupts on
 * unlock.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1229
1230
1231
1232
/*
 * Put @bh at the front of this CPU's LRU.
 *
 * Nothing happens when @bh already occupies slot 0.  Otherwise a new array
 * is built with @bh first (taking a reference for the LRU), followed by the
 * old entries in order; an old copy of @bh further down is dropped, and if
 * the array overflows the last old entry is evicted.  The evictee's
 * reference is released only after the LRU lock has been dropped.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;

	check_irqs_on();
	bh_lru_lock();
	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 =
				__this_cpu_read(bh_lrus.bhs[in]);

			if (bh2 == bh) {
				/* Duplicate of the new front entry: drop its old reference. */
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					/* At most one entry can fall off the end. */
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		/* Pad the remainder when a duplicate was removed. */
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	if (evictee)
		__brelse(evictee);
}
1270
1271
1272
1273
/*
 * Search this CPU's LRU for a buffer matching (@bdev, @block, @size).
 *
 * On a hit at a slot other than 0, the entries in front of it are shifted
 * down by one and the hit is moved to slot 0.  The buffer is returned with
 * an extra reference for the caller.  Returns NULL on a miss.
 */
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
	struct buffer_head *ret = NULL;
	unsigned int i;

	check_irqs_on();
	bh_lru_lock();
	for (i = 0; i < BH_LRU_SIZE; i++) {
		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

		if (bh && bh->b_bdev == bdev &&
				bh->b_blocknr == block && bh->b_size == size) {
			if (i) {
				/* Shift slots 0..i-1 down one place, reusing i as the cursor. */
				while (i) {
					__this_cpu_write(bh_lrus.bhs[i],
						__this_cpu_read(bh_lrus.bhs[i - 1]));
					i--;
				}
				__this_cpu_write(bh_lrus.bhs[0], bh);
			}
			get_bh(bh);
			ret = bh;
			break;
		}
	}
	bh_lru_unlock();
	return ret;
}
1303
1304
1305
1306
1307
1308
1309struct buffer_head *
1310__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1311{
1312 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1313
1314 if (bh == NULL) {
1315 bh = __find_get_block_slow(bdev, block);
1316 if (bh)
1317 bh_lru_install(bh);
1318 }
1319 if (bh)
1320 touch_buffer(bh);
1321 return bh;
1322}
1323EXPORT_SYMBOL(__find_get_block);
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333struct buffer_head *
1334__getblk(struct block_device *bdev, sector_t block, unsigned size)
1335{
1336 struct buffer_head *bh = __find_get_block(bdev, block, size);
1337
1338 might_sleep();
1339 if (bh == NULL)
1340 bh = __getblk_slow(bdev, block, size);
1341 return bh;
1342}
1343EXPORT_SYMBOL(__getblk);
1344
1345
1346
1347
1348void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1349{
1350 struct buffer_head *bh = __getblk(bdev, block, size);
1351 if (likely(bh)) {
1352 ll_rw_block(READA, 1, &bh);
1353 brelse(bh);
1354 }
1355}
1356EXPORT_SYMBOL(__breadahead);
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367struct buffer_head *
1368__bread(struct block_device *bdev, sector_t block, unsigned size)
1369{
1370 struct buffer_head *bh = __getblk(bdev, block, size);
1371
1372 if (likely(bh) && !buffer_uptodate(bh))
1373 bh = __bread_slow(bh);
1374 return bh;
1375}
1376EXPORT_SYMBOL(__bread);
1377
1378
1379
1380
1381
1382
1383static void invalidate_bh_lru(void *arg)
1384{
1385 struct bh_lru *b = &get_cpu_var(bh_lrus);
1386 int i;
1387
1388 for (i = 0; i < BH_LRU_SIZE; i++) {
1389 brelse(b->bhs[i]);
1390 b->bhs[i] = NULL;
1391 }
1392 put_cpu_var(bh_lrus);
1393}
1394
1395static bool has_bh_in_lru(int cpu, void *dummy)
1396{
1397 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1398 int i;
1399
1400 for (i = 0; i < BH_LRU_SIZE; i++) {
1401 if (b->bhs[i])
1402 return 1;
1403 }
1404
1405 return 0;
1406}
1407
/*
 * invalidate_bh_lrus - empty the buffer LRU of every CPU that has entries.
 *
 * Runs invalidate_bh_lru() on each CPU for which has_bh_in_lru() is true
 * and waits for completion (wait argument is 1).
 */
void invalidate_bh_lrus(void)
{
	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1413
1414void set_bh_page(struct buffer_head *bh,
1415 struct page *page, unsigned long offset)
1416{
1417 bh->b_page = page;
1418 BUG_ON(offset >= PAGE_SIZE);
1419 if (PageHighMem(page))
1420
1421
1422
1423 bh->b_data = (char *)(0 + offset);
1424 else
1425 bh->b_data = page_address(page) + offset;
1426}
1427EXPORT_SYMBOL(set_bh_page);
1428
1429
1430
1431
/*
 * Strip a buffer of its state under the buffer lock: the dirty, mapped,
 * req, new, delay and unwritten flags are cleared and the device pointer
 * is reset.  Used by block_invalidatepage().
 */
static void discard_buffer(struct buffer_head * bh)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
	unlock_buffer(bh);
}
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460void block_invalidatepage(struct page *page, unsigned long offset)
1461{
1462 struct buffer_head *head, *bh, *next;
1463 unsigned int curr_off = 0;
1464
1465 BUG_ON(!PageLocked(page));
1466 if (!page_has_buffers(page))
1467 goto out;
1468
1469 head = page_buffers(page);
1470 bh = head;
1471 do {
1472 unsigned int next_off = curr_off + bh->b_size;
1473 next = bh->b_this_page;
1474
1475
1476
1477
1478 if (offset <= curr_off)
1479 discard_buffer(bh);
1480 curr_off = next_off;
1481 bh = next;
1482 } while (bh != head);
1483
1484
1485
1486
1487
1488
1489 if (offset == 0)
1490 try_to_release_page(page, 0);
1491out:
1492 return;
1493}
1494EXPORT_SYMBOL(block_invalidatepage);
1495
1496
1497
1498
1499
1500
1501void create_empty_buffers(struct page *page,
1502 unsigned long blocksize, unsigned long b_state)
1503{
1504 struct buffer_head *bh, *head, *tail;
1505
1506 head = alloc_page_buffers(page, blocksize, 1);
1507 bh = head;
1508 do {
1509 bh->b_state |= b_state;
1510 tail = bh;
1511 bh = bh->b_this_page;
1512 } while (bh);
1513 tail->b_this_page = head;
1514
1515 spin_lock(&page->mapping->private_lock);
1516 if (PageUptodate(page) || PageDirty(page)) {
1517 bh = head;
1518 do {
1519 if (PageDirty(page))
1520 set_buffer_dirty(bh);
1521 if (PageUptodate(page))
1522 set_buffer_uptodate(bh);
1523 bh = bh->b_this_page;
1524 } while (bh != head);
1525 }
1526 attach_page_buffers(page, head);
1527 spin_unlock(&page->mapping->private_lock);
1528}
1529EXPORT_SYMBOL(create_empty_buffers);
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1548{
1549 struct buffer_head *old_bh;
1550
1551 might_sleep();
1552
1553 old_bh = __find_get_block_slow(bdev, block);
1554 if (old_bh) {
1555 clear_buffer_dirty(old_bh);
1556 wait_on_buffer(old_bh);
1557 clear_buffer_req(old_bh);
1558 __brelse(old_bh);
1559 }
1560}
1561EXPORT_SYMBOL(unmap_underlying_metadata);
1562
1563
1564
1565
1566
1567
1568
1569
1570
/* Return ilog2() of @blocksize, i.e. the shift count for a power-of-two block size. */
static inline int block_size_bits(unsigned int blocksize)
{
	return ilog2(blocksize);
}
1575
1576static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1577{
1578 BUG_ON(!PageLocked(page));
1579
1580 if (!page_has_buffers(page))
1581 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1582 return page_buffers(page);
1583}
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
/*
 * Write out all dirty buffers of a page.
 * @inode:     inode owning the page
 * @page:      locked page to write
 * @get_block: filesystem callback used to map unmapped/delayed buffers
 * @wbc:       writeback control; sync_mode selects WRITE_SYNC vs WRITE and
 *             whether buffer locks are waited for or only tried
 * @handler:   completion handler installed on each submitted buffer
 *
 * Returns 0 on success or the error from @get_block.  In the error case
 * the buffers that were already mapped are still written out.  The page is
 * unlocked before returning on both paths, and writeback is ended here if
 * no buffer was actually submitted.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler)
{
	int err;
	sector_t block;
	sector_t last_block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, bbits;
	int nr_underway = 0;
	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
			WRITE_SYNC : WRITE);

	/* Buffers created here start out dirty and uptodate. */
	head = create_page_buffers(page, inode,
					(1 << BH_Dirty)|(1 << BH_Uptodate));

	bh = head;
	blocksize = bh->b_size;
	bbits = block_size_bits(blocksize);

	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
	last_block = (i_size_read(inode) - 1) >> bbits;

	/* Pass 1: map every dirty buffer that is unmapped or delayed. */
	do {
		if (block > last_block) {
			/* Entirely beyond i_size: nothing to write for this buffer. */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
			   buffer_dirty(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			clear_buffer_delay(bh);
			if (buffer_new(bh)) {
				/* Freshly allocated block: drop any stale device buffer for it. */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	/*
	 * Pass 2: lock the mapped buffers and tag the dirty ones for async
	 * write.  "continue" jumps to the loop condition, which advances bh.
	 */
	do {
		if (!buffer_mapped(bh))
			continue;
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			/* Non-blocking writeback: leave the page dirty for a later pass. */
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write_endio(bh, handler);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/* Writeback must be flagged before the first buffer is submitted. */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	/* Pass 3: submit. b_this_page is sampled before submit_bh(). */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	err = 0;
done:
	if (nr_underway == 0) {
		/* Nothing was submitted, so no completion will end writeback for us. */
		end_page_writeback(page);
	}
	return err;

recover:
	/*
	 * get_block() failed part-way.  Buffers that are mapped, dirty and
	 * not delayed are still written; all others lose their dirty bit.
	 */
	bh = head;
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh) &&
		    !buffer_delay(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write_endio(bh, handler);
		} else {
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	mapping_set_error(page->mapping, err);
	set_page_writeback(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(write_op, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);
	goto done;
}
1776
1777
1778
1779
1780
1781
1782void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1783{
1784 unsigned int block_start, block_end;
1785 struct buffer_head *head, *bh;
1786
1787 BUG_ON(!PageLocked(page));
1788 if (!page_has_buffers(page))
1789 return;
1790
1791 bh = head = page_buffers(page);
1792 block_start = 0;
1793 do {
1794 block_end = block_start + bh->b_size;
1795
1796 if (buffer_new(bh)) {
1797 if (block_end > from && block_start < to) {
1798 if (!PageUptodate(page)) {
1799 unsigned start, size;
1800
1801 start = max(from, block_start);
1802 size = min(to, block_end) - start;
1803
1804 zero_user(page, start, size);
1805 set_buffer_uptodate(bh);
1806 }
1807
1808 clear_buffer_new(bh);
1809 mark_buffer_dirty(bh);
1810 }
1811 }
1812
1813 block_start = block_end;
1814 bh = bh->b_this_page;
1815 } while (bh != head);
1816}
1817EXPORT_SYMBOL(page_zero_new_buffers);
1818
1819int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1820 get_block_t *get_block)
1821{
1822 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1823 unsigned to = from + len;
1824 struct inode *inode = page->mapping->host;
1825 unsigned block_start, block_end;
1826 sector_t block;
1827 int err = 0;
1828 unsigned blocksize, bbits;
1829 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1830
1831 BUG_ON(!PageLocked(page));
1832 BUG_ON(from > PAGE_CACHE_SIZE);
1833 BUG_ON(to > PAGE_CACHE_SIZE);
1834 BUG_ON(from > to);
1835
1836 head = create_page_buffers(page, inode, 0);
1837 blocksize = head->b_size;
1838 bbits = block_size_bits(blocksize);
1839
1840 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1841
1842 for(bh = head, block_start = 0; bh != head || !block_start;
1843 block++, block_start=block_end, bh = bh->b_this_page) {
1844 block_end = block_start + blocksize;
1845 if (block_end <= from || block_start >= to) {
1846 if (PageUptodate(page)) {
1847 if (!buffer_uptodate(bh))
1848 set_buffer_uptodate(bh);
1849 }
1850 continue;
1851 }
1852 if (buffer_new(bh))
1853 clear_buffer_new(bh);
1854 if (!buffer_mapped(bh)) {
1855 WARN_ON(bh->b_size != blocksize);
1856 err = get_block(inode, block, bh, 1);
1857 if (err)
1858 break;
1859 if (buffer_new(bh)) {
1860 unmap_underlying_metadata(bh->b_bdev,
1861 bh->b_blocknr);
1862 if (PageUptodate(page)) {
1863 clear_buffer_new(bh);
1864 set_buffer_uptodate(bh);
1865 mark_buffer_dirty(bh);
1866 continue;
1867 }
1868 if (block_end > to || block_start < from)
1869 zero_user_segments(page,
1870 to, block_end,
1871 block_start, from);
1872 continue;
1873 }
1874 }
1875 if (PageUptodate(page)) {
1876 if (!buffer_uptodate(bh))
1877 set_buffer_uptodate(bh);
1878 continue;
1879 }
1880 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1881 !buffer_unwritten(bh) &&
1882 (block_start < from || block_end > to)) {
1883 ll_rw_block(READ, 1, &bh);
1884 *wait_bh++=bh;
1885 }
1886 }
1887
1888
1889
1890 while(wait_bh > wait) {
1891 wait_on_buffer(*--wait_bh);
1892 if (!buffer_uptodate(*wait_bh))
1893 err = -EIO;
1894 }
1895 if (unlikely(err))
1896 page_zero_new_buffers(page, from, to);
1897 return err;
1898}
1899EXPORT_SYMBOL(__block_write_begin);
1900
1901static int __block_commit_write(struct inode *inode, struct page *page,
1902 unsigned from, unsigned to)
1903{
1904 unsigned block_start, block_end;
1905 int partial = 0;
1906 unsigned blocksize;
1907 struct buffer_head *bh, *head;
1908
1909 bh = head = page_buffers(page);
1910 blocksize = bh->b_size;
1911
1912 block_start = 0;
1913 do {
1914 block_end = block_start + blocksize;
1915 if (block_end <= from || block_start >= to) {
1916 if (!buffer_uptodate(bh))
1917 partial = 1;
1918 } else {
1919 set_buffer_uptodate(bh);
1920 mark_buffer_dirty(bh);
1921 }
1922 clear_buffer_new(bh);
1923
1924 block_start = block_end;
1925 bh = bh->b_this_page;
1926 } while (bh != head);
1927
1928
1929
1930
1931
1932
1933
1934 if (!partial)
1935 SetPageUptodate(page);
1936 return 0;
1937}
1938
1939
1940
1941
1942
1943
1944
1945int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
1946 unsigned flags, struct page **pagep, get_block_t *get_block)
1947{
1948 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1949 struct page *page;
1950 int status;
1951
1952 page = grab_cache_page_write_begin(mapping, index, flags);
1953 if (!page)
1954 return -ENOMEM;
1955
1956 status = __block_write_begin(page, pos, len, get_block);
1957 if (unlikely(status)) {
1958 unlock_page(page);
1959 page_cache_release(page);
1960 page = NULL;
1961 }
1962
1963 *pagep = page;
1964 return status;
1965}
1966EXPORT_SYMBOL(block_write_begin);
1967
1968int block_write_end(struct file *file, struct address_space *mapping,
1969 loff_t pos, unsigned len, unsigned copied,
1970 struct page *page, void *fsdata)
1971{
1972 struct inode *inode = mapping->host;
1973 unsigned start;
1974
1975 start = pos & (PAGE_CACHE_SIZE - 1);
1976
1977 if (unlikely(copied < len)) {
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990 if (!PageUptodate(page))
1991 copied = 0;
1992
1993 page_zero_new_buffers(page, start+copied, start+len);
1994 }
1995 flush_dcache_page(page);
1996
1997
1998 __block_commit_write(inode, page, start, start+copied);
1999
2000 return copied;
2001}
2002EXPORT_SYMBOL(block_write_end);
2003
2004int generic_write_end(struct file *file, struct address_space *mapping,
2005 loff_t pos, unsigned len, unsigned copied,
2006 struct page *page, void *fsdata)
2007{
2008 struct inode *inode = mapping->host;
2009 int i_size_changed = 0;
2010
2011 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2012
2013
2014
2015
2016
2017
2018
2019
2020 if (pos+copied > inode->i_size) {
2021 i_size_write(inode, pos+copied);
2022 i_size_changed = 1;
2023 }
2024
2025 unlock_page(page);
2026 page_cache_release(page);
2027
2028
2029
2030
2031
2032
2033
2034 if (i_size_changed)
2035 mark_inode_dirty(inode);
2036
2037 return copied;
2038}
2039EXPORT_SYMBOL(generic_write_end);
2040
2041
2042
2043
2044
2045
2046
2047
2048int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2049 unsigned long from)
2050{
2051 unsigned block_start, block_end, blocksize;
2052 unsigned to;
2053 struct buffer_head *bh, *head;
2054 int ret = 1;
2055
2056 if (!page_has_buffers(page))
2057 return 0;
2058
2059 head = page_buffers(page);
2060 blocksize = head->b_size;
2061 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2062 to = from + to;
2063 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2064 return 0;
2065
2066 bh = head;
2067 block_start = 0;
2068 do {
2069 block_end = block_start + blocksize;
2070 if (block_end > from && block_start < to) {
2071 if (!buffer_uptodate(bh)) {
2072 ret = 0;
2073 break;
2074 }
2075 if (block_end >= to)
2076 break;
2077 }
2078 block_start = block_end;
2079 bh = bh->b_this_page;
2080 } while (bh != head);
2081
2082 return ret;
2083}
2084EXPORT_SYMBOL(block_is_partially_uptodate);
2085
2086
2087
2088
2089
2090
2091
2092
2093int block_read_full_page(struct page *page, get_block_t *get_block)
2094{
2095 struct inode *inode = page->mapping->host;
2096 sector_t iblock, lblock;
2097 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2098 unsigned int blocksize, bbits;
2099 int nr, i;
2100 int fully_mapped = 1;
2101
2102 head = create_page_buffers(page, inode, 0);
2103 blocksize = head->b_size;
2104 bbits = block_size_bits(blocksize);
2105
2106 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2107 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2108 bh = head;
2109 nr = 0;
2110 i = 0;
2111
2112 do {
2113 if (buffer_uptodate(bh))
2114 continue;
2115
2116 if (!buffer_mapped(bh)) {
2117 int err = 0;
2118
2119 fully_mapped = 0;
2120 if (iblock < lblock) {
2121 WARN_ON(bh->b_size != blocksize);
2122 err = get_block(inode, iblock, bh, 0);
2123 if (err)
2124 SetPageError(page);
2125 }
2126 if (!buffer_mapped(bh)) {
2127 zero_user(page, i * blocksize, blocksize);
2128 if (!err)
2129 set_buffer_uptodate(bh);
2130 continue;
2131 }
2132
2133
2134
2135
2136 if (buffer_uptodate(bh))
2137 continue;
2138 }
2139 arr[nr++] = bh;
2140 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2141
2142 if (fully_mapped)
2143 SetPageMappedToDisk(page);
2144
2145 if (!nr) {
2146
2147
2148
2149
2150 if (!PageError(page))
2151 SetPageUptodate(page);
2152 unlock_page(page);
2153 return 0;
2154 }
2155
2156
2157 for (i = 0; i < nr; i++) {
2158 bh = arr[i];
2159 lock_buffer(bh);
2160 mark_buffer_async_read(bh);
2161 }
2162
2163
2164
2165
2166
2167
2168 for (i = 0; i < nr; i++) {
2169 bh = arr[i];
2170 if (buffer_uptodate(bh))
2171 end_buffer_async_read(bh, 1);
2172 else
2173 submit_bh(READ, bh);
2174 }
2175 return 0;
2176}
2177EXPORT_SYMBOL(block_read_full_page);
2178
2179
2180
2181
2182
2183int generic_cont_expand_simple(struct inode *inode, loff_t size)
2184{
2185 struct address_space *mapping = inode->i_mapping;
2186 struct page *page;
2187 void *fsdata;
2188 int err;
2189
2190 err = inode_newsize_ok(inode, size);
2191 if (err)
2192 goto out;
2193
2194 err = pagecache_write_begin(NULL, mapping, size, 0,
2195 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2196 &page, &fsdata);
2197 if (err)
2198 goto out;
2199
2200 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2201 BUG_ON(err > 0);
2202
2203out:
2204 return err;
2205}
2206EXPORT_SYMBOL(generic_cont_expand_simple);
2207
2208static int cont_expand_zero(struct file *file, struct address_space *mapping,
2209 loff_t pos, loff_t *bytes)
2210{
2211 struct inode *inode = mapping->host;
2212 unsigned blocksize = 1 << inode->i_blkbits;
2213 struct page *page;
2214 void *fsdata;
2215 pgoff_t index, curidx;
2216 loff_t curpos;
2217 unsigned zerofrom, offset, len;
2218 int err = 0;
2219
2220 index = pos >> PAGE_CACHE_SHIFT;
2221 offset = pos & ~PAGE_CACHE_MASK;
2222
2223 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2224 zerofrom = curpos & ~PAGE_CACHE_MASK;
2225 if (zerofrom & (blocksize-1)) {
2226 *bytes |= (blocksize-1);
2227 (*bytes)++;
2228 }
2229 len = PAGE_CACHE_SIZE - zerofrom;
2230
2231 err = pagecache_write_begin(file, mapping, curpos, len,
2232 AOP_FLAG_UNINTERRUPTIBLE,
2233 &page, &fsdata);
2234 if (err)
2235 goto out;
2236 zero_user(page, zerofrom, len);
2237 err = pagecache_write_end(file, mapping, curpos, len, len,
2238 page, fsdata);
2239 if (err < 0)
2240 goto out;
2241 BUG_ON(err != len);
2242 err = 0;
2243
2244 balance_dirty_pages_ratelimited(mapping);
2245 }
2246
2247
2248 if (index == curidx) {
2249 zerofrom = curpos & ~PAGE_CACHE_MASK;
2250
2251 if (offset <= zerofrom) {
2252 goto out;
2253 }
2254 if (zerofrom & (blocksize-1)) {
2255 *bytes |= (blocksize-1);
2256 (*bytes)++;
2257 }
2258 len = offset - zerofrom;
2259
2260 err = pagecache_write_begin(file, mapping, curpos, len,
2261 AOP_FLAG_UNINTERRUPTIBLE,
2262 &page, &fsdata);
2263 if (err)
2264 goto out;
2265 zero_user(page, zerofrom, len);
2266 err = pagecache_write_end(file, mapping, curpos, len, len,
2267 page, fsdata);
2268 if (err < 0)
2269 goto out;
2270 BUG_ON(err != len);
2271 err = 0;
2272 }
2273out:
2274 return err;
2275}
2276
2277
2278
2279
2280
2281int cont_write_begin(struct file *file, struct address_space *mapping,
2282 loff_t pos, unsigned len, unsigned flags,
2283 struct page **pagep, void **fsdata,
2284 get_block_t *get_block, loff_t *bytes)
2285{
2286 struct inode *inode = mapping->host;
2287 unsigned blocksize = 1 << inode->i_blkbits;
2288 unsigned zerofrom;
2289 int err;
2290
2291 err = cont_expand_zero(file, mapping, pos, bytes);
2292 if (err)
2293 return err;
2294
2295 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2296 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2297 *bytes |= (blocksize-1);
2298 (*bytes)++;
2299 }
2300
2301 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2302}
2303EXPORT_SYMBOL(cont_write_begin);
2304
2305int block_commit_write(struct page *page, unsigned from, unsigned to)
2306{
2307 struct inode *inode = page->mapping->host;
2308 __block_commit_write(inode,page,from,to);
2309 return 0;
2310}
2311EXPORT_SYMBOL(block_commit_write);
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2332 get_block_t get_block)
2333{
2334 struct page *page = vmf->page;
2335 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2336 unsigned long end;
2337 loff_t size;
2338 int ret;
2339
2340 lock_page(page);
2341 size = i_size_read(inode);
2342 if ((page->mapping != inode->i_mapping) ||
2343 (page_offset(page) > size)) {
2344
2345 ret = -EFAULT;
2346 goto out_unlock;
2347 }
2348
2349
2350 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2351 end = size & ~PAGE_CACHE_MASK;
2352 else
2353 end = PAGE_CACHE_SIZE;
2354
2355 ret = __block_write_begin(page, 0, end, get_block);
2356 if (!ret)
2357 ret = block_commit_write(page, 0, end);
2358
2359 if (unlikely(ret < 0))
2360 goto out_unlock;
2361 set_page_dirty(page);
2362 wait_on_page_writeback(page);
2363 return 0;
2364out_unlock:
2365 unlock_page(page);
2366 return ret;
2367}
2368EXPORT_SYMBOL(__block_page_mkwrite);
2369
2370int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2371 get_block_t get_block)
2372{
2373 int ret;
2374 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2375
2376 sb_start_pagefault(sb);
2377
2378
2379
2380
2381
2382 file_update_time(vma->vm_file);
2383
2384 ret = __block_page_mkwrite(vma, vmf, get_block);
2385 sb_end_pagefault(sb);
2386 return block_page_mkwrite_return(ret);
2387}
2388EXPORT_SYMBOL(block_page_mkwrite);
2389
2390
2391
2392
2393
2394
/*
 * I/O completion handler for the reads issued by nobh_write_begin(); it
 * simply forwards to __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2399
2400
2401
2402
2403
2404
2405static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2406{
2407 struct buffer_head *bh;
2408
2409 BUG_ON(!PageLocked(page));
2410
2411 spin_lock(&page->mapping->private_lock);
2412 bh = head;
2413 do {
2414 if (PageDirty(page))
2415 set_buffer_dirty(bh);
2416 if (!bh->b_this_page)
2417 bh->b_this_page = head;
2418 bh = bh->b_this_page;
2419 } while (bh != head);
2420 attach_page_buffers(page, head);
2421 spin_unlock(&page->mapping->private_lock);
2422}
2423
2424
2425
2426
2427
2428
2429int nobh_write_begin(struct address_space *mapping,
2430 loff_t pos, unsigned len, unsigned flags,
2431 struct page **pagep, void **fsdata,
2432 get_block_t *get_block)
2433{
2434 struct inode *inode = mapping->host;
2435 const unsigned blkbits = inode->i_blkbits;
2436 const unsigned blocksize = 1 << blkbits;
2437 struct buffer_head *head, *bh;
2438 struct page *page;
2439 pgoff_t index;
2440 unsigned from, to;
2441 unsigned block_in_page;
2442 unsigned block_start, block_end;
2443 sector_t block_in_file;
2444 int nr_reads = 0;
2445 int ret = 0;
2446 int is_mapped_to_disk = 1;
2447
2448 index = pos >> PAGE_CACHE_SHIFT;
2449 from = pos & (PAGE_CACHE_SIZE - 1);
2450 to = from + len;
2451
2452 page = grab_cache_page_write_begin(mapping, index, flags);
2453 if (!page)
2454 return -ENOMEM;
2455 *pagep = page;
2456 *fsdata = NULL;
2457
2458 if (page_has_buffers(page)) {
2459 ret = __block_write_begin(page, pos, len, get_block);
2460 if (unlikely(ret))
2461 goto out_release;
2462 return ret;
2463 }
2464
2465 if (PageMappedToDisk(page))
2466 return 0;
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477 head = alloc_page_buffers(page, blocksize, 0);
2478 if (!head) {
2479 ret = -ENOMEM;
2480 goto out_release;
2481 }
2482
2483 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2484
2485
2486
2487
2488
2489
2490 for (block_start = 0, block_in_page = 0, bh = head;
2491 block_start < PAGE_CACHE_SIZE;
2492 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2493 int create;
2494
2495 block_end = block_start + blocksize;
2496 bh->b_state = 0;
2497 create = 1;
2498 if (block_start >= to)
2499 create = 0;
2500 ret = get_block(inode, block_in_file + block_in_page,
2501 bh, create);
2502 if (ret)
2503 goto failed;
2504 if (!buffer_mapped(bh))
2505 is_mapped_to_disk = 0;
2506 if (buffer_new(bh))
2507 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2508 if (PageUptodate(page)) {
2509 set_buffer_uptodate(bh);
2510 continue;
2511 }
2512 if (buffer_new(bh) || !buffer_mapped(bh)) {
2513 zero_user_segments(page, block_start, from,
2514 to, block_end);
2515 continue;
2516 }
2517 if (buffer_uptodate(bh))
2518 continue;
2519 if (block_start < from || block_end > to) {
2520 lock_buffer(bh);
2521 bh->b_end_io = end_buffer_read_nobh;
2522 submit_bh(READ, bh);
2523 nr_reads++;
2524 }
2525 }
2526
2527 if (nr_reads) {
2528
2529
2530
2531
2532
2533 for (bh = head; bh; bh = bh->b_this_page) {
2534 wait_on_buffer(bh);
2535 if (!buffer_uptodate(bh))
2536 ret = -EIO;
2537 }
2538 if (ret)
2539 goto failed;
2540 }
2541
2542 if (is_mapped_to_disk)
2543 SetPageMappedToDisk(page);
2544
2545 *fsdata = head;
2546
2547 return 0;
2548
2549failed:
2550 BUG_ON(!ret);
2551
2552
2553
2554
2555
2556
2557
2558 attach_nobh_buffers(page, head);
2559 page_zero_new_buffers(page, from, to);
2560
2561out_release:
2562 unlock_page(page);
2563 page_cache_release(page);
2564 *pagep = NULL;
2565
2566 return ret;
2567}
2568EXPORT_SYMBOL(nobh_write_begin);
2569
2570int nobh_write_end(struct file *file, struct address_space *mapping,
2571 loff_t pos, unsigned len, unsigned copied,
2572 struct page *page, void *fsdata)
2573{
2574 struct inode *inode = page->mapping->host;
2575 struct buffer_head *head = fsdata;
2576 struct buffer_head *bh;
2577 BUG_ON(fsdata != NULL && page_has_buffers(page));
2578
2579 if (unlikely(copied < len) && head)
2580 attach_nobh_buffers(page, head);
2581 if (page_has_buffers(page))
2582 return generic_write_end(file, mapping, pos, len,
2583 copied, page, fsdata);
2584
2585 SetPageUptodate(page);
2586 set_page_dirty(page);
2587 if (pos+copied > inode->i_size) {
2588 i_size_write(inode, pos+copied);
2589 mark_inode_dirty(inode);
2590 }
2591
2592 unlock_page(page);
2593 page_cache_release(page);
2594
2595 while (head) {
2596 bh = head;
2597 head = head->b_this_page;
2598 free_buffer_head(bh);
2599 }
2600
2601 return copied;
2602}
2603EXPORT_SYMBOL(nobh_write_end);
2604
2605
2606
2607
2608
2609
2610int nobh_writepage(struct page *page, get_block_t *get_block,
2611 struct writeback_control *wbc)
2612{
2613 struct inode * const inode = page->mapping->host;
2614 loff_t i_size = i_size_read(inode);
2615 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2616 unsigned offset;
2617 int ret;
2618
2619
2620 if (page->index < end_index)
2621 goto out;
2622
2623
2624 offset = i_size & (PAGE_CACHE_SIZE-1);
2625 if (page->index >= end_index+1 || !offset) {
2626
2627
2628
2629
2630
2631#if 0
2632
2633 if (page->mapping->a_ops->invalidatepage)
2634 page->mapping->a_ops->invalidatepage(page, offset);
2635#endif
2636 unlock_page(page);
2637 return 0;
2638 }
2639
2640
2641
2642
2643
2644
2645
2646
2647 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2648out:
2649 ret = mpage_writepage(page, get_block, wbc);
2650 if (ret == -EAGAIN)
2651 ret = __block_write_full_page(inode, page, get_block, wbc,
2652 end_buffer_async_write);
2653 return ret;
2654}
2655EXPORT_SYMBOL(nobh_writepage);
2656
2657int nobh_truncate_page(struct address_space *mapping,
2658 loff_t from, get_block_t *get_block)
2659{
2660 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2661 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2662 unsigned blocksize;
2663 sector_t iblock;
2664 unsigned length, pos;
2665 struct inode *inode = mapping->host;
2666 struct page *page;
2667 struct buffer_head map_bh;
2668 int err;
2669
2670 blocksize = 1 << inode->i_blkbits;
2671 length = offset & (blocksize - 1);
2672
2673
2674 if (!length)
2675 return 0;
2676
2677 length = blocksize - length;
2678 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2679
2680 page = grab_cache_page(mapping, index);
2681 err = -ENOMEM;
2682 if (!page)
2683 goto out;
2684
2685 if (page_has_buffers(page)) {
2686has_buffers:
2687 unlock_page(page);
2688 page_cache_release(page);
2689 return block_truncate_page(mapping, from, get_block);
2690 }
2691
2692
2693 pos = blocksize;
2694 while (offset >= pos) {
2695 iblock++;
2696 pos += blocksize;
2697 }
2698
2699 map_bh.b_size = blocksize;
2700 map_bh.b_state = 0;
2701 err = get_block(inode, iblock, &map_bh, 0);
2702 if (err)
2703 goto unlock;
2704
2705 if (!buffer_mapped(&map_bh))
2706 goto unlock;
2707
2708
2709 if (!PageUptodate(page)) {
2710 err = mapping->a_ops->readpage(NULL, page);
2711 if (err) {
2712 page_cache_release(page);
2713 goto out;
2714 }
2715 lock_page(page);
2716 if (!PageUptodate(page)) {
2717 err = -EIO;
2718 goto unlock;
2719 }
2720 if (page_has_buffers(page))
2721 goto has_buffers;
2722 }
2723 zero_user(page, offset, length);
2724 set_page_dirty(page);
2725 err = 0;
2726
2727unlock:
2728 unlock_page(page);
2729 page_cache_release(page);
2730out:
2731 return err;
2732}
2733EXPORT_SYMBOL(nobh_truncate_page);
2734
2735int block_truncate_page(struct address_space *mapping,
2736 loff_t from, get_block_t *get_block)
2737{
2738 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2739 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2740 unsigned blocksize;
2741 sector_t iblock;
2742 unsigned length, pos;
2743 struct inode *inode = mapping->host;
2744 struct page *page;
2745 struct buffer_head *bh;
2746 int err;
2747
2748 blocksize = 1 << inode->i_blkbits;
2749 length = offset & (blocksize - 1);
2750
2751
2752 if (!length)
2753 return 0;
2754
2755 length = blocksize - length;
2756 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2757
2758 page = grab_cache_page(mapping, index);
2759 err = -ENOMEM;
2760 if (!page)
2761 goto out;
2762
2763 if (!page_has_buffers(page))
2764 create_empty_buffers(page, blocksize, 0);
2765
2766
2767 bh = page_buffers(page);
2768 pos = blocksize;
2769 while (offset >= pos) {
2770 bh = bh->b_this_page;
2771 iblock++;
2772 pos += blocksize;
2773 }
2774
2775 err = 0;
2776 if (!buffer_mapped(bh)) {
2777 WARN_ON(bh->b_size != blocksize);
2778 err = get_block(inode, iblock, bh, 0);
2779 if (err)
2780 goto unlock;
2781
2782 if (!buffer_mapped(bh))
2783 goto unlock;
2784 }
2785
2786
2787 if (PageUptodate(page))
2788 set_buffer_uptodate(bh);
2789
2790 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2791 err = -EIO;
2792 ll_rw_block(READ, 1, &bh);
2793 wait_on_buffer(bh);
2794
2795 if (!buffer_uptodate(bh))
2796 goto unlock;
2797 }
2798
2799 zero_user(page, offset, length);
2800 mark_buffer_dirty(bh);
2801 err = 0;
2802
2803unlock:
2804 unlock_page(page);
2805 page_cache_release(page);
2806out:
2807 return err;
2808}
2809EXPORT_SYMBOL(block_truncate_page);
2810
2811
2812
2813
2814
2815int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2816 struct writeback_control *wbc, bh_end_io_t *handler)
2817{
2818 struct inode * const inode = page->mapping->host;
2819 loff_t i_size = i_size_read(inode);
2820 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2821 unsigned offset;
2822
2823
2824 if (page->index < end_index)
2825 return __block_write_full_page(inode, page, get_block, wbc,
2826 handler);
2827
2828
2829 offset = i_size & (PAGE_CACHE_SIZE-1);
2830 if (page->index >= end_index+1 || !offset) {
2831
2832
2833
2834
2835
2836 do_invalidatepage(page, 0);
2837 unlock_page(page);
2838 return 0;
2839 }
2840
2841
2842
2843
2844
2845
2846
2847
2848 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2849 return __block_write_full_page(inode, page, get_block, wbc, handler);
2850}
2851EXPORT_SYMBOL(block_write_full_page_endio);
2852
2853
2854
2855
2856int block_write_full_page(struct page *page, get_block_t *get_block,
2857 struct writeback_control *wbc)
2858{
2859 return block_write_full_page_endio(page, get_block, wbc,
2860 end_buffer_async_write);
2861}
2862EXPORT_SYMBOL(block_write_full_page);
2863
2864sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2865 get_block_t *get_block)
2866{
2867 struct buffer_head tmp;
2868 struct inode *inode = mapping->host;
2869 tmp.b_state = 0;
2870 tmp.b_blocknr = 0;
2871 tmp.b_size = 1 << inode->i_blkbits;
2872 get_block(inode, block, &tmp, 0);
2873 return tmp.b_blocknr;
2874}
2875EXPORT_SYMBOL(generic_block_bmap);
2876
2877static void end_bio_bh_io_sync(struct bio *bio, int err)
2878{
2879 struct buffer_head *bh = bio->bi_private;
2880
2881 if (err == -EOPNOTSUPP) {
2882 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2883 }
2884
2885 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2886 set_bit(BH_Quiet, &bh->b_state);
2887
2888 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2889 bio_put(bio);
2890}
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2905{
2906 sector_t maxsector;
2907 unsigned bytes;
2908
2909 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
2910 if (!maxsector)
2911 return;
2912
2913
2914
2915
2916
2917
2918 if (unlikely(bio->bi_sector >= maxsector))
2919 return;
2920
2921 maxsector -= bio->bi_sector;
2922 bytes = bio->bi_size;
2923 if (likely((bytes >> 9) <= maxsector))
2924 return;
2925
2926
2927 bytes = maxsector << 9;
2928
2929
2930 bio->bi_size = bytes;
2931 bio->bi_io_vec[0].bv_len = bytes;
2932
2933
2934 if ((rw & RW_MASK) == READ) {
2935 void *kaddr = kmap_atomic(bh->b_page);
2936 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
2937 kunmap_atomic(kaddr);
2938 flush_dcache_page(bh->b_page);
2939 }
2940}
2941
2942int submit_bh(int rw, struct buffer_head * bh)
2943{
2944 struct bio *bio;
2945 int ret = 0;
2946
2947 BUG_ON(!buffer_locked(bh));
2948 BUG_ON(!buffer_mapped(bh));
2949 BUG_ON(!bh->b_end_io);
2950 BUG_ON(buffer_delay(bh));
2951 BUG_ON(buffer_unwritten(bh));
2952
2953
2954
2955
2956 if (test_set_buffer_req(bh) && (rw & WRITE))
2957 clear_buffer_write_io_error(bh);
2958
2959
2960
2961
2962
2963 bio = bio_alloc(GFP_NOIO, 1);
2964
2965 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2966 bio->bi_bdev = bh->b_bdev;
2967 bio->bi_io_vec[0].bv_page = bh->b_page;
2968 bio->bi_io_vec[0].bv_len = bh->b_size;
2969 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2970
2971 bio->bi_vcnt = 1;
2972 bio->bi_idx = 0;
2973 bio->bi_size = bh->b_size;
2974
2975 bio->bi_end_io = end_bio_bh_io_sync;
2976 bio->bi_private = bh;
2977
2978
2979 guard_bh_eod(rw, bio, bh);
2980
2981 bio_get(bio);
2982 submit_bio(rw, bio);
2983
2984 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2985 ret = -EOPNOTSUPP;
2986
2987 bio_put(bio);
2988 return ret;
2989}
2990EXPORT_SYMBOL(submit_bh);
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3018{
3019 int i;
3020
3021 for (i = 0; i < nr; i++) {
3022 struct buffer_head *bh = bhs[i];
3023
3024 if (!trylock_buffer(bh))
3025 continue;
3026 if (rw == WRITE) {
3027 if (test_clear_buffer_dirty(bh)) {
3028 bh->b_end_io = end_buffer_write_sync;
3029 get_bh(bh);
3030 submit_bh(WRITE, bh);
3031 continue;
3032 }
3033 } else {
3034 if (!buffer_uptodate(bh)) {
3035 bh->b_end_io = end_buffer_read_sync;
3036 get_bh(bh);
3037 submit_bh(rw, bh);
3038 continue;
3039 }
3040 }
3041 unlock_buffer(bh);
3042 }
3043}
3044EXPORT_SYMBOL(ll_rw_block);
3045
3046void write_dirty_buffer(struct buffer_head *bh, int rw)
3047{
3048 lock_buffer(bh);
3049 if (!test_clear_buffer_dirty(bh)) {
3050 unlock_buffer(bh);
3051 return;
3052 }
3053 bh->b_end_io = end_buffer_write_sync;
3054 get_bh(bh);
3055 submit_bh(rw, bh);
3056}
3057EXPORT_SYMBOL(write_dirty_buffer);
3058
3059
3060
3061
3062
3063
3064int __sync_dirty_buffer(struct buffer_head *bh, int rw)
3065{
3066 int ret = 0;
3067
3068 WARN_ON(atomic_read(&bh->b_count) < 1);
3069 lock_buffer(bh);
3070 if (test_clear_buffer_dirty(bh)) {
3071 get_bh(bh);
3072 bh->b_end_io = end_buffer_write_sync;
3073 ret = submit_bh(rw, bh);
3074 wait_on_buffer(bh);
3075 if (!ret && !buffer_uptodate(bh))
3076 ret = -EIO;
3077 } else {
3078 unlock_buffer(bh);
3079 }
3080 return ret;
3081}
3082EXPORT_SYMBOL(__sync_dirty_buffer);
3083
3084int sync_dirty_buffer(struct buffer_head *bh)
3085{
3086 return __sync_dirty_buffer(bh, WRITE_SYNC);
3087}
3088EXPORT_SYMBOL(sync_dirty_buffer);
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110static inline int buffer_busy(struct buffer_head *bh)
3111{
3112 return atomic_read(&bh->b_count) |
3113 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3114}
3115
3116static int
3117drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3118{
3119 struct buffer_head *head = page_buffers(page);
3120 struct buffer_head *bh;
3121
3122 bh = head;
3123 do {
3124 if (buffer_write_io_error(bh) && page->mapping)
3125 set_bit(AS_EIO, &page->mapping->flags);
3126 if (buffer_busy(bh))
3127 goto failed;
3128 bh = bh->b_this_page;
3129 } while (bh != head);
3130
3131 do {
3132 struct buffer_head *next = bh->b_this_page;
3133
3134 if (bh->b_assoc_map)
3135 __remove_assoc_queue(bh);
3136 bh = next;
3137 } while (bh != head);
3138 *buffers_to_free = head;
3139 __clear_page_buffers(page);
3140 return 1;
3141failed:
3142 return 0;
3143}
3144
3145int try_to_free_buffers(struct page *page)
3146{
3147 struct address_space * const mapping = page->mapping;
3148 struct buffer_head *buffers_to_free = NULL;
3149 int ret = 0;
3150
3151 BUG_ON(!PageLocked(page));
3152 if (PageWriteback(page))
3153 return 0;
3154
3155 if (mapping == NULL) {
3156 ret = drop_buffers(page, &buffers_to_free);
3157 goto out;
3158 }
3159
3160 spin_lock(&mapping->private_lock);
3161 ret = drop_buffers(page, &buffers_to_free);
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177 if (ret)
3178 cancel_dirty_page(page, PAGE_CACHE_SIZE);
3179 spin_unlock(&mapping->private_lock);
3180out:
3181 if (buffers_to_free) {
3182 struct buffer_head *bh = buffers_to_free;
3183
3184 do {
3185 struct buffer_head *next = bh->b_this_page;
3186 free_buffer_head(bh);
3187 bh = next;
3188 } while (bh != buffers_to_free);
3189 }
3190 return ret;
3191}
3192EXPORT_SYMBOL(try_to_free_buffers);
3193
3194
3195
3196
3197
3198
3199
3200
3201SYSCALL_DEFINE2(bdflush, int, func, long, data)
3202{
3203 static int msg_count;
3204
3205 if (!capable(CAP_SYS_ADMIN))
3206 return -EPERM;
3207
3208 if (msg_count < 5) {
3209 msg_count++;
3210 printk(KERN_INFO
3211 "warning: process `%s' used the obsolete bdflush"
3212 " system call\n", current->comm);
3213 printk(KERN_INFO "Fix your initscripts?\n");
3214 }
3215
3216 if (func == 1)
3217 do_exit(0);
3218 return 0;
3219}
3220
3221
3222
3223
3224static struct kmem_cache *bh_cachep __read_mostly;
3225
3226
3227
3228
3229
3230static int max_buffer_heads;
3231
3232int buffer_heads_over_limit;
3233
3234struct bh_accounting {
3235 int nr;
3236 int ratelimit;
3237};
3238
3239static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3240
3241static void recalc_bh_state(void)
3242{
3243 int i;
3244 int tot = 0;
3245
3246 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3247 return;
3248 __this_cpu_write(bh_accounting.ratelimit, 0);
3249 for_each_online_cpu(i)
3250 tot += per_cpu(bh_accounting, i).nr;
3251 buffer_heads_over_limit = (tot > max_buffer_heads);
3252}
3253
3254struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3255{
3256 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3257 if (ret) {
3258 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3259 preempt_disable();
3260 __this_cpu_inc(bh_accounting.nr);
3261 recalc_bh_state();
3262 preempt_enable();
3263 }
3264 return ret;
3265}
3266EXPORT_SYMBOL(alloc_buffer_head);
3267
3268void free_buffer_head(struct buffer_head *bh)
3269{
3270 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3271 kmem_cache_free(bh_cachep, bh);
3272 preempt_disable();
3273 __this_cpu_dec(bh_accounting.nr);
3274 recalc_bh_state();
3275 preempt_enable();
3276}
3277EXPORT_SYMBOL(free_buffer_head);
3278
3279static void buffer_exit_cpu(int cpu)
3280{
3281 int i;
3282 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3283
3284 for (i = 0; i < BH_LRU_SIZE; i++) {
3285 brelse(b->bhs[i]);
3286 b->bhs[i] = NULL;
3287 }
3288 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3289 per_cpu(bh_accounting, cpu).nr = 0;
3290}
3291
3292static int buffer_cpu_notify(struct notifier_block *self,
3293 unsigned long action, void *hcpu)
3294{
3295 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3296 buffer_exit_cpu((unsigned long)hcpu);
3297 return NOTIFY_OK;
3298}
3299
3300
3301
3302
3303
3304
3305
3306
/*
 * bh_uptodate_or_lock() - test whether a buffer is uptodate, locking it if not.
 * @bh: buffer_head to examine.
 *
 * Returns 1 if @bh is uptodate; the buffer is not left locked by this call.
 * Returns 0 if @bh is not uptodate; in that case the buffer is returned
 * locked.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	/* Fast path: already uptodate, no need to take the lock. */
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);

	/* Re-check now that we hold the lock. */
	if (!buffer_uptodate(bh))
		return 0;	/* still locked on purpose */

	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3318
3319
3320
3321
3322
3323
3324
3325int bh_submit_read(struct buffer_head *bh)
3326{
3327 BUG_ON(!buffer_locked(bh));
3328
3329 if (buffer_uptodate(bh)) {
3330 unlock_buffer(bh);
3331 return 0;
3332 }
3333
3334 get_bh(bh);
3335 bh->b_end_io = end_buffer_read_sync;
3336 submit_bh(READ, bh);
3337 wait_on_buffer(bh);
3338 if (buffer_uptodate(bh))
3339 return 0;
3340 return -EIO;
3341}
3342EXPORT_SYMBOL(bh_submit_read);
3343
3344void __init buffer_init(void)
3345{
3346 int nrpages;
3347
3348 bh_cachep = kmem_cache_create("buffer_head",
3349 sizeof(struct buffer_head), 0,
3350 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3351 SLAB_MEM_SPREAD),
3352 NULL);
3353
3354
3355
3356
3357 nrpages = (nr_free_buffer_pages() * 10) / 100;
3358 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3359 hotcpu_notifier(buffer_cpu_notify, 0);
3360}
3361