1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel.h>
22#include <linux/syscalls.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/capability.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/module.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/task_io_accounting_ops.h>
38#include <linux/bio.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/bitops.h>
42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
44
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46
47#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
48
49inline void
50init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51{
52 bh->b_end_io = handler;
53 bh->b_private = private;
54}
55EXPORT_SYMBOL(init_buffer);
56
/*
 * Wait action passed to wait_on_bit() / wait_on_bit_lock() by the buffer
 * lock helpers in this file: call io_schedule() and report 0.
 * @word is unused.
 */
static int sleep_on_buffer(void *word)
{
	io_schedule();

	return 0;
}
62
63void __lock_buffer(struct buffer_head *bh)
64{
65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
66 TASK_UNINTERRUPTIBLE);
67}
68EXPORT_SYMBOL(__lock_buffer);
69
70void unlock_buffer(struct buffer_head *bh)
71{
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_clear_bit();
74 wake_up_bit(&bh->b_state, BH_Lock);
75}
76EXPORT_SYMBOL(unlock_buffer);
77
78
79
80
81
82
83void __wait_on_buffer(struct buffer_head * bh)
84{
85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
86}
87EXPORT_SYMBOL(__wait_on_buffer);
88
/*
 * Detach the buffer bookkeeping from @page: clear PagePrivate, zero the
 * page's private word, then drop one page reference.
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);

	page_cache_release(page);
}
96
97
98static int quiet_error(struct buffer_head *bh)
99{
100 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
101 return 0;
102 return 1;
103}
104
105
106static void buffer_io_error(struct buffer_head *bh)
107{
108 char b[BDEVNAME_SIZE];
109 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
110 bdevname(bh->b_bdev, b),
111 (unsigned long long)bh->b_blocknr);
112}
113
114
115
116
117
118
119
120
121
/*
 * Read-completion helper: record the outcome in the buffer's uptodate
 * flag and unlock the buffer. The caller's reference on @bh is not
 * dropped here.
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
132
133
134
135
136
/*
 * end_buffer_read_sync - b_end_io handler for synchronous reads.
 *
 * Records the result in the uptodate flag, unlocks the buffer and then
 * drops one reference on it (the one __bread_slow() takes before
 * submitting, for example).
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	/* Same steps as __end_buffer_read_notouch(), written out inline. */
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);

	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
143
144void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
145{
146 char b[BDEVNAME_SIZE];
147
148 if (uptodate) {
149 set_buffer_uptodate(bh);
150 } else {
151 if (!quiet_error(bh)) {
152 buffer_io_error(bh);
153 printk(KERN_WARNING "lost page write due to "
154 "I/O error on %s\n",
155 bdevname(bh->b_bdev, b));
156 }
157 set_buffer_write_io_error(bh);
158 clear_buffer_uptodate(bh);
159 }
160 unlock_buffer(bh);
161 put_bh(bh);
162}
163EXPORT_SYMBOL(end_buffer_write_sync);
164
165
166
167
168
169
170
171
172
173
174
175
176static struct buffer_head *
177__find_get_block_slow(struct block_device *bdev, sector_t block)
178{
179 struct inode *bd_inode = bdev->bd_inode;
180 struct address_space *bd_mapping = bd_inode->i_mapping;
181 struct buffer_head *ret = NULL;
182 pgoff_t index;
183 struct buffer_head *bh;
184 struct buffer_head *head;
185 struct page *page;
186 int all_mapped = 1;
187
188 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
189 page = find_get_page(bd_mapping, index);
190 if (!page)
191 goto out;
192
193 spin_lock(&bd_mapping->private_lock);
194 if (!page_has_buffers(page))
195 goto out_unlock;
196 head = page_buffers(page);
197 bh = head;
198 do {
199 if (!buffer_mapped(bh))
200 all_mapped = 0;
201 else if (bh->b_blocknr == block) {
202 ret = bh;
203 get_bh(bh);
204 goto out_unlock;
205 }
206 bh = bh->b_this_page;
207 } while (bh != head);
208
209
210
211
212
213
214 if (all_mapped) {
215 char b[BDEVNAME_SIZE];
216
217 printk("__find_get_block_slow() failed. "
218 "block=%llu, b_blocknr=%llu\n",
219 (unsigned long long)block,
220 (unsigned long long)bh->b_blocknr);
221 printk("b_state=0x%08lx, b_size=%zu\n",
222 bh->b_state, bh->b_size);
223 printk("device %s blocksize: %d\n", bdevname(bdev, b),
224 1 << bd_inode->i_blkbits);
225 }
226out_unlock:
227 spin_unlock(&bd_mapping->private_lock);
228 page_cache_release(page);
229out:
230 return ret;
231}
232
233
234
235
236static void free_more_memory(void)
237{
238 struct zone *zone;
239 int nid;
240
241 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
242 yield();
243
244 for_each_online_node(nid) {
245 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
246 gfp_zone(GFP_NOFS), NULL,
247 &zone);
248 if (zone)
249 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
250 GFP_NOFS, NULL);
251 }
252}
253
254
255
256
257
258static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
259{
260 unsigned long flags;
261 struct buffer_head *first;
262 struct buffer_head *tmp;
263 struct page *page;
264 int page_uptodate = 1;
265
266 BUG_ON(!buffer_async_read(bh));
267
268 page = bh->b_page;
269 if (uptodate) {
270 set_buffer_uptodate(bh);
271 } else {
272 clear_buffer_uptodate(bh);
273 if (!quiet_error(bh))
274 buffer_io_error(bh);
275 SetPageError(page);
276 }
277
278
279
280
281
282
283 first = page_buffers(page);
284 local_irq_save(flags);
285 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
286 clear_buffer_async_read(bh);
287 unlock_buffer(bh);
288 tmp = bh;
289 do {
290 if (!buffer_uptodate(tmp))
291 page_uptodate = 0;
292 if (buffer_async_read(tmp)) {
293 BUG_ON(!buffer_locked(tmp));
294 goto still_busy;
295 }
296 tmp = tmp->b_this_page;
297 } while (tmp != bh);
298 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
299 local_irq_restore(flags);
300
301
302
303
304
305 if (page_uptodate && !PageError(page))
306 SetPageUptodate(page);
307 unlock_page(page);
308 return;
309
310still_busy:
311 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
312 local_irq_restore(flags);
313 return;
314}
315
316
317
318
319
320void end_buffer_async_write(struct buffer_head *bh, int uptodate)
321{
322 char b[BDEVNAME_SIZE];
323 unsigned long flags;
324 struct buffer_head *first;
325 struct buffer_head *tmp;
326 struct page *page;
327
328 BUG_ON(!buffer_async_write(bh));
329
330 page = bh->b_page;
331 if (uptodate) {
332 set_buffer_uptodate(bh);
333 } else {
334 if (!quiet_error(bh)) {
335 buffer_io_error(bh);
336 printk(KERN_WARNING "lost page write due to "
337 "I/O error on %s\n",
338 bdevname(bh->b_bdev, b));
339 }
340 set_bit(AS_EIO, &page->mapping->flags);
341 set_buffer_write_io_error(bh);
342 clear_buffer_uptodate(bh);
343 SetPageError(page);
344 }
345
346 first = page_buffers(page);
347 local_irq_save(flags);
348 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
349
350 clear_buffer_async_write(bh);
351 unlock_buffer(bh);
352 tmp = bh->b_this_page;
353 while (tmp != bh) {
354 if (buffer_async_write(tmp)) {
355 BUG_ON(!buffer_locked(tmp));
356 goto still_busy;
357 }
358 tmp = tmp->b_this_page;
359 }
360 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
361 local_irq_restore(flags);
362 end_page_writeback(page);
363 return;
364
365still_busy:
366 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
367 local_irq_restore(flags);
368 return;
369}
370EXPORT_SYMBOL(end_buffer_async_write);
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393static void mark_buffer_async_read(struct buffer_head *bh)
394{
395 bh->b_end_io = end_buffer_async_read;
396 set_buffer_async_read(bh);
397}
398
399static void mark_buffer_async_write_endio(struct buffer_head *bh,
400 bh_end_io_t *handler)
401{
402 bh->b_end_io = handler;
403 set_buffer_async_write(bh);
404}
405
406void mark_buffer_async_write(struct buffer_head *bh)
407{
408 mark_buffer_async_write_endio(bh, end_buffer_async_write);
409}
410EXPORT_SYMBOL(mark_buffer_async_write);
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465static void __remove_assoc_queue(struct buffer_head *bh)
466{
467 list_del_init(&bh->b_assoc_buffers);
468 WARN_ON(!bh->b_assoc_map);
469 if (buffer_write_io_error(bh))
470 set_bit(AS_EIO, &bh->b_assoc_map->flags);
471 bh->b_assoc_map = NULL;
472}
473
474int inode_has_buffers(struct inode *inode)
475{
476 return !list_empty(&inode->i_data.private_list);
477}
478
479
480
481
482
483
484
485
486
487
488
489static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
490{
491 struct buffer_head *bh;
492 struct list_head *p;
493 int err = 0;
494
495 spin_lock(lock);
496repeat:
497 list_for_each_prev(p, list) {
498 bh = BH_ENTRY(p);
499 if (buffer_locked(bh)) {
500 get_bh(bh);
501 spin_unlock(lock);
502 wait_on_buffer(bh);
503 if (!buffer_uptodate(bh))
504 err = -EIO;
505 brelse(bh);
506 spin_lock(lock);
507 goto repeat;
508 }
509 }
510 spin_unlock(lock);
511 return err;
512}
513
514static void do_thaw_one(struct super_block *sb, void *unused)
515{
516 char b[BDEVNAME_SIZE];
517 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
518 printk(KERN_WARNING "Emergency Thaw on %s\n",
519 bdevname(sb->s_bdev, b));
520}
521
522static void do_thaw_all(struct work_struct *work)
523{
524 iterate_supers(do_thaw_one, NULL);
525 kfree(work);
526 printk(KERN_WARNING "Emergency Thaw complete\n");
527}
528
529
530
531
532
533
534void emergency_thaw_all(void)
535{
536 struct work_struct *work;
537
538 work = kmalloc(sizeof(*work), GFP_ATOMIC);
539 if (work) {
540 INIT_WORK(work, do_thaw_all);
541 schedule_work(work);
542 }
543}
544
545
546
547
548
549
550
551
552
553
554
555
556int sync_mapping_buffers(struct address_space *mapping)
557{
558 struct address_space *buffer_mapping = mapping->assoc_mapping;
559
560 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
561 return 0;
562
563 return fsync_buffers_list(&buffer_mapping->private_lock,
564 &mapping->private_list);
565}
566EXPORT_SYMBOL(sync_mapping_buffers);
567
568
569
570
571
572
573
574void write_boundary_block(struct block_device *bdev,
575 sector_t bblock, unsigned blocksize)
576{
577 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
578 if (bh) {
579 if (buffer_dirty(bh))
580 ll_rw_block(WRITE, 1, &bh);
581 put_bh(bh);
582 }
583}
584
585void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
586{
587 struct address_space *mapping = inode->i_mapping;
588 struct address_space *buffer_mapping = bh->b_page->mapping;
589
590 mark_buffer_dirty(bh);
591 if (!mapping->assoc_mapping) {
592 mapping->assoc_mapping = buffer_mapping;
593 } else {
594 BUG_ON(mapping->assoc_mapping != buffer_mapping);
595 }
596 if (!bh->b_assoc_map) {
597 spin_lock(&buffer_mapping->private_lock);
598 list_move_tail(&bh->b_assoc_buffers,
599 &mapping->private_list);
600 bh->b_assoc_map = mapping;
601 spin_unlock(&buffer_mapping->private_lock);
602 }
603}
604EXPORT_SYMBOL(mark_buffer_dirty_inode);
605
606
607
608
609
610
611
612
613static void __set_page_dirty(struct page *page,
614 struct address_space *mapping, int warn)
615{
616 spin_lock_irq(&mapping->tree_lock);
617 if (page->mapping) {
618 WARN_ON_ONCE(warn && !PageUptodate(page));
619 account_page_dirtied(page, mapping);
620 radix_tree_tag_set(&mapping->page_tree,
621 page_index(page), PAGECACHE_TAG_DIRTY);
622 }
623 spin_unlock_irq(&mapping->tree_lock);
624 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
625}
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652int __set_page_dirty_buffers(struct page *page)
653{
654 int newly_dirty;
655 struct address_space *mapping = page_mapping(page);
656
657 if (unlikely(!mapping))
658 return !TestSetPageDirty(page);
659
660 spin_lock(&mapping->private_lock);
661 if (page_has_buffers(page)) {
662 struct buffer_head *head = page_buffers(page);
663 struct buffer_head *bh = head;
664
665 do {
666 set_buffer_dirty(bh);
667 bh = bh->b_this_page;
668 } while (bh != head);
669 }
670 newly_dirty = !TestSetPageDirty(page);
671 spin_unlock(&mapping->private_lock);
672
673 if (newly_dirty)
674 __set_page_dirty(page, mapping, 1);
675 return newly_dirty;
676}
677EXPORT_SYMBOL(__set_page_dirty_buffers);
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
699{
700 struct buffer_head *bh;
701 struct list_head tmp;
702 struct address_space *mapping;
703 int err = 0, err2;
704 struct blk_plug plug;
705
706 INIT_LIST_HEAD(&tmp);
707 blk_start_plug(&plug);
708
709 spin_lock(lock);
710 while (!list_empty(list)) {
711 bh = BH_ENTRY(list->next);
712 mapping = bh->b_assoc_map;
713 __remove_assoc_queue(bh);
714
715
716 smp_mb();
717 if (buffer_dirty(bh) || buffer_locked(bh)) {
718 list_add(&bh->b_assoc_buffers, &tmp);
719 bh->b_assoc_map = mapping;
720 if (buffer_dirty(bh)) {
721 get_bh(bh);
722 spin_unlock(lock);
723
724
725
726
727
728
729
730 write_dirty_buffer(bh, WRITE_SYNC);
731
732
733
734
735
736
737
738 brelse(bh);
739 spin_lock(lock);
740 }
741 }
742 }
743
744 spin_unlock(lock);
745 blk_finish_plug(&plug);
746 spin_lock(lock);
747
748 while (!list_empty(&tmp)) {
749 bh = BH_ENTRY(tmp.prev);
750 get_bh(bh);
751 mapping = bh->b_assoc_map;
752 __remove_assoc_queue(bh);
753
754
755 smp_mb();
756 if (buffer_dirty(bh)) {
757 list_add(&bh->b_assoc_buffers,
758 &mapping->private_list);
759 bh->b_assoc_map = mapping;
760 }
761 spin_unlock(lock);
762 wait_on_buffer(bh);
763 if (!buffer_uptodate(bh))
764 err = -EIO;
765 brelse(bh);
766 spin_lock(lock);
767 }
768
769 spin_unlock(lock);
770 err2 = osync_buffers_list(lock, list);
771 if (err)
772 return err;
773 else
774 return err2;
775}
776
777
778
779
780
781
782
783
784
785
786void invalidate_inode_buffers(struct inode *inode)
787{
788 if (inode_has_buffers(inode)) {
789 struct address_space *mapping = &inode->i_data;
790 struct list_head *list = &mapping->private_list;
791 struct address_space *buffer_mapping = mapping->assoc_mapping;
792
793 spin_lock(&buffer_mapping->private_lock);
794 while (!list_empty(list))
795 __remove_assoc_queue(BH_ENTRY(list->next));
796 spin_unlock(&buffer_mapping->private_lock);
797 }
798}
799EXPORT_SYMBOL(invalidate_inode_buffers);
800
801
802
803
804
805
806
807int remove_inode_buffers(struct inode *inode)
808{
809 int ret = 1;
810
811 if (inode_has_buffers(inode)) {
812 struct address_space *mapping = &inode->i_data;
813 struct list_head *list = &mapping->private_list;
814 struct address_space *buffer_mapping = mapping->assoc_mapping;
815
816 spin_lock(&buffer_mapping->private_lock);
817 while (!list_empty(list)) {
818 struct buffer_head *bh = BH_ENTRY(list->next);
819 if (buffer_dirty(bh)) {
820 ret = 0;
821 break;
822 }
823 __remove_assoc_queue(bh);
824 }
825 spin_unlock(&buffer_mapping->private_lock);
826 }
827 return ret;
828}
829
830
831
832
833
834
835
836
837
838
839struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
840 int retry)
841{
842 struct buffer_head *bh, *head;
843 long offset;
844
845try_again:
846 head = NULL;
847 offset = PAGE_SIZE;
848 while ((offset -= size) >= 0) {
849 bh = alloc_buffer_head(GFP_NOFS);
850 if (!bh)
851 goto no_grow;
852
853 bh->b_bdev = NULL;
854 bh->b_this_page = head;
855 bh->b_blocknr = -1;
856 head = bh;
857
858 bh->b_state = 0;
859 atomic_set(&bh->b_count, 0);
860 bh->b_size = size;
861
862
863 set_bh_page(bh, page, offset);
864
865 init_buffer(bh, NULL, NULL);
866 }
867 return head;
868
869
870
871no_grow:
872 if (head) {
873 do {
874 bh = head;
875 head = head->b_this_page;
876 free_buffer_head(bh);
877 } while (head);
878 }
879
880
881
882
883
884
885
886 if (!retry)
887 return NULL;
888
889
890
891
892
893
894
895 free_more_memory();
896 goto try_again;
897}
898EXPORT_SYMBOL_GPL(alloc_page_buffers);
899
900static inline void
901link_dev_buffers(struct page *page, struct buffer_head *head)
902{
903 struct buffer_head *bh, *tail;
904
905 bh = head;
906 do {
907 tail = bh;
908 bh = bh->b_this_page;
909 } while (bh);
910 tail->b_this_page = head;
911 attach_page_buffers(page, head);
912}
913
914
915
916
917static void
918init_page_buffers(struct page *page, struct block_device *bdev,
919 sector_t block, int size)
920{
921 struct buffer_head *head = page_buffers(page);
922 struct buffer_head *bh = head;
923 int uptodate = PageUptodate(page);
924
925 do {
926 if (!buffer_mapped(bh)) {
927 init_buffer(bh, NULL, NULL);
928 bh->b_bdev = bdev;
929 bh->b_blocknr = block;
930 if (uptodate)
931 set_buffer_uptodate(bh);
932 set_buffer_mapped(bh);
933 }
934 block++;
935 bh = bh->b_this_page;
936 } while (bh != head);
937}
938
939
940
941
942
943
944static struct page *
945grow_dev_page(struct block_device *bdev, sector_t block,
946 pgoff_t index, int size)
947{
948 struct inode *inode = bdev->bd_inode;
949 struct page *page;
950 struct buffer_head *bh;
951
952 page = find_or_create_page(inode->i_mapping, index,
953 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
954 if (!page)
955 return NULL;
956
957 BUG_ON(!PageLocked(page));
958
959 if (page_has_buffers(page)) {
960 bh = page_buffers(page);
961 if (bh->b_size == size) {
962 init_page_buffers(page, bdev, block, size);
963 return page;
964 }
965 if (!try_to_free_buffers(page))
966 goto failed;
967 }
968
969
970
971
972 bh = alloc_page_buffers(page, size, 0);
973 if (!bh)
974 goto failed;
975
976
977
978
979
980
981 spin_lock(&inode->i_mapping->private_lock);
982 link_dev_buffers(page, bh);
983 init_page_buffers(page, bdev, block, size);
984 spin_unlock(&inode->i_mapping->private_lock);
985 return page;
986
987failed:
988 BUG();
989 unlock_page(page);
990 page_cache_release(page);
991 return NULL;
992}
993
994
995
996
997
998static int
999grow_buffers(struct block_device *bdev, sector_t block, int size)
1000{
1001 struct page *page;
1002 pgoff_t index;
1003 int sizebits;
1004
1005 sizebits = -1;
1006 do {
1007 sizebits++;
1008 } while ((size << sizebits) < PAGE_SIZE);
1009
1010 index = block >> sizebits;
1011
1012
1013
1014
1015
1016 if (unlikely(index != block >> sizebits)) {
1017 char b[BDEVNAME_SIZE];
1018
1019 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1020 "device %s\n",
1021 __func__, (unsigned long long)block,
1022 bdevname(bdev, b));
1023 return -EIO;
1024 }
1025 block = index << sizebits;
1026
1027 page = grow_dev_page(bdev, block, index, size);
1028 if (!page)
1029 return 0;
1030 unlock_page(page);
1031 page_cache_release(page);
1032 return 1;
1033}
1034
1035static struct buffer_head *
1036__getblk_slow(struct block_device *bdev, sector_t block, int size)
1037{
1038
1039 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1040 (size < 512 || size > PAGE_SIZE))) {
1041 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1042 size);
1043 printk(KERN_ERR "logical block size: %d\n",
1044 bdev_logical_block_size(bdev));
1045
1046 dump_stack();
1047 return NULL;
1048 }
1049
1050 for (;;) {
1051 struct buffer_head * bh;
1052 int ret;
1053
1054 bh = __find_get_block(bdev, block, size);
1055 if (bh)
1056 return bh;
1057
1058 ret = grow_buffers(bdev, block, size);
1059 if (ret < 0)
1060 return NULL;
1061 if (ret == 0)
1062 free_more_memory();
1063 }
1064}
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101void mark_buffer_dirty(struct buffer_head *bh)
1102{
1103 WARN_ON_ONCE(!buffer_uptodate(bh));
1104
1105
1106
1107
1108
1109
1110
1111 if (buffer_dirty(bh)) {
1112 smp_mb();
1113 if (buffer_dirty(bh))
1114 return;
1115 }
1116
1117 if (!test_set_buffer_dirty(bh)) {
1118 struct page *page = bh->b_page;
1119 if (!TestSetPageDirty(page)) {
1120 struct address_space *mapping = page_mapping(page);
1121 if (mapping)
1122 __set_page_dirty(page, mapping, 0);
1123 }
1124 }
1125}
1126EXPORT_SYMBOL(mark_buffer_dirty);
1127
1128
1129
1130
1131
1132
1133
1134
1135void __brelse(struct buffer_head * buf)
1136{
1137 if (atomic_read(&buf->b_count)) {
1138 put_bh(buf);
1139 return;
1140 }
1141 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1142}
1143EXPORT_SYMBOL(__brelse);
1144
1145
1146
1147
1148
1149void __bforget(struct buffer_head *bh)
1150{
1151 clear_buffer_dirty(bh);
1152 if (bh->b_assoc_map) {
1153 struct address_space *buffer_mapping = bh->b_page->mapping;
1154
1155 spin_lock(&buffer_mapping->private_lock);
1156 list_del_init(&bh->b_assoc_buffers);
1157 bh->b_assoc_map = NULL;
1158 spin_unlock(&buffer_mapping->private_lock);
1159 }
1160 __brelse(bh);
1161}
1162EXPORT_SYMBOL(__bforget);
1163
1164static struct buffer_head *__bread_slow(struct buffer_head *bh)
1165{
1166 lock_buffer(bh);
1167 if (buffer_uptodate(bh)) {
1168 unlock_buffer(bh);
1169 return bh;
1170 } else {
1171 get_bh(bh);
1172 bh->b_end_io = end_buffer_read_sync;
1173 submit_bh(READ, bh);
1174 wait_on_buffer(bh);
1175 if (buffer_uptodate(bh))
1176 return bh;
1177 }
1178 brelse(bh);
1179 return NULL;
1180}
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196#define BH_LRU_SIZE 8
1197
1198struct bh_lru {
1199 struct buffer_head *bhs[BH_LRU_SIZE];
1200};
1201
1202static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1203
1204#ifdef CONFIG_SMP
1205#define bh_lru_lock() local_irq_disable()
1206#define bh_lru_unlock() local_irq_enable()
1207#else
1208#define bh_lru_lock() preempt_disable()
1209#define bh_lru_unlock() preempt_enable()
1210#endif
1211
/*
 * Assert that interrupts are enabled. Compiles to nothing when
 * irqs_disabled is not defined as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1218
1219
1220
1221
1222static void bh_lru_install(struct buffer_head *bh)
1223{
1224 struct buffer_head *evictee = NULL;
1225
1226 check_irqs_on();
1227 bh_lru_lock();
1228 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1229 struct buffer_head *bhs[BH_LRU_SIZE];
1230 int in;
1231 int out = 0;
1232
1233 get_bh(bh);
1234 bhs[out++] = bh;
1235 for (in = 0; in < BH_LRU_SIZE; in++) {
1236 struct buffer_head *bh2 =
1237 __this_cpu_read(bh_lrus.bhs[in]);
1238
1239 if (bh2 == bh) {
1240 __brelse(bh2);
1241 } else {
1242 if (out >= BH_LRU_SIZE) {
1243 BUG_ON(evictee != NULL);
1244 evictee = bh2;
1245 } else {
1246 bhs[out++] = bh2;
1247 }
1248 }
1249 }
1250 while (out < BH_LRU_SIZE)
1251 bhs[out++] = NULL;
1252 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1253 }
1254 bh_lru_unlock();
1255
1256 if (evictee)
1257 __brelse(evictee);
1258}
1259
1260
1261
1262
1263static struct buffer_head *
1264lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1265{
1266 struct buffer_head *ret = NULL;
1267 unsigned int i;
1268
1269 check_irqs_on();
1270 bh_lru_lock();
1271 for (i = 0; i < BH_LRU_SIZE; i++) {
1272 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1273
1274 if (bh && bh->b_bdev == bdev &&
1275 bh->b_blocknr == block && bh->b_size == size) {
1276 if (i) {
1277 while (i) {
1278 __this_cpu_write(bh_lrus.bhs[i],
1279 __this_cpu_read(bh_lrus.bhs[i - 1]));
1280 i--;
1281 }
1282 __this_cpu_write(bh_lrus.bhs[0], bh);
1283 }
1284 get_bh(bh);
1285 ret = bh;
1286 break;
1287 }
1288 }
1289 bh_lru_unlock();
1290 return ret;
1291}
1292
1293
1294
1295
1296
1297
1298struct buffer_head *
1299__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1300{
1301 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1302
1303 if (bh == NULL) {
1304 bh = __find_get_block_slow(bdev, block);
1305 if (bh)
1306 bh_lru_install(bh);
1307 }
1308 if (bh)
1309 touch_buffer(bh);
1310 return bh;
1311}
1312EXPORT_SYMBOL(__find_get_block);
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326struct buffer_head *
1327__getblk(struct block_device *bdev, sector_t block, unsigned size)
1328{
1329 struct buffer_head *bh = __find_get_block(bdev, block, size);
1330
1331 might_sleep();
1332 if (bh == NULL)
1333 bh = __getblk_slow(bdev, block, size);
1334 return bh;
1335}
1336EXPORT_SYMBOL(__getblk);
1337
1338
1339
1340
1341void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1342{
1343 struct buffer_head *bh = __getblk(bdev, block, size);
1344 if (likely(bh)) {
1345 ll_rw_block(READA, 1, &bh);
1346 brelse(bh);
1347 }
1348}
1349EXPORT_SYMBOL(__breadahead);
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360struct buffer_head *
1361__bread(struct block_device *bdev, sector_t block, unsigned size)
1362{
1363 struct buffer_head *bh = __getblk(bdev, block, size);
1364
1365 if (likely(bh) && !buffer_uptodate(bh))
1366 bh = __bread_slow(bh);
1367 return bh;
1368}
1369EXPORT_SYMBOL(__bread);
1370
1371
1372
1373
1374
1375
1376static void invalidate_bh_lru(void *arg)
1377{
1378 struct bh_lru *b = &get_cpu_var(bh_lrus);
1379 int i;
1380
1381 for (i = 0; i < BH_LRU_SIZE; i++) {
1382 brelse(b->bhs[i]);
1383 b->bhs[i] = NULL;
1384 }
1385 put_cpu_var(bh_lrus);
1386}
1387
1388void invalidate_bh_lrus(void)
1389{
1390 on_each_cpu(invalidate_bh_lru, NULL, 1);
1391}
1392EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1393
1394void set_bh_page(struct buffer_head *bh,
1395 struct page *page, unsigned long offset)
1396{
1397 bh->b_page = page;
1398 BUG_ON(offset >= PAGE_SIZE);
1399 if (PageHighMem(page))
1400
1401
1402
1403 bh->b_data = (char *)(0 + offset);
1404 else
1405 bh->b_data = page_address(page) + offset;
1406}
1407EXPORT_SYMBOL(set_bh_page);
1408
1409
1410
1411
1412static void discard_buffer(struct buffer_head * bh)
1413{
1414 lock_buffer(bh);
1415 clear_buffer_dirty(bh);
1416 bh->b_bdev = NULL;
1417 clear_buffer_mapped(bh);
1418 clear_buffer_req(bh);
1419 clear_buffer_new(bh);
1420 clear_buffer_delay(bh);
1421 clear_buffer_unwritten(bh);
1422 unlock_buffer(bh);
1423}
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440void block_invalidatepage(struct page *page, unsigned long offset)
1441{
1442 struct buffer_head *head, *bh, *next;
1443 unsigned int curr_off = 0;
1444
1445 BUG_ON(!PageLocked(page));
1446 if (!page_has_buffers(page))
1447 goto out;
1448
1449 head = page_buffers(page);
1450 bh = head;
1451 do {
1452 unsigned int next_off = curr_off + bh->b_size;
1453 next = bh->b_this_page;
1454
1455
1456
1457
1458 if (offset <= curr_off)
1459 discard_buffer(bh);
1460 curr_off = next_off;
1461 bh = next;
1462 } while (bh != head);
1463
1464
1465
1466
1467
1468
1469 if (offset == 0)
1470 try_to_release_page(page, 0);
1471out:
1472 return;
1473}
1474EXPORT_SYMBOL(block_invalidatepage);
1475
1476
1477
1478
1479
1480
1481void create_empty_buffers(struct page *page,
1482 unsigned long blocksize, unsigned long b_state)
1483{
1484 struct buffer_head *bh, *head, *tail;
1485
1486 head = alloc_page_buffers(page, blocksize, 1);
1487 bh = head;
1488 do {
1489 bh->b_state |= b_state;
1490 tail = bh;
1491 bh = bh->b_this_page;
1492 } while (bh);
1493 tail->b_this_page = head;
1494
1495 spin_lock(&page->mapping->private_lock);
1496 if (PageUptodate(page) || PageDirty(page)) {
1497 bh = head;
1498 do {
1499 if (PageDirty(page))
1500 set_buffer_dirty(bh);
1501 if (PageUptodate(page))
1502 set_buffer_uptodate(bh);
1503 bh = bh->b_this_page;
1504 } while (bh != head);
1505 }
1506 attach_page_buffers(page, head);
1507 spin_unlock(&page->mapping->private_lock);
1508}
1509EXPORT_SYMBOL(create_empty_buffers);
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1528{
1529 struct buffer_head *old_bh;
1530
1531 might_sleep();
1532
1533 old_bh = __find_get_block_slow(bdev, block);
1534 if (old_bh) {
1535 clear_buffer_dirty(old_bh);
1536 wait_on_buffer(old_bh);
1537 clear_buffer_req(old_bh);
1538 __brelse(old_bh);
1539 }
1540}
1541EXPORT_SYMBOL(unmap_underlying_metadata);
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572static int __block_write_full_page(struct inode *inode, struct page *page,
1573 get_block_t *get_block, struct writeback_control *wbc,
1574 bh_end_io_t *handler)
1575{
1576 int err;
1577 sector_t block;
1578 sector_t last_block;
1579 struct buffer_head *bh, *head;
1580 const unsigned blocksize = 1 << inode->i_blkbits;
1581 int nr_underway = 0;
1582 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1583 WRITE_SYNC : WRITE);
1584
1585 BUG_ON(!PageLocked(page));
1586
1587 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1588
1589 if (!page_has_buffers(page)) {
1590 create_empty_buffers(page, blocksize,
1591 (1 << BH_Dirty)|(1 << BH_Uptodate));
1592 }
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1605 head = page_buffers(page);
1606 bh = head;
1607
1608
1609
1610
1611
1612 do {
1613 if (block > last_block) {
1614
1615
1616
1617
1618
1619
1620
1621
1622 clear_buffer_dirty(bh);
1623 set_buffer_uptodate(bh);
1624 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1625 buffer_dirty(bh)) {
1626 WARN_ON(bh->b_size != blocksize);
1627 err = get_block(inode, block, bh, 1);
1628 if (err)
1629 goto recover;
1630 clear_buffer_delay(bh);
1631 if (buffer_new(bh)) {
1632
1633 clear_buffer_new(bh);
1634 unmap_underlying_metadata(bh->b_bdev,
1635 bh->b_blocknr);
1636 }
1637 }
1638 bh = bh->b_this_page;
1639 block++;
1640 } while (bh != head);
1641
1642 do {
1643 if (!buffer_mapped(bh))
1644 continue;
1645
1646
1647
1648
1649
1650
1651
1652 if (wbc->sync_mode != WB_SYNC_NONE) {
1653 lock_buffer(bh);
1654 } else if (!trylock_buffer(bh)) {
1655 redirty_page_for_writepage(wbc, page);
1656 continue;
1657 }
1658 if (test_clear_buffer_dirty(bh)) {
1659 mark_buffer_async_write_endio(bh, handler);
1660 } else {
1661 unlock_buffer(bh);
1662 }
1663 } while ((bh = bh->b_this_page) != head);
1664
1665
1666
1667
1668
1669 BUG_ON(PageWriteback(page));
1670 set_page_writeback(page);
1671
1672 do {
1673 struct buffer_head *next = bh->b_this_page;
1674 if (buffer_async_write(bh)) {
1675 submit_bh(write_op, bh);
1676 nr_underway++;
1677 }
1678 bh = next;
1679 } while (bh != head);
1680 unlock_page(page);
1681
1682 err = 0;
1683done:
1684 if (nr_underway == 0) {
1685
1686
1687
1688
1689
1690 end_page_writeback(page);
1691
1692
1693
1694
1695
1696 }
1697 return err;
1698
1699recover:
1700
1701
1702
1703
1704
1705
1706 bh = head;
1707
1708 do {
1709 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1710 !buffer_delay(bh)) {
1711 lock_buffer(bh);
1712 mark_buffer_async_write_endio(bh, handler);
1713 } else {
1714
1715
1716
1717
1718 clear_buffer_dirty(bh);
1719 }
1720 } while ((bh = bh->b_this_page) != head);
1721 SetPageError(page);
1722 BUG_ON(PageWriteback(page));
1723 mapping_set_error(page->mapping, err);
1724 set_page_writeback(page);
1725 do {
1726 struct buffer_head *next = bh->b_this_page;
1727 if (buffer_async_write(bh)) {
1728 clear_buffer_dirty(bh);
1729 submit_bh(write_op, bh);
1730 nr_underway++;
1731 }
1732 bh = next;
1733 } while (bh != head);
1734 unlock_page(page);
1735 goto done;
1736}
1737
1738
1739
1740
1741
1742
1743void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1744{
1745 unsigned int block_start, block_end;
1746 struct buffer_head *head, *bh;
1747
1748 BUG_ON(!PageLocked(page));
1749 if (!page_has_buffers(page))
1750 return;
1751
1752 bh = head = page_buffers(page);
1753 block_start = 0;
1754 do {
1755 block_end = block_start + bh->b_size;
1756
1757 if (buffer_new(bh)) {
1758 if (block_end > from && block_start < to) {
1759 if (!PageUptodate(page)) {
1760 unsigned start, size;
1761
1762 start = max(from, block_start);
1763 size = min(to, block_end) - start;
1764
1765 zero_user(page, start, size);
1766 set_buffer_uptodate(bh);
1767 }
1768
1769 clear_buffer_new(bh);
1770 mark_buffer_dirty(bh);
1771 }
1772 }
1773
1774 block_start = block_end;
1775 bh = bh->b_this_page;
1776 } while (bh != head);
1777}
1778EXPORT_SYMBOL(page_zero_new_buffers);
1779
1780int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1781 get_block_t *get_block)
1782{
1783 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1784 unsigned to = from + len;
1785 struct inode *inode = page->mapping->host;
1786 unsigned block_start, block_end;
1787 sector_t block;
1788 int err = 0;
1789 unsigned blocksize, bbits;
1790 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1791
1792 BUG_ON(!PageLocked(page));
1793 BUG_ON(from > PAGE_CACHE_SIZE);
1794 BUG_ON(to > PAGE_CACHE_SIZE);
1795 BUG_ON(from > to);
1796
1797 blocksize = 1 << inode->i_blkbits;
1798 if (!page_has_buffers(page))
1799 create_empty_buffers(page, blocksize, 0);
1800 head = page_buffers(page);
1801
1802 bbits = inode->i_blkbits;
1803 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1804
1805 for(bh = head, block_start = 0; bh != head || !block_start;
1806 block++, block_start=block_end, bh = bh->b_this_page) {
1807 block_end = block_start + blocksize;
1808 if (block_end <= from || block_start >= to) {
1809 if (PageUptodate(page)) {
1810 if (!buffer_uptodate(bh))
1811 set_buffer_uptodate(bh);
1812 }
1813 continue;
1814 }
1815 if (buffer_new(bh))
1816 clear_buffer_new(bh);
1817 if (!buffer_mapped(bh)) {
1818 WARN_ON(bh->b_size != blocksize);
1819 err = get_block(inode, block, bh, 1);
1820 if (err)
1821 break;
1822 if (buffer_new(bh)) {
1823 unmap_underlying_metadata(bh->b_bdev,
1824 bh->b_blocknr);
1825 if (PageUptodate(page)) {
1826 clear_buffer_new(bh);
1827 set_buffer_uptodate(bh);
1828 mark_buffer_dirty(bh);
1829 continue;
1830 }
1831 if (block_end > to || block_start < from)
1832 zero_user_segments(page,
1833 to, block_end,
1834 block_start, from);
1835 continue;
1836 }
1837 }
1838 if (PageUptodate(page)) {
1839 if (!buffer_uptodate(bh))
1840 set_buffer_uptodate(bh);
1841 continue;
1842 }
1843 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1844 !buffer_unwritten(bh) &&
1845 (block_start < from || block_end > to)) {
1846 ll_rw_block(READ, 1, &bh);
1847 *wait_bh++=bh;
1848 }
1849 }
1850
1851
1852
1853 while(wait_bh > wait) {
1854 wait_on_buffer(*--wait_bh);
1855 if (!buffer_uptodate(*wait_bh))
1856 err = -EIO;
1857 }
1858 if (unlikely(err))
1859 page_zero_new_buffers(page, from, to);
1860 return err;
1861}
1862EXPORT_SYMBOL(__block_write_begin);
1863
1864static int __block_commit_write(struct inode *inode, struct page *page,
1865 unsigned from, unsigned to)
1866{
1867 unsigned block_start, block_end;
1868 int partial = 0;
1869 unsigned blocksize;
1870 struct buffer_head *bh, *head;
1871
1872 blocksize = 1 << inode->i_blkbits;
1873
1874 for(bh = head = page_buffers(page), block_start = 0;
1875 bh != head || !block_start;
1876 block_start=block_end, bh = bh->b_this_page) {
1877 block_end = block_start + blocksize;
1878 if (block_end <= from || block_start >= to) {
1879 if (!buffer_uptodate(bh))
1880 partial = 1;
1881 } else {
1882 set_buffer_uptodate(bh);
1883 mark_buffer_dirty(bh);
1884 }
1885 clear_buffer_new(bh);
1886 }
1887
1888
1889
1890
1891
1892
1893
1894 if (!partial)
1895 SetPageUptodate(page);
1896 return 0;
1897}
1898
1899
1900
1901
1902
1903
1904
1905int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
1906 unsigned flags, struct page **pagep, get_block_t *get_block)
1907{
1908 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1909 struct page *page;
1910 int status;
1911
1912 page = grab_cache_page_write_begin(mapping, index, flags);
1913 if (!page)
1914 return -ENOMEM;
1915
1916 status = __block_write_begin(page, pos, len, get_block);
1917 if (unlikely(status)) {
1918 unlock_page(page);
1919 page_cache_release(page);
1920 page = NULL;
1921 }
1922
1923 *pagep = page;
1924 return status;
1925}
1926EXPORT_SYMBOL(block_write_begin);
1927
1928int block_write_end(struct file *file, struct address_space *mapping,
1929 loff_t pos, unsigned len, unsigned copied,
1930 struct page *page, void *fsdata)
1931{
1932 struct inode *inode = mapping->host;
1933 unsigned start;
1934
1935 start = pos & (PAGE_CACHE_SIZE - 1);
1936
1937 if (unlikely(copied < len)) {
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950 if (!PageUptodate(page))
1951 copied = 0;
1952
1953 page_zero_new_buffers(page, start+copied, start+len);
1954 }
1955 flush_dcache_page(page);
1956
1957
1958 __block_commit_write(inode, page, start, start+copied);
1959
1960 return copied;
1961}
1962EXPORT_SYMBOL(block_write_end);
1963
1964int generic_write_end(struct file *file, struct address_space *mapping,
1965 loff_t pos, unsigned len, unsigned copied,
1966 struct page *page, void *fsdata)
1967{
1968 struct inode *inode = mapping->host;
1969 int i_size_changed = 0;
1970
1971 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1972
1973
1974
1975
1976
1977
1978
1979
1980 if (pos+copied > inode->i_size) {
1981 i_size_write(inode, pos+copied);
1982 i_size_changed = 1;
1983 }
1984
1985 unlock_page(page);
1986 page_cache_release(page);
1987
1988
1989
1990
1991
1992
1993
1994 if (i_size_changed)
1995 mark_inode_dirty(inode);
1996
1997 return copied;
1998}
1999EXPORT_SYMBOL(generic_write_end);
2000
2001
2002
2003
2004
2005
2006
2007
2008int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2009 unsigned long from)
2010{
2011 struct inode *inode = page->mapping->host;
2012 unsigned block_start, block_end, blocksize;
2013 unsigned to;
2014 struct buffer_head *bh, *head;
2015 int ret = 1;
2016
2017 if (!page_has_buffers(page))
2018 return 0;
2019
2020 blocksize = 1 << inode->i_blkbits;
2021 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2022 to = from + to;
2023 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2024 return 0;
2025
2026 head = page_buffers(page);
2027 bh = head;
2028 block_start = 0;
2029 do {
2030 block_end = block_start + blocksize;
2031 if (block_end > from && block_start < to) {
2032 if (!buffer_uptodate(bh)) {
2033 ret = 0;
2034 break;
2035 }
2036 if (block_end >= to)
2037 break;
2038 }
2039 block_start = block_end;
2040 bh = bh->b_this_page;
2041 } while (bh != head);
2042
2043 return ret;
2044}
2045EXPORT_SYMBOL(block_is_partially_uptodate);
2046
2047
2048
2049
2050
2051
2052
2053
2054int block_read_full_page(struct page *page, get_block_t *get_block)
2055{
2056 struct inode *inode = page->mapping->host;
2057 sector_t iblock, lblock;
2058 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2059 unsigned int blocksize;
2060 int nr, i;
2061 int fully_mapped = 1;
2062
2063 BUG_ON(!PageLocked(page));
2064 blocksize = 1 << inode->i_blkbits;
2065 if (!page_has_buffers(page))
2066 create_empty_buffers(page, blocksize, 0);
2067 head = page_buffers(page);
2068
2069 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2070 lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
2071 bh = head;
2072 nr = 0;
2073 i = 0;
2074
2075 do {
2076 if (buffer_uptodate(bh))
2077 continue;
2078
2079 if (!buffer_mapped(bh)) {
2080 int err = 0;
2081
2082 fully_mapped = 0;
2083 if (iblock < lblock) {
2084 WARN_ON(bh->b_size != blocksize);
2085 err = get_block(inode, iblock, bh, 0);
2086 if (err)
2087 SetPageError(page);
2088 }
2089 if (!buffer_mapped(bh)) {
2090 zero_user(page, i * blocksize, blocksize);
2091 if (!err)
2092 set_buffer_uptodate(bh);
2093 continue;
2094 }
2095
2096
2097
2098
2099 if (buffer_uptodate(bh))
2100 continue;
2101 }
2102 arr[nr++] = bh;
2103 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2104
2105 if (fully_mapped)
2106 SetPageMappedToDisk(page);
2107
2108 if (!nr) {
2109
2110
2111
2112
2113 if (!PageError(page))
2114 SetPageUptodate(page);
2115 unlock_page(page);
2116 return 0;
2117 }
2118
2119
2120 for (i = 0; i < nr; i++) {
2121 bh = arr[i];
2122 lock_buffer(bh);
2123 mark_buffer_async_read(bh);
2124 }
2125
2126
2127
2128
2129
2130
2131 for (i = 0; i < nr; i++) {
2132 bh = arr[i];
2133 if (buffer_uptodate(bh))
2134 end_buffer_async_read(bh, 1);
2135 else
2136 submit_bh(READ, bh);
2137 }
2138 return 0;
2139}
2140EXPORT_SYMBOL(block_read_full_page);
2141
2142
2143
2144
2145
2146int generic_cont_expand_simple(struct inode *inode, loff_t size)
2147{
2148 struct address_space *mapping = inode->i_mapping;
2149 struct page *page;
2150 void *fsdata;
2151 int err;
2152
2153 err = inode_newsize_ok(inode, size);
2154 if (err)
2155 goto out;
2156
2157 err = pagecache_write_begin(NULL, mapping, size, 0,
2158 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2159 &page, &fsdata);
2160 if (err)
2161 goto out;
2162
2163 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2164 BUG_ON(err > 0);
2165
2166out:
2167 return err;
2168}
2169EXPORT_SYMBOL(generic_cont_expand_simple);
2170
2171static int cont_expand_zero(struct file *file, struct address_space *mapping,
2172 loff_t pos, loff_t *bytes)
2173{
2174 struct inode *inode = mapping->host;
2175 unsigned blocksize = 1 << inode->i_blkbits;
2176 struct page *page;
2177 void *fsdata;
2178 pgoff_t index, curidx;
2179 loff_t curpos;
2180 unsigned zerofrom, offset, len;
2181 int err = 0;
2182
2183 index = pos >> PAGE_CACHE_SHIFT;
2184 offset = pos & ~PAGE_CACHE_MASK;
2185
2186 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2187 zerofrom = curpos & ~PAGE_CACHE_MASK;
2188 if (zerofrom & (blocksize-1)) {
2189 *bytes |= (blocksize-1);
2190 (*bytes)++;
2191 }
2192 len = PAGE_CACHE_SIZE - zerofrom;
2193
2194 err = pagecache_write_begin(file, mapping, curpos, len,
2195 AOP_FLAG_UNINTERRUPTIBLE,
2196 &page, &fsdata);
2197 if (err)
2198 goto out;
2199 zero_user(page, zerofrom, len);
2200 err = pagecache_write_end(file, mapping, curpos, len, len,
2201 page, fsdata);
2202 if (err < 0)
2203 goto out;
2204 BUG_ON(err != len);
2205 err = 0;
2206
2207 balance_dirty_pages_ratelimited(mapping);
2208 }
2209
2210
2211 if (index == curidx) {
2212 zerofrom = curpos & ~PAGE_CACHE_MASK;
2213
2214 if (offset <= zerofrom) {
2215 goto out;
2216 }
2217 if (zerofrom & (blocksize-1)) {
2218 *bytes |= (blocksize-1);
2219 (*bytes)++;
2220 }
2221 len = offset - zerofrom;
2222
2223 err = pagecache_write_begin(file, mapping, curpos, len,
2224 AOP_FLAG_UNINTERRUPTIBLE,
2225 &page, &fsdata);
2226 if (err)
2227 goto out;
2228 zero_user(page, zerofrom, len);
2229 err = pagecache_write_end(file, mapping, curpos, len, len,
2230 page, fsdata);
2231 if (err < 0)
2232 goto out;
2233 BUG_ON(err != len);
2234 err = 0;
2235 }
2236out:
2237 return err;
2238}
2239
2240
2241
2242
2243
2244int cont_write_begin(struct file *file, struct address_space *mapping,
2245 loff_t pos, unsigned len, unsigned flags,
2246 struct page **pagep, void **fsdata,
2247 get_block_t *get_block, loff_t *bytes)
2248{
2249 struct inode *inode = mapping->host;
2250 unsigned blocksize = 1 << inode->i_blkbits;
2251 unsigned zerofrom;
2252 int err;
2253
2254 err = cont_expand_zero(file, mapping, pos, bytes);
2255 if (err)
2256 return err;
2257
2258 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2259 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2260 *bytes |= (blocksize-1);
2261 (*bytes)++;
2262 }
2263
2264 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2265}
2266EXPORT_SYMBOL(cont_write_begin);
2267
2268int block_commit_write(struct page *page, unsigned from, unsigned to)
2269{
2270 struct inode *inode = page->mapping->host;
2271 __block_commit_write(inode,page,from,to);
2272 return 0;
2273}
2274EXPORT_SYMBOL(block_commit_write);
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2295 get_block_t get_block)
2296{
2297 struct page *page = vmf->page;
2298 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2299 unsigned long end;
2300 loff_t size;
2301 int ret;
2302
2303 lock_page(page);
2304 size = i_size_read(inode);
2305 if ((page->mapping != inode->i_mapping) ||
2306 (page_offset(page) > size)) {
2307
2308 ret = -EFAULT;
2309 goto out_unlock;
2310 }
2311
2312
2313 if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2314 end = size & ~PAGE_CACHE_MASK;
2315 else
2316 end = PAGE_CACHE_SIZE;
2317
2318 ret = __block_write_begin(page, 0, end, get_block);
2319 if (!ret)
2320 ret = block_commit_write(page, 0, end);
2321
2322 if (unlikely(ret < 0))
2323 goto out_unlock;
2324
2325
2326
2327
2328
2329
2330
2331 set_page_dirty(page);
2332 if (inode->i_sb->s_frozen != SB_UNFROZEN) {
2333 ret = -EAGAIN;
2334 goto out_unlock;
2335 }
2336 wait_on_page_writeback(page);
2337 return 0;
2338out_unlock:
2339 unlock_page(page);
2340 return ret;
2341}
2342EXPORT_SYMBOL(__block_page_mkwrite);
2343
2344int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2345 get_block_t get_block)
2346{
2347 int ret;
2348 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2349
2350
2351
2352
2353
2354 vfs_check_frozen(sb, SB_FREEZE_WRITE);
2355 ret = __block_page_mkwrite(vma, vmf, get_block);
2356 return block_page_mkwrite_return(ret);
2357}
2358EXPORT_SYMBOL(block_page_mkwrite);
2359
2360
2361
2362
2363
2364
/*
 * Read-completion handler installed by nobh_write_begin() on its
 * temporary buffers.  It only forwards to __end_buffer_read_notouch().
 */
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}
2369
2370
2371
2372
2373
2374
2375static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2376{
2377 struct buffer_head *bh;
2378
2379 BUG_ON(!PageLocked(page));
2380
2381 spin_lock(&page->mapping->private_lock);
2382 bh = head;
2383 do {
2384 if (PageDirty(page))
2385 set_buffer_dirty(bh);
2386 if (!bh->b_this_page)
2387 bh->b_this_page = head;
2388 bh = bh->b_this_page;
2389 } while (bh != head);
2390 attach_page_buffers(page, head);
2391 spin_unlock(&page->mapping->private_lock);
2392}
2393
2394
2395
2396
2397
2398
2399int nobh_write_begin(struct address_space *mapping,
2400 loff_t pos, unsigned len, unsigned flags,
2401 struct page **pagep, void **fsdata,
2402 get_block_t *get_block)
2403{
2404 struct inode *inode = mapping->host;
2405 const unsigned blkbits = inode->i_blkbits;
2406 const unsigned blocksize = 1 << blkbits;
2407 struct buffer_head *head, *bh;
2408 struct page *page;
2409 pgoff_t index;
2410 unsigned from, to;
2411 unsigned block_in_page;
2412 unsigned block_start, block_end;
2413 sector_t block_in_file;
2414 int nr_reads = 0;
2415 int ret = 0;
2416 int is_mapped_to_disk = 1;
2417
2418 index = pos >> PAGE_CACHE_SHIFT;
2419 from = pos & (PAGE_CACHE_SIZE - 1);
2420 to = from + len;
2421
2422 page = grab_cache_page_write_begin(mapping, index, flags);
2423 if (!page)
2424 return -ENOMEM;
2425 *pagep = page;
2426 *fsdata = NULL;
2427
2428 if (page_has_buffers(page)) {
2429 ret = __block_write_begin(page, pos, len, get_block);
2430 if (unlikely(ret))
2431 goto out_release;
2432 return ret;
2433 }
2434
2435 if (PageMappedToDisk(page))
2436 return 0;
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447 head = alloc_page_buffers(page, blocksize, 0);
2448 if (!head) {
2449 ret = -ENOMEM;
2450 goto out_release;
2451 }
2452
2453 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2454
2455
2456
2457
2458
2459
2460 for (block_start = 0, block_in_page = 0, bh = head;
2461 block_start < PAGE_CACHE_SIZE;
2462 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2463 int create;
2464
2465 block_end = block_start + blocksize;
2466 bh->b_state = 0;
2467 create = 1;
2468 if (block_start >= to)
2469 create = 0;
2470 ret = get_block(inode, block_in_file + block_in_page,
2471 bh, create);
2472 if (ret)
2473 goto failed;
2474 if (!buffer_mapped(bh))
2475 is_mapped_to_disk = 0;
2476 if (buffer_new(bh))
2477 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2478 if (PageUptodate(page)) {
2479 set_buffer_uptodate(bh);
2480 continue;
2481 }
2482 if (buffer_new(bh) || !buffer_mapped(bh)) {
2483 zero_user_segments(page, block_start, from,
2484 to, block_end);
2485 continue;
2486 }
2487 if (buffer_uptodate(bh))
2488 continue;
2489 if (block_start < from || block_end > to) {
2490 lock_buffer(bh);
2491 bh->b_end_io = end_buffer_read_nobh;
2492 submit_bh(READ, bh);
2493 nr_reads++;
2494 }
2495 }
2496
2497 if (nr_reads) {
2498
2499
2500
2501
2502
2503 for (bh = head; bh; bh = bh->b_this_page) {
2504 wait_on_buffer(bh);
2505 if (!buffer_uptodate(bh))
2506 ret = -EIO;
2507 }
2508 if (ret)
2509 goto failed;
2510 }
2511
2512 if (is_mapped_to_disk)
2513 SetPageMappedToDisk(page);
2514
2515 *fsdata = head;
2516
2517 return 0;
2518
2519failed:
2520 BUG_ON(!ret);
2521
2522
2523
2524
2525
2526
2527
2528 attach_nobh_buffers(page, head);
2529 page_zero_new_buffers(page, from, to);
2530
2531out_release:
2532 unlock_page(page);
2533 page_cache_release(page);
2534 *pagep = NULL;
2535
2536 return ret;
2537}
2538EXPORT_SYMBOL(nobh_write_begin);
2539
2540int nobh_write_end(struct file *file, struct address_space *mapping,
2541 loff_t pos, unsigned len, unsigned copied,
2542 struct page *page, void *fsdata)
2543{
2544 struct inode *inode = page->mapping->host;
2545 struct buffer_head *head = fsdata;
2546 struct buffer_head *bh;
2547 BUG_ON(fsdata != NULL && page_has_buffers(page));
2548
2549 if (unlikely(copied < len) && head)
2550 attach_nobh_buffers(page, head);
2551 if (page_has_buffers(page))
2552 return generic_write_end(file, mapping, pos, len,
2553 copied, page, fsdata);
2554
2555 SetPageUptodate(page);
2556 set_page_dirty(page);
2557 if (pos+copied > inode->i_size) {
2558 i_size_write(inode, pos+copied);
2559 mark_inode_dirty(inode);
2560 }
2561
2562 unlock_page(page);
2563 page_cache_release(page);
2564
2565 while (head) {
2566 bh = head;
2567 head = head->b_this_page;
2568 free_buffer_head(bh);
2569 }
2570
2571 return copied;
2572}
2573EXPORT_SYMBOL(nobh_write_end);
2574
2575
2576
2577
2578
2579
2580int nobh_writepage(struct page *page, get_block_t *get_block,
2581 struct writeback_control *wbc)
2582{
2583 struct inode * const inode = page->mapping->host;
2584 loff_t i_size = i_size_read(inode);
2585 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2586 unsigned offset;
2587 int ret;
2588
2589
2590 if (page->index < end_index)
2591 goto out;
2592
2593
2594 offset = i_size & (PAGE_CACHE_SIZE-1);
2595 if (page->index >= end_index+1 || !offset) {
2596
2597
2598
2599
2600
2601#if 0
2602
2603 if (page->mapping->a_ops->invalidatepage)
2604 page->mapping->a_ops->invalidatepage(page, offset);
2605#endif
2606 unlock_page(page);
2607 return 0;
2608 }
2609
2610
2611
2612
2613
2614
2615
2616
2617 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2618out:
2619 ret = mpage_writepage(page, get_block, wbc);
2620 if (ret == -EAGAIN)
2621 ret = __block_write_full_page(inode, page, get_block, wbc,
2622 end_buffer_async_write);
2623 return ret;
2624}
2625EXPORT_SYMBOL(nobh_writepage);
2626
2627int nobh_truncate_page(struct address_space *mapping,
2628 loff_t from, get_block_t *get_block)
2629{
2630 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2631 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2632 unsigned blocksize;
2633 sector_t iblock;
2634 unsigned length, pos;
2635 struct inode *inode = mapping->host;
2636 struct page *page;
2637 struct buffer_head map_bh;
2638 int err;
2639
2640 blocksize = 1 << inode->i_blkbits;
2641 length = offset & (blocksize - 1);
2642
2643
2644 if (!length)
2645 return 0;
2646
2647 length = blocksize - length;
2648 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2649
2650 page = grab_cache_page(mapping, index);
2651 err = -ENOMEM;
2652 if (!page)
2653 goto out;
2654
2655 if (page_has_buffers(page)) {
2656has_buffers:
2657 unlock_page(page);
2658 page_cache_release(page);
2659 return block_truncate_page(mapping, from, get_block);
2660 }
2661
2662
2663 pos = blocksize;
2664 while (offset >= pos) {
2665 iblock++;
2666 pos += blocksize;
2667 }
2668
2669 map_bh.b_size = blocksize;
2670 map_bh.b_state = 0;
2671 err = get_block(inode, iblock, &map_bh, 0);
2672 if (err)
2673 goto unlock;
2674
2675 if (!buffer_mapped(&map_bh))
2676 goto unlock;
2677
2678
2679 if (!PageUptodate(page)) {
2680 err = mapping->a_ops->readpage(NULL, page);
2681 if (err) {
2682 page_cache_release(page);
2683 goto out;
2684 }
2685 lock_page(page);
2686 if (!PageUptodate(page)) {
2687 err = -EIO;
2688 goto unlock;
2689 }
2690 if (page_has_buffers(page))
2691 goto has_buffers;
2692 }
2693 zero_user(page, offset, length);
2694 set_page_dirty(page);
2695 err = 0;
2696
2697unlock:
2698 unlock_page(page);
2699 page_cache_release(page);
2700out:
2701 return err;
2702}
2703EXPORT_SYMBOL(nobh_truncate_page);
2704
2705int block_truncate_page(struct address_space *mapping,
2706 loff_t from, get_block_t *get_block)
2707{
2708 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2709 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2710 unsigned blocksize;
2711 sector_t iblock;
2712 unsigned length, pos;
2713 struct inode *inode = mapping->host;
2714 struct page *page;
2715 struct buffer_head *bh;
2716 int err;
2717
2718 blocksize = 1 << inode->i_blkbits;
2719 length = offset & (blocksize - 1);
2720
2721
2722 if (!length)
2723 return 0;
2724
2725 length = blocksize - length;
2726 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2727
2728 page = grab_cache_page(mapping, index);
2729 err = -ENOMEM;
2730 if (!page)
2731 goto out;
2732
2733 if (!page_has_buffers(page))
2734 create_empty_buffers(page, blocksize, 0);
2735
2736
2737 bh = page_buffers(page);
2738 pos = blocksize;
2739 while (offset >= pos) {
2740 bh = bh->b_this_page;
2741 iblock++;
2742 pos += blocksize;
2743 }
2744
2745 err = 0;
2746 if (!buffer_mapped(bh)) {
2747 WARN_ON(bh->b_size != blocksize);
2748 err = get_block(inode, iblock, bh, 0);
2749 if (err)
2750 goto unlock;
2751
2752 if (!buffer_mapped(bh))
2753 goto unlock;
2754 }
2755
2756
2757 if (PageUptodate(page))
2758 set_buffer_uptodate(bh);
2759
2760 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2761 err = -EIO;
2762 ll_rw_block(READ, 1, &bh);
2763 wait_on_buffer(bh);
2764
2765 if (!buffer_uptodate(bh))
2766 goto unlock;
2767 }
2768
2769 zero_user(page, offset, length);
2770 mark_buffer_dirty(bh);
2771 err = 0;
2772
2773unlock:
2774 unlock_page(page);
2775 page_cache_release(page);
2776out:
2777 return err;
2778}
2779EXPORT_SYMBOL(block_truncate_page);
2780
2781
2782
2783
2784
2785int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2786 struct writeback_control *wbc, bh_end_io_t *handler)
2787{
2788 struct inode * const inode = page->mapping->host;
2789 loff_t i_size = i_size_read(inode);
2790 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2791 unsigned offset;
2792
2793
2794 if (page->index < end_index)
2795 return __block_write_full_page(inode, page, get_block, wbc,
2796 handler);
2797
2798
2799 offset = i_size & (PAGE_CACHE_SIZE-1);
2800 if (page->index >= end_index+1 || !offset) {
2801
2802
2803
2804
2805
2806 do_invalidatepage(page, 0);
2807 unlock_page(page);
2808 return 0;
2809 }
2810
2811
2812
2813
2814
2815
2816
2817
2818 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2819 return __block_write_full_page(inode, page, get_block, wbc, handler);
2820}
2821EXPORT_SYMBOL(block_write_full_page_endio);
2822
2823
2824
2825
2826int block_write_full_page(struct page *page, get_block_t *get_block,
2827 struct writeback_control *wbc)
2828{
2829 return block_write_full_page_endio(page, get_block, wbc,
2830 end_buffer_async_write);
2831}
2832EXPORT_SYMBOL(block_write_full_page);
2833
2834sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2835 get_block_t *get_block)
2836{
2837 struct buffer_head tmp;
2838 struct inode *inode = mapping->host;
2839 tmp.b_state = 0;
2840 tmp.b_blocknr = 0;
2841 tmp.b_size = 1 << inode->i_blkbits;
2842 get_block(inode, block, &tmp, 0);
2843 return tmp.b_blocknr;
2844}
2845EXPORT_SYMBOL(generic_block_bmap);
2846
2847static void end_bio_bh_io_sync(struct bio *bio, int err)
2848{
2849 struct buffer_head *bh = bio->bi_private;
2850
2851 if (err == -EOPNOTSUPP) {
2852 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2853 }
2854
2855 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2856 set_bit(BH_Quiet, &bh->b_state);
2857
2858 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2859 bio_put(bio);
2860}
2861
2862int submit_bh(int rw, struct buffer_head * bh)
2863{
2864 struct bio *bio;
2865 int ret = 0;
2866
2867 BUG_ON(!buffer_locked(bh));
2868 BUG_ON(!buffer_mapped(bh));
2869 BUG_ON(!bh->b_end_io);
2870 BUG_ON(buffer_delay(bh));
2871 BUG_ON(buffer_unwritten(bh));
2872
2873
2874
2875
2876 if (test_set_buffer_req(bh) && (rw & WRITE))
2877 clear_buffer_write_io_error(bh);
2878
2879
2880
2881
2882
2883 bio = bio_alloc(GFP_NOIO, 1);
2884
2885 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2886 bio->bi_bdev = bh->b_bdev;
2887 bio->bi_io_vec[0].bv_page = bh->b_page;
2888 bio->bi_io_vec[0].bv_len = bh->b_size;
2889 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2890
2891 bio->bi_vcnt = 1;
2892 bio->bi_idx = 0;
2893 bio->bi_size = bh->b_size;
2894
2895 bio->bi_end_io = end_bio_bh_io_sync;
2896 bio->bi_private = bh;
2897
2898 bio_get(bio);
2899 submit_bio(rw, bio);
2900
2901 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2902 ret = -EOPNOTSUPP;
2903
2904 bio_put(bio);
2905 return ret;
2906}
2907EXPORT_SYMBOL(submit_bh);
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2935{
2936 int i;
2937
2938 for (i = 0; i < nr; i++) {
2939 struct buffer_head *bh = bhs[i];
2940
2941 if (!trylock_buffer(bh))
2942 continue;
2943 if (rw == WRITE) {
2944 if (test_clear_buffer_dirty(bh)) {
2945 bh->b_end_io = end_buffer_write_sync;
2946 get_bh(bh);
2947 submit_bh(WRITE, bh);
2948 continue;
2949 }
2950 } else {
2951 if (!buffer_uptodate(bh)) {
2952 bh->b_end_io = end_buffer_read_sync;
2953 get_bh(bh);
2954 submit_bh(rw, bh);
2955 continue;
2956 }
2957 }
2958 unlock_buffer(bh);
2959 }
2960}
2961EXPORT_SYMBOL(ll_rw_block);
2962
2963void write_dirty_buffer(struct buffer_head *bh, int rw)
2964{
2965 lock_buffer(bh);
2966 if (!test_clear_buffer_dirty(bh)) {
2967 unlock_buffer(bh);
2968 return;
2969 }
2970 bh->b_end_io = end_buffer_write_sync;
2971 get_bh(bh);
2972 submit_bh(rw, bh);
2973}
2974EXPORT_SYMBOL(write_dirty_buffer);
2975
2976
2977
2978
2979
2980
2981int __sync_dirty_buffer(struct buffer_head *bh, int rw)
2982{
2983 int ret = 0;
2984
2985 WARN_ON(atomic_read(&bh->b_count) < 1);
2986 lock_buffer(bh);
2987 if (test_clear_buffer_dirty(bh)) {
2988 get_bh(bh);
2989 bh->b_end_io = end_buffer_write_sync;
2990 ret = submit_bh(rw, bh);
2991 wait_on_buffer(bh);
2992 if (!ret && !buffer_uptodate(bh))
2993 ret = -EIO;
2994 } else {
2995 unlock_buffer(bh);
2996 }
2997 return ret;
2998}
2999EXPORT_SYMBOL(__sync_dirty_buffer);
3000
3001int sync_dirty_buffer(struct buffer_head *bh)
3002{
3003 return __sync_dirty_buffer(bh, WRITE_SYNC);
3004}
3005EXPORT_SYMBOL(sync_dirty_buffer);
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027static inline int buffer_busy(struct buffer_head *bh)
3028{
3029 return atomic_read(&bh->b_count) |
3030 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3031}
3032
3033static int
3034drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3035{
3036 struct buffer_head *head = page_buffers(page);
3037 struct buffer_head *bh;
3038
3039 bh = head;
3040 do {
3041 if (buffer_write_io_error(bh) && page->mapping)
3042 set_bit(AS_EIO, &page->mapping->flags);
3043 if (buffer_busy(bh))
3044 goto failed;
3045 bh = bh->b_this_page;
3046 } while (bh != head);
3047
3048 do {
3049 struct buffer_head *next = bh->b_this_page;
3050
3051 if (bh->b_assoc_map)
3052 __remove_assoc_queue(bh);
3053 bh = next;
3054 } while (bh != head);
3055 *buffers_to_free = head;
3056 __clear_page_buffers(page);
3057 return 1;
3058failed:
3059 return 0;
3060}
3061
3062int try_to_free_buffers(struct page *page)
3063{
3064 struct address_space * const mapping = page->mapping;
3065 struct buffer_head *buffers_to_free = NULL;
3066 int ret = 0;
3067
3068 BUG_ON(!PageLocked(page));
3069 if (PageWriteback(page))
3070 return 0;
3071
3072 if (mapping == NULL) {
3073 ret = drop_buffers(page, &buffers_to_free);
3074 goto out;
3075 }
3076
3077 spin_lock(&mapping->private_lock);
3078 ret = drop_buffers(page, &buffers_to_free);
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094 if (ret)
3095 cancel_dirty_page(page, PAGE_CACHE_SIZE);
3096 spin_unlock(&mapping->private_lock);
3097out:
3098 if (buffers_to_free) {
3099 struct buffer_head *bh = buffers_to_free;
3100
3101 do {
3102 struct buffer_head *next = bh->b_this_page;
3103 free_buffer_head(bh);
3104 bh = next;
3105 } while (bh != buffers_to_free);
3106 }
3107 return ret;
3108}
3109EXPORT_SYMBOL(try_to_free_buffers);
3110
3111
3112
3113
3114
3115
3116
3117
3118SYSCALL_DEFINE2(bdflush, int, func, long, data)
3119{
3120 static int msg_count;
3121
3122 if (!capable(CAP_SYS_ADMIN))
3123 return -EPERM;
3124
3125 if (msg_count < 5) {
3126 msg_count++;
3127 printk(KERN_INFO
3128 "warning: process `%s' used the obsolete bdflush"
3129 " system call\n", current->comm);
3130 printk(KERN_INFO "Fix your initscripts?\n");
3131 }
3132
3133 if (func == 1)
3134 do_exit(0);
3135 return 0;
3136}
3137
3138
3139
3140
3141static struct kmem_cache *bh_cachep;
3142
3143
3144
3145
3146
3147static int max_buffer_heads;
3148
3149int buffer_heads_over_limit;
3150
3151struct bh_accounting {
3152 int nr;
3153 int ratelimit;
3154};
3155
3156static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3157
3158static void recalc_bh_state(void)
3159{
3160 int i;
3161 int tot = 0;
3162
3163 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3164 return;
3165 __this_cpu_write(bh_accounting.ratelimit, 0);
3166 for_each_online_cpu(i)
3167 tot += per_cpu(bh_accounting, i).nr;
3168 buffer_heads_over_limit = (tot > max_buffer_heads);
3169}
3170
3171struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3172{
3173 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3174 if (ret) {
3175 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3176 preempt_disable();
3177 __this_cpu_inc(bh_accounting.nr);
3178 recalc_bh_state();
3179 preempt_enable();
3180 }
3181 return ret;
3182}
3183EXPORT_SYMBOL(alloc_buffer_head);
3184
3185void free_buffer_head(struct buffer_head *bh)
3186{
3187 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3188 kmem_cache_free(bh_cachep, bh);
3189 preempt_disable();
3190 __this_cpu_dec(bh_accounting.nr);
3191 recalc_bh_state();
3192 preempt_enable();
3193}
3194EXPORT_SYMBOL(free_buffer_head);
3195
3196static void buffer_exit_cpu(int cpu)
3197{
3198 int i;
3199 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3200
3201 for (i = 0; i < BH_LRU_SIZE; i++) {
3202 brelse(b->bhs[i]);
3203 b->bhs[i] = NULL;
3204 }
3205 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3206 per_cpu(bh_accounting, cpu).nr = 0;
3207}
3208
3209static int buffer_cpu_notify(struct notifier_block *self,
3210 unsigned long action, void *hcpu)
3211{
3212 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3213 buffer_exit_cpu((unsigned long)hcpu);
3214 return NOTIFY_OK;
3215}
3216
3217
3218
3219
3220
3221
3222
3223
/*
 * Returns 1 if @bh is uptodate, leaving it unlocked.  Returns 0 if it is
 * not uptodate, in which case the buffer is returned locked.
 *
 * The uptodate state is rechecked after the lock has been acquired, since
 * it may have changed while waiting for the lock.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);
	if (!buffer_uptodate(bh))
		return 0;

	/* Became uptodate while we waited for the lock. */
	unlock_buffer(bh);
	return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
3235
3236
3237
3238
3239
3240
3241
3242int bh_submit_read(struct buffer_head *bh)
3243{
3244 BUG_ON(!buffer_locked(bh));
3245
3246 if (buffer_uptodate(bh)) {
3247 unlock_buffer(bh);
3248 return 0;
3249 }
3250
3251 get_bh(bh);
3252 bh->b_end_io = end_buffer_read_sync;
3253 submit_bh(READ, bh);
3254 wait_on_buffer(bh);
3255 if (buffer_uptodate(bh))
3256 return 0;
3257 return -EIO;
3258}
3259EXPORT_SYMBOL(bh_submit_read);
3260
3261void __init buffer_init(void)
3262{
3263 int nrpages;
3264
3265 bh_cachep = kmem_cache_create("buffer_head",
3266 sizeof(struct buffer_head), 0,
3267 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3268 SLAB_MEM_SPREAD),
3269 NULL);
3270
3271
3272
3273
3274 nrpages = (nr_free_buffer_pages() * 10) / 100;
3275 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3276 hotcpu_notifier(buffer_cpu_notify, 0);
3277}
3278