1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/smp_lock.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/highmem.h>
32#include <linux/module.h>
33#include <linux/writeback.h>
34#include <linux/hash.h>
35#include <linux/suspend.h>
36#include <linux/buffer_head.h>
37#include <linux/bio.h>
38#include <linux/notifier.h>
39#include <linux/cpu.h>
40#include <asm/bitops.h>
41
/* Forward declaration; the definition appears later in this file. */
static void invalidate_bh_lrus(void);

/* Convert a pointer to a b_assoc_buffers list node into its buffer_head. */
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

/*
 * Table of wait queue heads shared by all buffer_heads.  A buffer is
 * mapped onto one of the 1 << BH_WAIT_TABLE_ORDER entries by hashing
 * its address (see bh_waitq_head()).
 */
#define BH_WAIT_TABLE_ORDER 7
static struct bh_wait_queue_head {
	wait_queue_head_t wqh;
} ____cacheline_aligned_in_smp bh_wait_queue_heads[1<<BH_WAIT_TABLE_ORDER];
53
54
55
56
57
/*
 * Report a buffer-layer consistency problem.
 *
 * Prints the reporting file/line and a stack dump.  A static counter
 * silences the report once it has grown past 10.
 */
void __buffer_error(char *file, int line)
{
	static int enough;

	if (enough <= 10) {
		enough++;
		printk("buffer layer error at %s:%d\n", file, line);
#ifndef CONFIG_KALLSYMS
		printk("Pass this trace through ksymoops for reporting\n");
#endif
		dump_stack();
	}
}
EXPORT_SYMBOL(__buffer_error);
72
73inline void
74init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
75{
76 bh->b_end_io = handler;
77 bh->b_private = private;
78}
79
80
81
82
83
84wait_queue_head_t *bh_waitq_head(struct buffer_head *bh)
85{
86 return &bh_wait_queue_heads[hash_ptr(bh, BH_WAIT_TABLE_ORDER)].wqh;
87}
88EXPORT_SYMBOL(bh_waitq_head);
89
90void wake_up_buffer(struct buffer_head *bh)
91{
92 wait_queue_head_t *wq = bh_waitq_head(bh);
93
94 smp_mb();
95 if (waitqueue_active(wq))
96 wake_up_all(wq);
97}
98EXPORT_SYMBOL(wake_up_buffer);
99
100void unlock_buffer(struct buffer_head *bh)
101{
102
103
104
105
106
107
108 if (atomic_read(&bh->b_count) == 0 &&
109 !PageLocked(bh->b_page) &&
110 !PageWriteback(bh->b_page))
111 buffer_error();
112
113 clear_buffer_locked(bh);
114 smp_mb__after_clear_bit();
115 wake_up_buffer(bh);
116}
117
118
119
120
121
122
123void __wait_on_buffer(struct buffer_head * bh)
124{
125 wait_queue_head_t *wqh = bh_waitq_head(bh);
126 DEFINE_WAIT(wait);
127
128 if (atomic_read(&bh->b_count) == 0 &&
129 (!bh->b_page || !PageLocked(bh->b_page)))
130 buffer_error();
131
132 do {
133 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
134 if (buffer_locked(bh)) {
135 blk_run_queues();
136 io_schedule();
137 }
138 } while (buffer_locked(bh));
139 finish_wait(wqh, &wait);
140}
141
/*
 * Attach the buffer ring starting at @head to @page.
 *
 * Takes a page reference, sets the page's Private flag and stores the
 * head pointer in page->private.  Complains via buffer_error() if the
 * page already has buffers attached.
 */
static void
__set_page_buffers(struct page *page, struct buffer_head *head)
{
	if (page_has_buffers(page))
		buffer_error();
	page_cache_get(page);		/* dropped by __clear_page_buffers() */
	SetPagePrivate(page);
	page->private = (unsigned long)head;
}
151
/*
 * Detach buffers from @page: clear the Private flag, zero page->private
 * and drop one page reference.
 */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	page->private = 0;
	page_cache_release(page);
}
159
160static void buffer_io_error(struct buffer_head *bh)
161{
162 char b[BDEVNAME_SIZE];
163
164 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
165 bdevname(bh->b_bdev, b),
166 (unsigned long long)bh->b_blocknr);
167}
168
169
170
171
172
/*
 * Completion handler for a synchronous read: record the result in the
 * buffer's uptodate bit, unlock the buffer and drop one reference.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
184
185void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
186{
187 char b[BDEVNAME_SIZE];
188
189 if (uptodate) {
190 set_buffer_uptodate(bh);
191 } else {
192 buffer_io_error(bh);
193 printk(KERN_WARNING "lost page write due to I/O error on %s\n",
194 bdevname(bh->b_bdev, b));
195 set_buffer_write_io_error(bh);
196 clear_buffer_uptodate(bh);
197 }
198 unlock_buffer(bh);
199 put_bh(bh);
200}
201
202
203
204
205
206int sync_blockdev(struct block_device *bdev)
207{
208 int ret = 0;
209
210 if (bdev) {
211 int err;
212
213 ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
214 err = filemap_fdatawait(bdev->bd_inode->i_mapping);
215 if (!ret)
216 ret = err;
217 }
218 return ret;
219}
220EXPORT_SYMBOL(sync_blockdev);
221
222
223
224
225
226
/*
 * Flush everything belonging to superblock @sb.
 *
 * Sequence: start inode writeback, sync quota, write the superblock if
 * it is dirty (under lock_super), call the filesystem's ->sync_fs hook
 * if present, sync the backing block device, then sync the inodes
 * again with a non-zero wait argument.
 *
 * Returns the result of the final sync_blockdev() call.
 */
int fsync_super(struct super_block *sb)
{
	sync_inodes_sb(sb, 0);
	DQUOT_SYNC(sb);
	lock_super(sb);
	if (sb->s_dirt && sb->s_op->write_super)
		sb->s_op->write_super(sb);
	unlock_super(sb);
	if (sb->s_op->sync_fs)
		sb->s_op->sync_fs(sb, 1);
	sync_blockdev(sb->s_bdev);	/* result of this pass is ignored */
	sync_inodes_sb(sb, 1);

	return sync_blockdev(sb->s_bdev);
}
242
243
244
245
246
247
/*
 * Sync a block device.  If a superblock is found for @bdev the whole
 * filesystem is synced through fsync_super(); otherwise only the
 * device's own mapping is written via sync_blockdev().
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = fsync_super(sb);
	drop_super(sb);
	return res;
}
258
259
260
261
262
/*
 * Worker behind sys_sync() and emergency_sync().
 *
 * @wait is forwarded to the second sync_filesystems() call and the
 * second sync_inodes() call; the first pass of each is started with 0.
 * When @wait is zero a completion message is printed.
 */
static void do_sync(unsigned long wait)
{
	wakeup_bdflush(0);
	sync_inodes(0);
	DQUOT_SYNC(NULL);
	sync_supers();
	sync_filesystems(0);
	sync_filesystems(wait);
	sync_inodes(wait);
	if (!wait)
		printk("Emergency Sync complete\n");
}
275
/*
 * sync() system call: run do_sync() with wait == 1.  Always returns 0.
 */
asmlinkage long sys_sync(void)
{
	do_sync(1);
	return 0;
}
281
/*
 * Hand do_sync() with wait == 0 to pdflush_operation() rather than
 * calling it directly from the caller's context.
 */
void emergency_sync(void)
{
	pdflush_operation(do_sync, 0);
}
286
287
288
289
290
291
292
293int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
294{
295 struct inode * inode = dentry->d_inode;
296 struct super_block * sb;
297 int ret;
298
299
300 write_inode_now(inode, 0);
301
302
303 sb = inode->i_sb;
304 lock_super(sb);
305 if (sb->s_op->write_super)
306 sb->s_op->write_super(sb);
307 unlock_super(sb);
308
309
310 ret = sync_blockdev(sb->s_bdev);
311 return ret;
312}
313
314asmlinkage long sys_fsync(unsigned int fd)
315{
316 struct file * file;
317 struct dentry * dentry;
318 struct inode * inode;
319 int ret, err;
320
321 ret = -EBADF;
322 file = fget(fd);
323 if (!file)
324 goto out;
325
326 dentry = file->f_dentry;
327 inode = dentry->d_inode;
328
329 ret = -EINVAL;
330 if (!file->f_op || !file->f_op->fsync) {
331
332 goto out_putf;
333 }
334
335
336 down(&inode->i_sem);
337 current->flags |= PF_SYNCWRITE;
338 ret = filemap_fdatawrite(inode->i_mapping);
339 err = file->f_op->fsync(file, dentry, 0);
340 if (!ret)
341 ret = err;
342 err = filemap_fdatawait(inode->i_mapping);
343 if (!ret)
344 ret = err;
345 current->flags &= ~PF_SYNCWRITE;
346 up(&inode->i_sem);
347
348out_putf:
349 fput(file);
350out:
351 return ret;
352}
353
354asmlinkage long sys_fdatasync(unsigned int fd)
355{
356 struct file * file;
357 struct dentry * dentry;
358 struct inode * inode;
359 int ret, err;
360
361 ret = -EBADF;
362 file = fget(fd);
363 if (!file)
364 goto out;
365
366 dentry = file->f_dentry;
367 inode = dentry->d_inode;
368
369 ret = -EINVAL;
370 if (!file->f_op || !file->f_op->fsync)
371 goto out_putf;
372
373 down(&inode->i_sem);
374 current->flags |= PF_SYNCWRITE;
375 ret = filemap_fdatawrite(inode->i_mapping);
376 err = file->f_op->fsync(file, dentry, 1);
377 if (!ret)
378 ret = err;
379 err = filemap_fdatawait(inode->i_mapping);
380 if (!ret)
381 ret = err;
382 current->flags &= ~PF_SYNCWRITE;
383 up(&inode->i_sem);
384
385out_putf:
386 fput(file);
387out:
388 return ret;
389}
390
391
392
393
394
395
396
397
398
399
400
401
402static struct buffer_head *
403__find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
404{
405 struct inode *bd_inode = bdev->bd_inode;
406 struct address_space *bd_mapping = bd_inode->i_mapping;
407 struct buffer_head *ret = NULL;
408 unsigned long index;
409 struct buffer_head *bh;
410 struct buffer_head *head;
411 struct page *page;
412
413 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
414 page = find_get_page(bd_mapping, index);
415 if (!page)
416 goto out;
417
418 spin_lock(&bd_mapping->private_lock);
419 if (!page_has_buffers(page))
420 goto out_unlock;
421 head = page_buffers(page);
422 bh = head;
423 do {
424 if (bh->b_blocknr == block) {
425 ret = bh;
426 get_bh(bh);
427 goto out_unlock;
428 }
429 bh = bh->b_this_page;
430 } while (bh != head);
431 buffer_error();
432 printk("block=%llu, b_blocknr=%llu\n",
433 (unsigned long long)block, (unsigned long long)bh->b_blocknr);
434 printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
435out_unlock:
436 spin_unlock(&bd_mapping->private_lock);
437 page_cache_release(page);
438out:
439 return ret;
440}
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
/*
 * Invalidate cached state for @bdev: first empty the per-CPU buffer_head
 * LRUs on every CPU, then invalidate the pages of the device inode's
 * mapping.
 *
 * @destroy_dirty_buffers is not used by this implementation.
 */
void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
{
	invalidate_bh_lrus();
	/* LRU references are gone; now drop the page cache pages */
	invalidate_inode_pages(bdev->bd_inode->i_mapping);
}
484
485
486
487
488static void free_more_memory(void)
489{
490 struct zone *zone;
491 pg_data_t *pgdat;
492
493 wakeup_bdflush(1024);
494 blk_run_queues();
495 yield();
496
497 for_each_pgdat(pgdat) {
498 zone = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones[0];
499 if (zone)
500 try_to_free_pages(zone, GFP_NOFS, 0);
501 }
502}
503
504
505
506
507
/*
 * I/O completion handler for an async read of one buffer of a page.
 *
 * Records the result on the buffer (and sets the page error flag on
 * failure), then, under a function-local spinlock with interrupts
 * disabled, clears the buffer's async_read flag, unlocks it and scans
 * the page's buffer ring.  If any buffer is still marked async_read the
 * page is left alone; otherwise the page is marked uptodate (when all
 * buffers are uptodate and the page has no error) and unlocked.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh);
		SetPageError(page);
	}

	/*
	 * The lock serialises this scan against completions of the other
	 * buffers, so only one completion sees "no async_read buffers left".
	 */
	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			/* a buffer still under async read must be locked */
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&page_uptodate_lock, flags);

	/* No buffer is still under async read: finish the page. */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}
560
561
562
563
564
/*
 * I/O completion handler for an async write of one buffer of a page.
 *
 * On failure the error is logged, AS_EIO is set on the page's mapping,
 * the buffer's uptodate bit is cleared and the page error flag is set.
 * Then, under a function-local spinlock with interrupts disabled, the
 * buffer's async_write flag is cleared and the buffer unlocked.  If no
 * other buffer on the page is still marked async_write, page writeback
 * is ended.
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];
	static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh);
		printk(KERN_WARNING "lost page write due to I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		set_bit(AS_EIO, &page->mapping->flags);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	/* scan the other buffers of the page; bh itself was just cleared */
	tmp = bh->b_this_page;
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			/* a buffer still under async write must be locked */
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	end_page_writeback(page);
	return;

still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/*
 * Prepare @bh for an async read: install end_buffer_async_read() as its
 * completion handler and set its async_read flag.
 */
void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	set_buffer_async_read(bh);
}
EXPORT_SYMBOL(mark_buffer_async_read);
634
/*
 * Prepare @bh for an async write: install end_buffer_async_write() as
 * its completion handler and set its async_write flag.
 */
void mark_buffer_async_write(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_write;
	set_buffer_async_write(bh);
}
EXPORT_SYMBOL(mark_buffer_async_write);
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
/*
 * Move @bh's b_assoc_buffers node to the tail of @list while holding
 * @lock.
 */
void buffer_insert_list(spinlock_t *lock,
		struct buffer_head *bh, struct list_head *list)
{
	spin_lock(lock);
	list_move_tail(&bh->b_assoc_buffers, list);
	spin_unlock(lock);
}
699
700
701
702
/*
 * Unlink @bh from whatever association list it is on and reinitialise
 * its list node.  No locking is done here.
 */
static inline void __remove_assoc_queue(struct buffer_head *bh)
{
	list_del_init(&bh->b_assoc_buffers);
}
707
708int inode_has_buffers(struct inode *inode)
709{
710 return !list_empty(&inode->i_data.private_list);
711}
712
713
714
715
716
717
718
719
720
721
722
/*
 * Wait for every locked buffer on @list to become unlocked.
 *
 * The list is walked backwards under @lock.  When a locked buffer is
 * found, a reference is taken, the lock is dropped for the wait, and
 * the walk restarts from scratch afterwards.
 *
 * Returns -EIO if any buffer waited upon was not uptodate afterwards,
 * otherwise 0.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			get_bh(bh);	/* pin bh while the lock is dropped */
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761int sync_mapping_buffers(struct address_space *mapping)
762{
763 struct address_space *buffer_mapping = mapping->assoc_mapping;
764
765 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
766 return 0;
767
768 return fsync_buffers_list(&buffer_mapping->private_lock,
769 &mapping->private_list);
770}
771EXPORT_SYMBOL(sync_mapping_buffers);
772
773
774
775
776
777
778
779void write_boundary_block(struct block_device *bdev,
780 sector_t bblock, unsigned blocksize)
781{
782 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
783 if (bh) {
784 if (buffer_dirty(bh))
785 ll_rw_block(WRITE, 1, &bh);
786 put_bh(bh);
787 }
788}
789
790void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
791{
792 struct address_space *mapping = inode->i_mapping;
793 struct address_space *buffer_mapping = bh->b_page->mapping;
794
795 mark_buffer_dirty(bh);
796 if (!mapping->assoc_mapping) {
797 mapping->assoc_mapping = buffer_mapping;
798 } else {
799 if (mapping->assoc_mapping != buffer_mapping)
800 BUG();
801 }
802 if (list_empty(&bh->b_assoc_buffers))
803 buffer_insert_list(&buffer_mapping->private_lock,
804 bh, &mapping->private_list);
805}
806EXPORT_SYMBOL(mark_buffer_dirty_inode);
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842int __set_page_dirty_buffers(struct page *page)
843{
844 struct address_space * const mapping = page->mapping;
845 int ret = 0;
846
847 if (mapping == NULL) {
848 SetPageDirty(page);
849 goto out;
850 }
851
852 spin_lock(&mapping->private_lock);
853 if (page_has_buffers(page)) {
854 struct buffer_head *head = page_buffers(page);
855 struct buffer_head *bh = head;
856
857 do {
858 if (buffer_uptodate(bh))
859 set_buffer_dirty(bh);
860 else
861 buffer_error();
862 bh = bh->b_this_page;
863 } while (bh != head);
864 }
865 spin_unlock(&mapping->private_lock);
866
867 if (!TestSetPageDirty(page)) {
868 spin_lock(&mapping->page_lock);
869 if (page->mapping) {
870 if (!mapping->backing_dev_info->memory_backed)
871 inc_page_state(nr_dirty);
872 list_del(&page->list);
873 list_add(&page->list, &mapping->dirty_pages);
874 }
875 spin_unlock(&mapping->page_lock);
876 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
877 }
878
879out:
880 return ret;
881}
882EXPORT_SYMBOL(__set_page_dirty_buffers);
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
/*
 * Write out and wait upon a list of buffers.
 *
 * Phase 1: under @lock, drain @list.  Buffers that are dirty or locked
 * are moved to a private temporary list; dirty ones are additionally
 * waited upon and submitted for write with the lock dropped.
 * Phase 2: drain the temporary list from its tail, waiting on each
 * buffer and noting -EIO for any that is not uptodate afterwards.
 * Phase 3: osync_buffers_list() waits on anything locked that is on
 * @list by then.
 *
 * Returns the phase 2 error if there was one, else the phase 3 result.
 */
int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head tmp;
	int err = 0, err2;

	INIT_LIST_HEAD(&tmp);

	spin_lock(lock);
	while (!list_empty(list)) {
		bh = BH_ENTRY(list->next);
		list_del_init(&bh->b_assoc_buffers);
		if (buffer_dirty(bh) || buffer_locked(bh)) {
			list_add(&bh->b_assoc_buffers, &tmp);
			if (buffer_dirty(bh)) {
				get_bh(bh);	/* pin bh while unlocked */
				spin_unlock(lock);
				/*
				 * Wait for any I/O already in flight on this
				 * buffer before submitting the write.
				 */
				wait_on_buffer(bh);
				ll_rw_block(WRITE, 1, &bh);
				brelse(bh);
				spin_lock(lock);
			}
		}
	}

	while (!list_empty(&tmp)) {
		bh = BH_ENTRY(tmp.prev);
		__remove_assoc_queue(bh);
		get_bh(bh);
		spin_unlock(lock);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			err = -EIO;
		brelse(bh);
		spin_lock(lock);
	}
	
	spin_unlock(lock);
	err2 = osync_buffers_list(lock, list);
	if (err)
		return err;
	else
		return err2;
}
953
954
955
956
957
958
959
960
961
962
963void invalidate_inode_buffers(struct inode *inode)
964{
965 if (inode_has_buffers(inode)) {
966 struct address_space *mapping = &inode->i_data;
967 struct list_head *list = &mapping->private_list;
968 struct address_space *buffer_mapping = mapping->assoc_mapping;
969
970 spin_lock(&buffer_mapping->private_lock);
971 while (!list_empty(list))
972 __remove_assoc_queue(BH_ENTRY(list->next));
973 spin_unlock(&buffer_mapping->private_lock);
974 }
975}
976
977
978
979
980
981
982
983int remove_inode_buffers(struct inode *inode)
984{
985 int ret = 1;
986
987 if (inode_has_buffers(inode)) {
988 struct address_space *mapping = &inode->i_data;
989 struct list_head *list = &mapping->private_list;
990 struct address_space *buffer_mapping = mapping->assoc_mapping;
991
992 spin_lock(&buffer_mapping->private_lock);
993 while (!list_empty(list)) {
994 struct buffer_head *bh = BH_ENTRY(list->next);
995 if (buffer_dirty(bh)) {
996 ret = 0;
997 break;
998 }
999 __remove_assoc_queue(bh);
1000 }
1001 spin_unlock(&buffer_mapping->private_lock);
1002 }
1003 return ret;
1004}
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
/*
 * Allocate a NULL-terminated chain of buffer_heads covering @page in
 * @size-byte pieces.
 *
 * Buffers are allocated from the end of the page backwards, so the
 * returned head describes offset 0 and b_this_page links run forward
 * through the page.  The chain is not made circular and is not attached
 * to the page here.
 *
 * On allocation failure the partial chain is freed.  With @retry == 0
 * NULL is returned; otherwise free_more_memory() is called and the
 * whole allocation is retried, so the function does not return NULL.
 */
static struct buffer_head *
create_buffers(struct page * page, unsigned long size, int retry)
{
	struct buffer_head *bh, *head;
	long offset;

try_again:
	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);
		if (!bh)
			goto no_grow;

		bh->b_bdev = NULL;
		bh->b_this_page = head;
		bh->b_blocknr = -1;
		head = bh;

		bh->b_state = 0;
		atomic_set(&bh->b_count, 0);
		bh->b_size = size;

		/* Link the buffer to its page */
		set_bh_page(bh, page, offset);

		bh->b_end_io = NULL;
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	if (head) {
		do {
			bh = head;
			head = head->b_this_page;
			free_buffer_head(bh);
		} while (head);
	}

	/* The caller asked for a single attempt only. */
	if (!retry)
		return NULL;

	/* Try to release some memory, then start over. */
	free_more_memory();
	goto try_again;
}
1074
1075static inline void
1076link_dev_buffers(struct page *page, struct buffer_head *head)
1077{
1078 struct buffer_head *bh, *tail;
1079
1080 bh = head;
1081 do {
1082 tail = bh;
1083 bh = bh->b_this_page;
1084 } while (bh);
1085 tail->b_this_page = head;
1086 __set_page_buffers(page, head);
1087}
1088
1089
1090
1091
1092static void
1093init_page_buffers(struct page *page, struct block_device *bdev,
1094 int block, int size)
1095{
1096 struct buffer_head *head = page_buffers(page);
1097 struct buffer_head *bh = head;
1098 unsigned int b_state;
1099
1100 b_state = 1 << BH_Mapped;
1101 if (PageUptodate(page))
1102 b_state |= 1 << BH_Uptodate;
1103
1104 do {
1105 if (!(bh->b_state & (1 << BH_Mapped))) {
1106 init_buffer(bh, NULL, NULL);
1107 bh->b_bdev = bdev;
1108 bh->b_blocknr = block;
1109 bh->b_state = b_state;
1110 }
1111 block++;
1112 bh = bh->b_this_page;
1113 } while (bh != head);
1114}
1115
1116
1117
1118
1119
1120
/*
 * Find or create the blockdev page at @index and make sure it carries
 * buffers of @size bytes.
 *
 * If the page already has buffers of the right size it is returned as
 * is.  Buffers of a different size are freed first; if that fails, or
 * if new buffers cannot be allocated (a single attempt, no retry), the
 * page is unlocked and released and NULL is returned.
 *
 * On success the page is returned still locked and with the reference
 * obtained from find_or_create_page(); the caller must unlock and
 * release it.
 */
static struct page *
grow_dev_page(struct block_device *bdev, unsigned long block,
			unsigned long index, int size)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
	struct buffer_head *bh;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (!page)
		return NULL;

	if (!PageLocked(page))
		BUG();

	if (page_has_buffers(page)) {
		bh = page_buffers(page);
		if (bh->b_size == size)
			return page;
		if (!try_to_free_buffers(page))
			goto failed;
	}

	/*
	 * Allocate some buffers for this page
	 */
	bh = create_buffers(page, size, 0);
	if (!bh)
		goto failed;

	/*
	 * Attach and initialise the new buffers under private_lock, the
	 * same lock __find_get_block_slow() holds while walking a page's
	 * buffer ring.
	 */
	spin_lock(&inode->i_mapping->private_lock);
	link_dev_buffers(page, bh);
	init_page_buffers(page, bdev, block, size);
	spin_unlock(&inode->i_mapping->private_lock);
	return page;

failed:
	buffer_error();
	unlock_page(page);
	page_cache_release(page);
	return NULL;
}
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178static inline int
1179grow_buffers(struct block_device *bdev, unsigned long block, int size)
1180{
1181 struct page *page;
1182 unsigned long index;
1183 int sizebits;
1184
1185
1186 if (size & (bdev_hardsect_size(bdev)-1))
1187 BUG();
1188 if (size < 512 || size > PAGE_SIZE)
1189 BUG();
1190
1191 sizebits = -1;
1192 do {
1193 sizebits++;
1194 } while ((size << sizebits) < PAGE_SIZE);
1195
1196 index = block >> sizebits;
1197 block = index << sizebits;
1198
1199
1200 page = grow_dev_page(bdev, block, index, size);
1201 if (!page)
1202 return 0;
1203 unlock_page(page);
1204 page_cache_release(page);
1205 return 1;
1206}
1207
1208struct buffer_head *
1209__getblk_slow(struct block_device *bdev, sector_t block, int size)
1210{
1211 for (;;) {
1212 struct buffer_head * bh;
1213
1214 bh = __find_get_block(bdev, block, size);
1215 if (bh)
1216 return bh;
1217
1218 if (!grow_buffers(bdev, block, size))
1219 free_more_memory();
1220 }
1221}
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257void mark_buffer_dirty(struct buffer_head *bh)
1258{
1259 if (!buffer_uptodate(bh))
1260 buffer_error();
1261 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
1262 __set_page_dirty_nobuffers(bh->b_page);
1263}
1264
1265
1266
1267
1268
1269
1270
1271
1272void __brelse(struct buffer_head * buf)
1273{
1274 if (atomic_read(&buf->b_count)) {
1275 put_bh(buf);
1276 return;
1277 }
1278 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1279 buffer_error();
1280}
1281
1282
1283
1284
1285
1286void __bforget(struct buffer_head *bh)
1287{
1288 clear_buffer_dirty(bh);
1289 if (!list_empty(&bh->b_assoc_buffers)) {
1290 struct address_space *buffer_mapping = bh->b_page->mapping;
1291
1292 spin_lock(&buffer_mapping->private_lock);
1293 list_del_init(&bh->b_assoc_buffers);
1294 spin_unlock(&buffer_mapping->private_lock);
1295 }
1296 __brelse(bh);
1297}
1298
1299static struct buffer_head *__bread_slow(struct buffer_head *bh)
1300{
1301 lock_buffer(bh);
1302 if (buffer_uptodate(bh)) {
1303 unlock_buffer(bh);
1304 return bh;
1305 } else {
1306 if (buffer_dirty(bh))
1307 buffer_error();
1308 get_bh(bh);
1309 bh->b_end_io = end_buffer_read_sync;
1310 submit_bh(READ, bh);
1311 wait_on_buffer(bh);
1312 if (buffer_uptodate(bh))
1313 return bh;
1314 }
1315 brelse(bh);
1316 return NULL;
1317}
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
/* Number of buffer_head pointers cached in each per-CPU LRU. */
#define BH_LRU_SIZE 8

/* Small per-CPU array of recently used buffer_heads; slot 0 is newest. */
struct bh_lru {
	struct buffer_head *bhs[BH_LRU_SIZE];
};

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{0}};

/*
 * Protection for the current CPU's LRU: local interrupts are disabled
 * on SMP builds, preemption is disabled otherwise.
 */
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
#define bh_lru_unlock() local_irq_enable()
#else
#define bh_lru_lock() preempt_disable()
#define bh_lru_unlock() preempt_enable()
#endif

/*
 * Sanity check used before taking the LRU lock: BUG if interrupts are
 * already disabled.  Compiles to nothing when irqs_disabled is not
 * defined as a macro.
 */
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
	BUG_ON(irqs_disabled());
#endif
}
1355
1356
1357
1358
/*
 * Put @bh at the front of the current CPU's LRU.
 *
 * Nothing is done if @bh is already in slot 0.  Otherwise a new array
 * is built with @bh first (taking a reference for the LRU), followed by
 * the old entries in order.  An older occurrence of @bh is dropped
 * together with its reference.  If the array overflows, the last old
 * entry is evicted and its reference is released after the LRU lock
 * has been dropped.
 */
static void bh_lru_install(struct buffer_head *bh)
{
	struct buffer_head *evictee = NULL;
	struct bh_lru *lru;

	check_irqs_on();
	bh_lru_lock();
	lru = &__get_cpu_var(bh_lrus);
	if (lru->bhs[0] != bh) {
		struct buffer_head *bhs[BH_LRU_SIZE];
		int in;
		int out = 0;

		get_bh(bh);
		bhs[out++] = bh;
		for (in = 0; in < BH_LRU_SIZE; in++) {
			struct buffer_head *bh2 = lru->bhs[in];

			if (bh2 == bh) {
				/* duplicate entry: drop the old reference */
				__brelse(bh2);
			} else {
				if (out >= BH_LRU_SIZE) {
					/* at most one entry can fall off */
					BUG_ON(evictee != NULL);
					evictee = bh2;
				} else {
					bhs[out++] = bh2;
				}
			}
		}
		/* pad the remainder so stale pointers are not kept */
		while (out < BH_LRU_SIZE)
			bhs[out++] = NULL;
		memcpy(lru->bhs, bhs, sizeof(bhs));
	}
	bh_lru_unlock();

	if (evictee)
		__brelse(evictee);
}
1397
1398
1399
1400
1401static inline struct buffer_head *
1402lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
1403{
1404 struct buffer_head *ret = NULL;
1405 struct bh_lru *lru;
1406 int i;
1407
1408 check_irqs_on();
1409 bh_lru_lock();
1410 lru = &__get_cpu_var(bh_lrus);
1411 for (i = 0; i < BH_LRU_SIZE; i++) {
1412 struct buffer_head *bh = lru->bhs[i];
1413
1414 if (bh && bh->b_bdev == bdev &&
1415 bh->b_blocknr == block && bh->b_size == size) {
1416 if (i) {
1417 while (i) {
1418 lru->bhs[i] = lru->bhs[i - 1];
1419 i--;
1420 }
1421 lru->bhs[0] = bh;
1422 }
1423 get_bh(bh);
1424 ret = bh;
1425 break;
1426 }
1427 }
1428 bh_lru_unlock();
1429 return ret;
1430}
1431
1432
1433
1434
1435
1436
1437struct buffer_head *
1438__find_get_block(struct block_device *bdev, sector_t block, int size)
1439{
1440 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1441
1442 if (bh == NULL) {
1443 bh = __find_get_block_slow(bdev, block, size);
1444 if (bh)
1445 bh_lru_install(bh);
1446 }
1447 if (bh)
1448 touch_buffer(bh);
1449 return bh;
1450}
1451EXPORT_SYMBOL(__find_get_block);
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465struct buffer_head *
1466__getblk(struct block_device *bdev, sector_t block, int size)
1467{
1468 struct buffer_head *bh = __find_get_block(bdev, block, size);
1469
1470 if (bh == NULL)
1471 bh = __getblk_slow(bdev, block, size);
1472 return bh;
1473}
1474EXPORT_SYMBOL(__getblk);
1475
1476
1477
1478
/*
 * Start a read-ahead (READA) request on the given block and drop the
 * buffer reference straight away without waiting for the I/O.
 */
void __breadahead(struct block_device *bdev, sector_t block, int size)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	ll_rw_block(READA, 1, &bh);
	brelse(bh);
}
EXPORT_SYMBOL(__breadahead);
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495struct buffer_head *
1496__bread(struct block_device *bdev, sector_t block, int size)
1497{
1498 struct buffer_head *bh = __getblk(bdev, block, size);
1499
1500 if (!buffer_uptodate(bh))
1501 bh = __bread_slow(bh);
1502 return bh;
1503}
1504EXPORT_SYMBOL(__bread);
1505
1506
1507
1508
1509
1510
1511
1512static void invalidate_bh_lru(void *arg)
1513{
1514 struct bh_lru *b = &get_cpu_var(bh_lrus);
1515 int i;
1516
1517 for (i = 0; i < BH_LRU_SIZE; i++) {
1518 brelse(b->bhs[i]);
1519 b->bhs[i] = NULL;
1520 }
1521 put_cpu_var(bh_lrus);
1522}
1523
/*
 * Run invalidate_bh_lru() on every CPU via on_each_cpu().
 */
static void invalidate_bh_lrus(void)
{
	on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
}
1528
1529void set_bh_page(struct buffer_head *bh,
1530 struct page *page, unsigned long offset)
1531{
1532 bh->b_page = page;
1533 if (offset >= PAGE_SIZE)
1534 BUG();
1535 if (PageHighMem(page))
1536
1537
1538
1539 bh->b_data = (char *)(0 + offset);
1540 else
1541 bh->b_data = page_address(page) + offset;
1542}
1543EXPORT_SYMBOL(set_bh_page);
1544
1545
1546
1547
/*
 * Strip a buffer of its association with disk: with the buffer locked,
 * clear the dirty, mapped, req, new and delay bits and reset b_bdev to
 * NULL.
 */
static inline void discard_buffer(struct buffer_head * bh)
{
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	unlock_buffer(bh);
}
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575int try_to_release_page(struct page *page, int gfp_mask)
1576{
1577 struct address_space * const mapping = page->mapping;
1578
1579 if (!PageLocked(page))
1580 BUG();
1581 if (PageWriteback(page))
1582 return 0;
1583
1584 if (mapping && mapping->a_ops->releasepage)
1585 return mapping->a_ops->releasepage(page, gfp_mask);
1586 return try_to_free_buffers(page);
1587}
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604int block_invalidatepage(struct page *page, unsigned long offset)
1605{
1606 struct buffer_head *head, *bh, *next;
1607 unsigned int curr_off = 0;
1608 int ret = 1;
1609
1610 BUG_ON(!PageLocked(page));
1611 if (!page_has_buffers(page))
1612 goto out;
1613
1614 head = page_buffers(page);
1615 bh = head;
1616 do {
1617 unsigned int next_off = curr_off + bh->b_size;
1618 next = bh->b_this_page;
1619
1620
1621
1622
1623 if (offset <= curr_off)
1624 discard_buffer(bh);
1625 curr_off = next_off;
1626 bh = next;
1627 } while (bh != head);
1628
1629
1630
1631
1632
1633
1634 if (offset == 0)
1635 ret = try_to_release_page(page, 0);
1636out:
1637 return ret;
1638}
1639EXPORT_SYMBOL(block_invalidatepage);
1640
1641
1642
1643
1644
1645
/*
 * Allocate buffers of @blocksize bytes for @page and attach them.
 *
 * The buffers are allocated with retry enabled, have @b_state OR-ed
 * into their state and are linked into a ring.  Then, under the page
 * mapping's private_lock, the page's dirty and uptodate flags are
 * propagated to every buffer and the ring is attached to the page.
 */
void create_empty_buffers(struct page *page,
			unsigned long blocksize, unsigned long b_state)
{
	struct buffer_head *bh, *head, *tail;

	head = create_buffers(page, blocksize, 1);
	bh = head;
	do {
		bh->b_state |= b_state;
		tail = bh;
		bh = bh->b_this_page;
	} while (bh);
	tail->b_this_page = head;	/* close the chain into a ring */

	spin_lock(&page->mapping->private_lock);
	if (PageUptodate(page) || PageDirty(page)) {
		bh = head;
		do {
			if (PageDirty(page))
				set_buffer_dirty(bh);
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	__set_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1693{
1694 struct buffer_head *old_bh;
1695
1696 old_bh = __find_get_block_slow(bdev, block, 0);
1697 if (old_bh) {
1698#if 0
1699 if (buffer_dirty(old_bh))
1700 buffer_error();
1701#endif
1702 clear_buffer_dirty(old_bh);
1703 wait_on_buffer(old_bh);
1704 clear_buffer_req(old_bh);
1705 __brelse(old_bh);
1706 }
1707}
1708EXPORT_SYMBOL(unmap_underlying_metadata);
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
/*
 * Write out all dirty, mapped buffers of a locked page.
 *
 * Outline:
 *  1. Give the page buffers if it has none (created dirty + uptodate).
 *  2. Walk the buffers: those past the last block of the file are made
 *     clean and uptodate; dirty unmapped ones are mapped through
 *     @get_block (create == 1).  A mapping error jumps to "recover".
 *  3. Lock each dirty mapped buffer and mark it async_write.  When
 *     wbc->sync_mode is WB_SYNC_NONE a buffer that is already locked is
 *     skipped and the page is redirtied instead.
 *  4. Set page writeback, unlock the page and submit the marked buffers.
 *  5. If nothing was submitted, end writeback here (and mark the page
 *     uptodate when every buffer is).
 *
 * Returns 0 on success or the error returned by @get_block.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
			get_block_t *get_block, struct writeback_control *wbc)
{
	int err;
	unsigned long block;
	unsigned long last_block;
	struct buffer_head *bh, *head;
	int nr_underway = 0;

	BUG_ON(!PageLocked(page));

	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;

	if (!page_has_buffers(page)) {
		if (!PageUptodate(page))
			buffer_error();
		create_empty_buffers(page, 1 << inode->i_blkbits,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}

	/* block number of the first buffer on this page */
	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	head = page_buffers(page);
	bh = head;

	/*
	 * Get all the dirty buffers mapped to disk addresses and
	 * handle any aliases from the underlying blockdev's mapping.
	 */
	do {
		if (block > last_block) {
			/*
			 * Buffer lies beyond the last block of the file:
			 * nothing to write, treat it as clean and uptodate.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
			if (buffer_new(bh))
				buffer_error();
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	do {
		/* reference held across submission; dropped in the next loop */
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			if (wbc->sync_mode != WB_SYNC_NONE) {
				lock_buffer(bh);
			} else {
				if (test_set_buffer_locked(bh)) {
					/* busy buffer: leave it for later */
					__set_page_dirty_nobuffers(page);
					continue;
				}
			}
			if (test_clear_buffer_dirty(bh)) {
				if (!buffer_uptodate(bh))
					buffer_error();
				mark_buffer_async_write(bh);
			} else {
				unlock_buffer(bh);
			}
		}
	} while ((bh = bh->b_this_page) != head);

	BUG_ON(PageWriteback(page));
	SetPageWriteback(page);		/* Keeps try_to_free_buffers() away */
	unlock_page(page);

	/*
	 * Submit the marked buffers.  b_this_page is sampled before the
	 * reference on bh is dropped.
	 */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * No buffer was submitted, so no completion handler will
		 * end page writeback; it has to be done here.
		 */
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (uptodate)
			SetPageUptodate(page);
		end_page_writeback(page);
	}
	return err;

recover:
	/*
	 * get_block() failed part-way through the page.  Buffers that are
	 * already mapped and dirty are still written out; all others have
	 * their dirty bit cleared.  The page is flagged with an error.
	 */
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write(bh);
		} else {
			/*
			 * The buffer may have been set dirty during
			 * attachment to a dirty page.
			 */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	SetPageWriteback(page);
	unlock_page(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);
	goto done;
}
1904
1905static int __block_prepare_write(struct inode *inode, struct page *page,
1906 unsigned from, unsigned to, get_block_t *get_block)
1907{
1908 unsigned block_start, block_end;
1909 sector_t block;
1910 int err = 0;
1911 unsigned blocksize, bbits;
1912 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1913
1914 BUG_ON(!PageLocked(page));
1915 BUG_ON(from > PAGE_CACHE_SIZE);
1916 BUG_ON(to > PAGE_CACHE_SIZE);
1917 BUG_ON(from > to);
1918
1919 blocksize = 1 << inode->i_blkbits;
1920 if (!page_has_buffers(page))
1921 create_empty_buffers(page, blocksize, 0);
1922 head = page_buffers(page);
1923
1924 bbits = inode->i_blkbits;
1925 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1926
1927 for(bh = head, block_start = 0; bh != head || !block_start;
1928 block++, block_start=block_end, bh = bh->b_this_page) {
1929 block_end = block_start + blocksize;
1930 if (block_end <= from || block_start >= to) {
1931 if (PageUptodate(page)) {
1932 if (!buffer_uptodate(bh))
1933 set_buffer_uptodate(bh);
1934 }
1935 continue;
1936 }
1937 if (buffer_new(bh))
1938 clear_buffer_new(bh);
1939 if (!buffer_mapped(bh)) {
1940 err = get_block(inode, block, bh, 1);
1941 if (err)
1942 goto out;
1943 if (buffer_new(bh)) {
1944 clear_buffer_new(bh);
1945 unmap_underlying_metadata(bh->b_bdev,
1946 bh->b_blocknr);
1947 if (PageUptodate(page)) {
1948 if (!buffer_mapped(bh))
1949 buffer_error();
1950 set_buffer_uptodate(bh);
1951 continue;
1952 }
1953 if (block_end > to || block_start < from) {
1954 void *kaddr;
1955
1956 kaddr = kmap_atomic(page, KM_USER0);
1957 if (block_end > to)
1958 memset(kaddr+to, 0,
1959 block_end-to);
1960 if (block_start < from)
1961 memset(kaddr+block_start,
1962 0, from-block_start);
1963 flush_dcache_page(page);
1964 kunmap_atomic(kaddr, KM_USER0);
1965 }
1966 continue;
1967 }
1968 }
1969 if (PageUptodate(page)) {
1970 if (!buffer_uptodate(bh))
1971 set_buffer_uptodate(bh);
1972 continue;
1973 }
1974 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1975 (block_start < from || block_end > to)) {
1976 ll_rw_block(READ, 1, &bh);
1977 *wait_bh++=bh;
1978 }
1979 }
1980
1981
1982
1983 while(wait_bh > wait) {
1984 wait_on_buffer(*--wait_bh);
1985 if (!buffer_uptodate(*wait_bh))
1986 return -EIO;
1987 }
1988 return 0;
1989out:
1990
1991
1992
1993
1994
1995 bh = head;
1996 block_start = 0;
1997 do {
1998 block_end = block_start+blocksize;
1999 if (block_end <= from)
2000 goto next_bh;
2001 if (block_start >= to)
2002 break;
2003 if (buffer_new(bh)) {
2004 void *kaddr;
2005
2006 clear_buffer_new(bh);
2007 if (buffer_uptodate(bh))
2008 buffer_error();
2009 kaddr = kmap_atomic(page, KM_USER0);
2010 memset(kaddr+block_start, 0, bh->b_size);
2011 kunmap_atomic(kaddr, KM_USER0);
2012 set_buffer_uptodate(bh);
2013 mark_buffer_dirty(bh);
2014 }
2015next_bh:
2016 block_start = block_end;
2017 bh = bh->b_this_page;
2018 } while (bh != head);
2019 return err;
2020}
2021
2022static int __block_commit_write(struct inode *inode, struct page *page,
2023 unsigned from, unsigned to)
2024{
2025 unsigned block_start, block_end;
2026 int partial = 0;
2027 unsigned blocksize;
2028 struct buffer_head *bh, *head;
2029
2030 blocksize = 1 << inode->i_blkbits;
2031
2032 for(bh = head = page_buffers(page), block_start = 0;
2033 bh != head || !block_start;
2034 block_start=block_end, bh = bh->b_this_page) {
2035 block_end = block_start + blocksize;
2036 if (block_end <= from || block_start >= to) {
2037 if (!buffer_uptodate(bh))
2038 partial = 1;
2039 } else {
2040 set_buffer_uptodate(bh);
2041 mark_buffer_dirty(bh);
2042 }
2043 }
2044
2045
2046
2047
2048
2049
2050
2051 if (!partial)
2052 SetPageUptodate(page);
2053 return 0;
2054}
2055
2056
2057
2058
2059
2060
2061
2062
2063int block_read_full_page(struct page *page, get_block_t *get_block)
2064{
2065 struct inode *inode = page->mapping->host;
2066 sector_t iblock, lblock;
2067 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2068 unsigned int blocksize;
2069 int nr, i;
2070 int fully_mapped = 1;
2071
2072 if (!PageLocked(page))
2073 PAGE_BUG(page);
2074 if (PageUptodate(page))
2075 buffer_error();
2076 blocksize = 1 << inode->i_blkbits;
2077 if (!page_has_buffers(page))
2078 create_empty_buffers(page, blocksize, 0);
2079 head = page_buffers(page);
2080
2081 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2082 lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
2083 bh = head;
2084 nr = 0;
2085 i = 0;
2086
2087 do {
2088 if (buffer_uptodate(bh))
2089 continue;
2090
2091 if (!buffer_mapped(bh)) {
2092 fully_mapped = 0;
2093 if (iblock < lblock) {
2094 if (get_block(inode, iblock, bh, 0))
2095 SetPageError(page);
2096 }
2097 if (!buffer_mapped(bh)) {
2098 void *kaddr = kmap_atomic(page, KM_USER0);
2099 memset(kaddr + i * blocksize, 0, blocksize);
2100 flush_dcache_page(page);
2101 kunmap_atomic(kaddr, KM_USER0);
2102 set_buffer_uptodate(bh);
2103 continue;
2104 }
2105
2106
2107
2108
2109 if (buffer_uptodate(bh))
2110 continue;
2111 }
2112 arr[nr++] = bh;
2113 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2114
2115 if (fully_mapped)
2116 SetPageMappedToDisk(page);
2117
2118 if (!nr) {
2119
2120
2121
2122
2123 if (!PageError(page))
2124 SetPageUptodate(page);
2125 unlock_page(page);
2126 return 0;
2127 }
2128
2129
2130 for (i = 0; i < nr; i++) {
2131 bh = arr[i];
2132 lock_buffer(bh);
2133 mark_buffer_async_read(bh);
2134 }
2135
2136
2137
2138
2139
2140
2141 for (i = 0; i < nr; i++) {
2142 bh = arr[i];
2143 if (buffer_uptodate(bh))
2144 end_buffer_async_read(bh, 1);
2145 else
2146 submit_bh(READ, bh);
2147 }
2148 return 0;
2149}
2150
2151
2152
2153
2154
2155int generic_cont_expand(struct inode *inode, loff_t size)
2156{
2157 struct address_space *mapping = inode->i_mapping;
2158 struct page *page;
2159 unsigned long index, offset, limit;
2160 int err;
2161
2162 err = -EFBIG;
2163 limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
2164 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2165 send_sig(SIGXFSZ, current, 0);
2166 goto out;
2167 }
2168 if (size > inode->i_sb->s_maxbytes)
2169 goto out;
2170
2171 offset = (size & (PAGE_CACHE_SIZE-1));
2172
2173
2174
2175
2176
2177 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2178 offset++;
2179 }
2180 index = size >> PAGE_CACHE_SHIFT;
2181 err = -ENOMEM;
2182 page = grab_cache_page(mapping, index);
2183 if (!page)
2184 goto out;
2185 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2186 if (!err) {
2187 err = mapping->a_ops->commit_write(NULL, page, offset, offset);
2188 }
2189 unlock_page(page);
2190 page_cache_release(page);
2191 if (err > 0)
2192 err = 0;
2193out:
2194 return err;
2195}
2196
2197
2198
2199
2200
2201
/*
 * Prepare a write to @page, first instantiating and zero-filling every
 * page between the position recorded in *@bytes and @page.
 *
 * @page:      locked target page of the write
 * @offset:    start of the write within @page
 * @to:        end of the write within @page
 * @get_block: block-mapping callback handed to __block_prepare_write()
 * @bytes:     in/out byte position; read to find where zeroing starts and
 *             rounded up to the next block boundary as zeroing proceeds
 *
 * Returns 0 on success, -ENOMEM if an intermediate page cannot be grabbed,
 * or the error from __block_prepare_write().
 */
int cont_prepare_write(struct page *page, unsigned offset,
		unsigned to, get_block_t *get_block, loff_t *bytes)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	struct page *new_page;
	unsigned long pgpos;
	long status;
	unsigned zerofrom;
	unsigned blocksize = 1 << inode->i_blkbits;
	void *kaddr;

	/* Zero-fill every whole page that lies before the target page. */
	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
		status = -ENOMEM;
		new_page = grab_cache_page(mapping, pgpos);
		if (!new_page)
			goto out;
		/*
		 * Re-check *bytes after grabbing the page; if it no longer
		 * points into this page, drop it and retry.
		 * NOTE(review): presumably *bytes can move while
		 * grab_cache_page() sleeps - confirm against callers.
		 */
		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
			unlock_page(new_page);
			page_cache_release(new_page);
			continue;
		}
		zerofrom = *bytes & ~PAGE_CACHE_MASK;
		/* Mid-block position: advance *bytes to the next block start. */
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		status = __block_prepare_write(inode, new_page, zerofrom,
						PAGE_CACHE_SIZE, get_block);
		if (status)
			goto out_unmap;
		kaddr = kmap_atomic(new_page, KM_USER0);
		memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
		flush_dcache_page(new_page);
		kunmap_atomic(kaddr, KM_USER0);
		__block_commit_write(inode, new_page,
				zerofrom, PAGE_CACHE_SIZE);
		unlock_page(new_page);
		page_cache_release(new_page);
	}

	if (page->index < pgpos) {
		/* Target page lies wholly before *bytes: nothing to zero. */
		zerofrom = offset;
	} else {
		/* Target page contains the position recorded in *bytes. */
		zerofrom = *bytes & ~PAGE_CACHE_MASK;

		/* The write passes a mid-block position: round *bytes up. */
		if (to > zerofrom && (zerofrom & (blocksize-1))) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}

		/* Write starts at or before that position: no gap to zero. */
		if (offset <= zerofrom)
			zerofrom = offset;
	}
	status = __block_prepare_write(inode, page, zerofrom, to, get_block);
	if (status)
		goto out1;
	/* Zero and commit the gap between the old position and the write. */
	if (zerofrom < offset) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr+zerofrom, 0, offset-zerofrom);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		__block_commit_write(inode, page, zerofrom, offset);
	}
	return 0;
out1:
	ClearPageUptodate(page);
	return status;

out_unmap:
	/* Failure on an intermediate page: release it before returning. */
	ClearPageUptodate(new_page);
	unlock_page(new_page);
	page_cache_release(new_page);
out:
	return status;
}
2283
2284int block_prepare_write(struct page *page, unsigned from, unsigned to,
2285 get_block_t *get_block)
2286{
2287 struct inode *inode = page->mapping->host;
2288 int err = __block_prepare_write(inode, page, from, to, get_block);
2289 if (err)
2290 ClearPageUptodate(page);
2291 return err;
2292}
2293
2294int block_commit_write(struct page *page, unsigned from, unsigned to)
2295{
2296 struct inode *inode = page->mapping->host;
2297 __block_commit_write(inode,page,from,to);
2298 return 0;
2299}
2300
2301int generic_commit_write(struct file *file, struct page *page,
2302 unsigned from, unsigned to)
2303{
2304 struct inode *inode = page->mapping->host;
2305 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2306 __block_commit_write(inode,page,from,to);
2307
2308
2309
2310
2311 if (pos > inode->i_size) {
2312 i_size_write(inode, pos);
2313 mark_inode_dirty(inode);
2314 }
2315 return 0;
2316}
2317
2318
2319
2320
2321
/*
 * Prepare a write to bytes [from, to) of @page without attaching
 * buffer_heads to the page.
 *
 * @page:      page about to be written
 * @from:      first byte of the write within the page
 * @to:        one past the last byte of the write within the page
 * @get_block: block-mapping callback
 *
 * Each block of the page is mapped through an on-stack buffer_head.
 * Blocks that are new or unmapped get the bytes outside [from, to) zeroed.
 * Mapped, partially covered blocks that are not uptodate are read using
 * temporarily allocated buffer_heads.  On success the page is marked
 * uptodate.  On failure the whole page is zeroed, marked uptodate and
 * dirtied, and the error is returned.
 */
int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
			get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head map_bh;
	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
	unsigned block_in_page;
	unsigned block_start;
	sector_t block_in_file;
	char *kaddr;
	int nr_reads = 0;
	int i;
	int ret = 0;
	int is_mapped_to_disk = 1;
	int dirtied_it = 0;

	/* Page already known to be fully mapped: nothing to do. */
	if (PageMappedToDisk(page))
		return 0;

	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
	map_bh.b_page = page;

	/*
	 * Walk every block of the page.  Blocks starting at or after "to"
	 * are only looked up (create == 0); the others may be allocated.
	 */
	for (block_start = 0, block_in_page = 0;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize) {
		unsigned block_end = block_start + blocksize;
		int create;

		map_bh.b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					&map_bh, create);
		if (ret)
			goto failed;
		if (!buffer_mapped(&map_bh))
			is_mapped_to_disk = 0;
		if (buffer_new(&map_bh))
			unmap_underlying_metadata(map_bh.b_bdev,
							map_bh.b_blocknr);
		if (PageUptodate(page))
			continue;
		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
			/* No valid data on disk: zero what is not written. */
			kaddr = kmap_atomic(page, KM_USER0);
			if (block_start < from) {
				memset(kaddr+block_start, 0, from-block_start);
				dirtied_it = 1;
			}
			if (block_end > to) {
				memset(kaddr + to, 0, block_end - to);
				dirtied_it = 1;
			}
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
			continue;
		}
		if (buffer_uptodate(&map_bh))
			continue;
		if (block_start < from || block_end > to) {
			/* Partially covered block: queue a read for it. */
			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);

			if (!bh) {
				ret = -ENOMEM;
				goto failed;
			}
			bh->b_state = map_bh.b_state;
			atomic_set(&bh->b_count, 0);
			bh->b_this_page = 0;
			bh->b_page = page;
			bh->b_blocknr = map_bh.b_blocknr;
			bh->b_size = blocksize;
			/* b_data carries the byte offset within the page. */
			bh->b_data = (char *)(long)block_start;
			bh->b_bdev = map_bh.b_bdev;
			bh->b_private = NULL;
			read_bh[nr_reads++] = bh;
		}
	}

	if (nr_reads) {
		ll_rw_block(READ, nr_reads, read_bh);
		/* Wait for every read; each temporary head is freed here. */
		for (i = 0; i < nr_reads; i++) {
			wait_on_buffer(read_bh[i]);
			if (!buffer_uptodate(read_bh[i]))
				ret = -EIO;
			free_buffer_head(read_bh[i]);
			/* NULL marks the slot as freed for the failed path. */
			read_bh[i] = NULL;
		}
		if (ret)
			goto failed;
	}

	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);
	SetPageUptodate(page);

	/* Zeroing above changed page contents: make sure it is written. */
	if (dirtied_it)
		set_page_dirty(page);

	return 0;

failed:
	/* Release any temporary buffer_heads that have not been freed yet. */
	for (i = 0; i < nr_reads; i++) {
		if (read_bh[i])
			free_buffer_head(read_bh[i]);
	}

	/* Error: zero the whole page, then mark it uptodate and dirty. */
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr, 0, PAGE_CACHE_SIZE);
	kunmap_atomic(kaddr, KM_USER0);
	SetPageUptodate(page);
	set_page_dirty(page);
	return ret;
}
EXPORT_SYMBOL(nobh_prepare_write);
2455
2456int nobh_commit_write(struct file *file, struct page *page,
2457 unsigned from, unsigned to)
2458{
2459 struct inode *inode = page->mapping->host;
2460 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2461
2462 set_page_dirty(page);
2463 if (pos > inode->i_size) {
2464 i_size_write(inode, pos);
2465 mark_inode_dirty(inode);
2466 }
2467 return 0;
2468}
2469EXPORT_SYMBOL(nobh_commit_write);
2470
2471
2472
2473
2474int nobh_truncate_page(struct address_space *mapping, loff_t from)
2475{
2476 struct inode *inode = mapping->host;
2477 unsigned blocksize = 1 << inode->i_blkbits;
2478 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2479 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2480 unsigned to;
2481 struct page *page;
2482 struct address_space_operations *a_ops = mapping->a_ops;
2483 char *kaddr;
2484 int ret = 0;
2485
2486 if ((offset & (blocksize - 1)) == 0)
2487 goto out;
2488
2489 ret = -ENOMEM;
2490 page = grab_cache_page(mapping, index);
2491 if (!page)
2492 goto out;
2493
2494 to = (offset + blocksize) & ~(blocksize - 1);
2495 ret = a_ops->prepare_write(NULL, page, offset, to);
2496 if (ret == 0) {
2497 kaddr = kmap_atomic(page, KM_USER0);
2498 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2499 flush_dcache_page(page);
2500 kunmap_atomic(kaddr, KM_USER0);
2501 set_page_dirty(page);
2502 }
2503 unlock_page(page);
2504 page_cache_release(page);
2505out:
2506 return ret;
2507}
2508EXPORT_SYMBOL(nobh_truncate_page);
2509
2510int block_truncate_page(struct address_space *mapping,
2511 loff_t from, get_block_t *get_block)
2512{
2513 unsigned long index = from >> PAGE_CACHE_SHIFT;
2514 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2515 unsigned blocksize, iblock, length, pos;
2516 struct inode *inode = mapping->host;
2517 struct page *page;
2518 struct buffer_head *bh;
2519 void *kaddr;
2520 int err;
2521
2522 blocksize = 1 << inode->i_blkbits;
2523 length = offset & (blocksize - 1);
2524
2525
2526 if (!length)
2527 return 0;
2528
2529 length = blocksize - length;
2530 iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2531
2532 page = grab_cache_page(mapping, index);
2533 err = -ENOMEM;
2534 if (!page)
2535 goto out;
2536
2537 if (!page_has_buffers(page))
2538 create_empty_buffers(page, blocksize, 0);
2539
2540
2541 bh = page_buffers(page);
2542 pos = blocksize;
2543 while (offset >= pos) {
2544 bh = bh->b_this_page;
2545 iblock++;
2546 pos += blocksize;
2547 }
2548
2549 err = 0;
2550 if (!buffer_mapped(bh)) {
2551 err = get_block(inode, iblock, bh, 0);
2552 if (err)
2553 goto unlock;
2554
2555 if (!buffer_mapped(bh))
2556 goto unlock;
2557 }
2558
2559
2560 if (PageUptodate(page))
2561 set_buffer_uptodate(bh);
2562
2563 if (!buffer_uptodate(bh) && !buffer_delay(bh)) {
2564 err = -EIO;
2565 ll_rw_block(READ, 1, &bh);
2566 wait_on_buffer(bh);
2567
2568 if (!buffer_uptodate(bh))
2569 goto unlock;
2570 }
2571
2572 kaddr = kmap_atomic(page, KM_USER0);
2573 memset(kaddr + offset, 0, length);
2574 flush_dcache_page(page);
2575 kunmap_atomic(kaddr, KM_USER0);
2576
2577 mark_buffer_dirty(bh);
2578 err = 0;
2579
2580unlock:
2581 unlock_page(page);
2582 page_cache_release(page);
2583out:
2584 return err;
2585}
2586
2587
2588
2589
2590int block_write_full_page(struct page *page, get_block_t *get_block,
2591 struct writeback_control *wbc)
2592{
2593 struct inode * const inode = page->mapping->host;
2594 loff_t i_size = i_size_read(inode);
2595 const unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2596 unsigned offset;
2597 void *kaddr;
2598
2599
2600 if (page->index < end_index)
2601 return __block_write_full_page(inode, page, get_block, wbc);
2602
2603
2604 offset = i_size & (PAGE_CACHE_SIZE-1);
2605 if (page->index >= end_index+1 || !offset) {
2606
2607
2608
2609
2610
2611 block_invalidatepage(page, 0);
2612 unlock_page(page);
2613 return 0;
2614 }
2615
2616
2617
2618
2619
2620
2621
2622
2623 kaddr = kmap_atomic(page, KM_USER0);
2624 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2625 flush_dcache_page(page);
2626 kunmap_atomic(kaddr, KM_USER0);
2627 return __block_write_full_page(inode, page, get_block, wbc);
2628}
2629
2630sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2631 get_block_t *get_block)
2632{
2633 struct buffer_head tmp;
2634 struct inode *inode = mapping->host;
2635 tmp.b_state = 0;
2636 tmp.b_blocknr = 0;
2637 get_block(inode, block, &tmp, 0);
2638 return tmp.b_blocknr;
2639}
2640
2641static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
2642{
2643 struct buffer_head *bh = bio->bi_private;
2644
2645 if (bio->bi_size)
2646 return 1;
2647
2648 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2649 bio_put(bio);
2650 return 0;
2651}
2652
2653int submit_bh(int rw, struct buffer_head * bh)
2654{
2655 struct bio *bio;
2656
2657 BUG_ON(!buffer_locked(bh));
2658 BUG_ON(!buffer_mapped(bh));
2659 BUG_ON(!bh->b_end_io);
2660
2661 if ((rw == READ || rw == READA) && buffer_uptodate(bh))
2662 buffer_error();
2663 if (rw == WRITE && !buffer_uptodate(bh))
2664 buffer_error();
2665 if (rw == READ && buffer_dirty(bh))
2666 buffer_error();
2667
2668
2669 if (test_set_buffer_req(bh) && rw == WRITE)
2670 clear_buffer_write_io_error(bh);
2671
2672
2673
2674
2675
2676 bio = bio_alloc(GFP_NOIO, 1);
2677
2678 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2679 bio->bi_bdev = bh->b_bdev;
2680 bio->bi_io_vec[0].bv_page = bh->b_page;
2681 bio->bi_io_vec[0].bv_len = bh->b_size;
2682 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2683
2684 bio->bi_vcnt = 1;
2685 bio->bi_idx = 0;
2686 bio->bi_size = bh->b_size;
2687
2688 bio->bi_end_io = end_bio_bh_io_sync;
2689 bio->bi_private = bh;
2690
2691 return submit_bio(rw, bio);
2692}
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2720{
2721 int i;
2722
2723 for (i = 0; i < nr; i++) {
2724 struct buffer_head *bh = bhs[i];
2725
2726 if (test_set_buffer_locked(bh))
2727 continue;
2728
2729 get_bh(bh);
2730 if (rw == WRITE) {
2731 bh->b_end_io = end_buffer_write_sync;
2732 if (test_clear_buffer_dirty(bh)) {
2733 submit_bh(WRITE, bh);
2734 continue;
2735 }
2736 } else {
2737 bh->b_end_io = end_buffer_read_sync;
2738 if (!buffer_uptodate(bh)) {
2739 submit_bh(rw, bh);
2740 continue;
2741 }
2742 }
2743 unlock_buffer(bh);
2744 put_bh(bh);
2745 }
2746}
2747
2748
2749
2750
2751
2752void sync_dirty_buffer(struct buffer_head *bh)
2753{
2754 WARN_ON(atomic_read(&bh->b_count) < 1);
2755 lock_buffer(bh);
2756 if (test_clear_buffer_dirty(bh)) {
2757 get_bh(bh);
2758 bh->b_end_io = end_buffer_write_sync;
2759 submit_bh(WRITE, bh);
2760 wait_on_buffer(bh);
2761 } else {
2762 unlock_buffer(bh);
2763 }
2764}
2765
2766
2767
2768
2769static void check_ttfb_buffer(struct page *page, struct buffer_head *bh)
2770{
2771 if (!buffer_uptodate(bh) && !buffer_req(bh)) {
2772 if (PageUptodate(page) && page->mapping
2773 && buffer_mapped(bh)
2774 && S_ISBLK(page->mapping->host->i_mode))
2775 {
2776 buffer_error();
2777 }
2778 }
2779}
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801static inline int buffer_busy(struct buffer_head *bh)
2802{
2803 return atomic_read(&bh->b_count) |
2804 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
2805}
2806
2807static int
2808drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
2809{
2810 struct buffer_head *head = page_buffers(page);
2811 struct buffer_head *bh;
2812 int was_uptodate = 1;
2813
2814 bh = head;
2815 do {
2816 check_ttfb_buffer(page, bh);
2817 if (buffer_write_io_error(bh))
2818 set_bit(AS_EIO, &page->mapping->flags);
2819 if (buffer_busy(bh))
2820 goto failed;
2821 if (!buffer_uptodate(bh) && !buffer_req(bh))
2822 was_uptodate = 0;
2823 bh = bh->b_this_page;
2824 } while (bh != head);
2825
2826 if (!was_uptodate && PageUptodate(page) && !PageError(page))
2827 buffer_error();
2828
2829 do {
2830 struct buffer_head *next = bh->b_this_page;
2831
2832 if (!list_empty(&bh->b_assoc_buffers))
2833 __remove_assoc_queue(bh);
2834 bh = next;
2835 } while (bh != head);
2836 *buffers_to_free = head;
2837 __clear_page_buffers(page);
2838 return 1;
2839failed:
2840 return 0;
2841}
2842
2843int try_to_free_buffers(struct page *page)
2844{
2845 struct address_space * const mapping = page->mapping;
2846 struct buffer_head *buffers_to_free = NULL;
2847 int ret = 0;
2848
2849 BUG_ON(!PageLocked(page));
2850 if (PageWriteback(page))
2851 return 0;
2852
2853 if (mapping == NULL) {
2854 ret = drop_buffers(page, &buffers_to_free);
2855 goto out;
2856 }
2857
2858 spin_lock(&mapping->private_lock);
2859 ret = drop_buffers(page, &buffers_to_free);
2860 if (ret && !PageSwapCache(page)) {
2861
2862
2863
2864
2865
2866
2867
2868
2869 clear_page_dirty(page);
2870 }
2871 spin_unlock(&mapping->private_lock);
2872out:
2873 if (buffers_to_free) {
2874 struct buffer_head *bh = buffers_to_free;
2875
2876 do {
2877 struct buffer_head *next = bh->b_this_page;
2878 free_buffer_head(bh);
2879 bh = next;
2880 } while (bh != buffers_to_free);
2881 }
2882 return ret;
2883}
2884EXPORT_SYMBOL(try_to_free_buffers);
2885
/*
 * sync_page implementation for block-backed mappings: kicks the block
 * request queues via blk_run_queues().  @page is not examined.  Always
 * returns 0.
 */
int block_sync_page(struct page *page)
{
	blk_run_queues();

	return 0;
}
2891
2892
2893
2894
2895
2896
2897
2898
2899asmlinkage long sys_bdflush(int func, long data)
2900{
2901 static int msg_count;
2902
2903 if (!capable(CAP_SYS_ADMIN))
2904 return -EPERM;
2905
2906 if (msg_count < 5) {
2907 msg_count++;
2908 printk(KERN_INFO
2909 "warning: process `%s' used the obsolete bdflush"
2910 " system call\n", current->comm);
2911 printk(KERN_INFO "Fix your initscripts?\n");
2912 }
2913
2914 if (func == 1)
2915 do_exit(0);
2916 return 0;
2917}
2918
2919
2920
2921
/* Slab cache from which all buffer_heads are allocated. */
static kmem_cache_t *bh_cachep;

/*
 * Upper bound on the number of buffer_heads; computed once in
 * buffer_init().
 */
static int max_buffer_heads;

/*
 * Set by recalc_bh_state() to non-zero when the summed per-cpu counts
 * exceed max_buffer_heads.
 */
int buffer_heads_over_limit;

/* Per-cpu buffer_head bookkeeping. */
struct bh_accounting {
	int nr;			/* allocations minus frees seen on this cpu */
	int ratelimit;		/* calls since the last global recalculation */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
2938
2939static void recalc_bh_state(void)
2940{
2941 int i;
2942 int tot = 0;
2943
2944 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
2945 return;
2946 __get_cpu_var(bh_accounting).ratelimit = 0;
2947 for (i = 0; i < NR_CPUS; i++) {
2948 if (cpu_online(i))
2949 tot += per_cpu(bh_accounting, i).nr;
2950 }
2951 buffer_heads_over_limit = (tot > max_buffer_heads);
2952}
2953
2954struct buffer_head *alloc_buffer_head(int gfp_flags)
2955{
2956 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
2957 if (ret) {
2958 preempt_disable();
2959 __get_cpu_var(bh_accounting).nr++;
2960 recalc_bh_state();
2961 preempt_enable();
2962 }
2963 return ret;
2964}
2965EXPORT_SYMBOL(alloc_buffer_head);
2966
2967void free_buffer_head(struct buffer_head *bh)
2968{
2969 BUG_ON(!list_empty(&bh->b_assoc_buffers));
2970 kmem_cache_free(bh_cachep, bh);
2971 preempt_disable();
2972 __get_cpu_var(bh_accounting).nr--;
2973 recalc_bh_state();
2974 preempt_enable();
2975}
2976EXPORT_SYMBOL(free_buffer_head);
2977
2978static void
2979init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
2980{
2981 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2982 SLAB_CTOR_CONSTRUCTOR) {
2983 struct buffer_head * bh = (struct buffer_head *)data;
2984
2985 memset(bh, 0, sizeof(*bh));
2986 INIT_LIST_HEAD(&bh->b_assoc_buffers);
2987 }
2988}
2989
2990static void buffer_init_cpu(int cpu)
2991{
2992 struct bh_accounting *bha = &per_cpu(bh_accounting, cpu);
2993 struct bh_lru *bhl = &per_cpu(bh_lrus, cpu);
2994
2995 bha->nr = 0;
2996 bha->ratelimit = 0;
2997 memset(bhl, 0, sizeof(*bhl));
2998}
2999
3000static int __devinit buffer_cpu_notify(struct notifier_block *self,
3001 unsigned long action, void *hcpu)
3002{
3003 long cpu = (long)hcpu;
3004 switch(action) {
3005 case CPU_UP_PREPARE:
3006 buffer_init_cpu(cpu);
3007 break;
3008 default:
3009 break;
3010 }
3011 return NOTIFY_OK;
3012}
3013
/* Notifier block registered by buffer_init() to receive CPU events. */
static struct notifier_block __devinitdata buffer_nb = {
	.notifier_call = buffer_cpu_notify,
};
3017
3018void __init buffer_init(void)
3019{
3020 int i;
3021 int nrpages;
3022
3023 bh_cachep = kmem_cache_create("buffer_head",
3024 sizeof(struct buffer_head), 0,
3025 0, init_buffer_head, NULL);
3026 for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++)
3027 init_waitqueue_head(&bh_wait_queue_heads[i].wqh);
3028
3029
3030
3031
3032 nrpages = (nr_free_buffer_pages() * 10) / 100;
3033 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3034 buffer_cpu_notify(&buffer_nb, (unsigned long)CPU_UP_PREPARE,
3035 (void *)(long)smp_processor_id());
3036 register_cpu_notifier(&buffer_nb);
3037}
3038
/* Buffer-layer symbols exported for use by modules. */
EXPORT_SYMBOL(__bforget);
EXPORT_SYMBOL(__brelse);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_prepare_write);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(block_write_full_page);
EXPORT_SYMBOL(buffer_insert_list);
EXPORT_SYMBOL(cont_prepare_write);
EXPORT_SYMBOL(end_buffer_async_write);
EXPORT_SYMBOL(end_buffer_read_sync);
EXPORT_SYMBOL(end_buffer_write_sync);
EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(fsync_bdev);
EXPORT_SYMBOL(fsync_buffers_list);
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_commit_write);
EXPORT_SYMBOL(generic_cont_expand);
EXPORT_SYMBOL(init_buffer);
EXPORT_SYMBOL(invalidate_bdev);
EXPORT_SYMBOL(ll_rw_block);
EXPORT_SYMBOL(mark_buffer_dirty);
EXPORT_SYMBOL(submit_bh);
EXPORT_SYMBOL(sync_dirty_buffer);
EXPORT_SYMBOL(unlock_buffer);
3066