1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/percpu.h>
26#include <linux/slab.h>
27#include <linux/smp_lock.h>
28#include <linux/blkdev.h>
29#include <linux/file.h>
30#include <linux/quotaops.h>
31#include <linux/iobuf.h>
32#include <linux/module.h>
33#include <linux/writeback.h>
34#include <linux/mempool.h>
35#include <linux/hash.h>
36#include <linux/suspend.h>
37#include <linux/buffer_head.h>
38#include <linux/bio.h>
39#include <asm/bitops.h>
40
41static void invalidate_bh_lrus(void);
42
43#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
44
45
46
47
48#define BH_WAIT_TABLE_ORDER 7
49static struct bh_wait_queue_head {
50 wait_queue_head_t wqh;
51} ____cacheline_aligned_in_smp bh_wait_queue_heads[1<<BH_WAIT_TABLE_ORDER];
52
53
54
55
56
/*
 * Log a buffer-layer consistency problem together with a stack dump.
 *
 * @file, @line: location of the caller that detected the problem.
 *
 * A static counter silences the report after it has been emitted eleven
 * times, so a recurring problem cannot flood the log.
 */
void __buffer_error(char *file, int line)
{
	static int nr_reports;

	if (nr_reports <= 10) {
		nr_reports++;
		printk("buffer layer error at %s:%d\n", file, line);
		printk("Pass this trace through ksymoops for reporting\n");
		dump_stack();
	}
}
EXPORT_SYMBOL(__buffer_error);
69
70inline void
71init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
72{
73 bh->b_end_io = handler;
74 bh->b_private = private;
75}
76
77
78
79
80
81static wait_queue_head_t *bh_waitq_head(struct buffer_head *bh)
82{
83 return &bh_wait_queue_heads[hash_ptr(bh, BH_WAIT_TABLE_ORDER)].wqh;
84}
85
86
87
88
89
/* Put the caller to sleep, via sleep_on(), on the wait queue hashed for @bh. */
void sleep_on_buffer(struct buffer_head *bh)
{
	sleep_on(bh_waitq_head(bh));
}
EXPORT_SYMBOL(sleep_on_buffer);
96
97void wake_up_buffer(struct buffer_head *bh)
98{
99 wait_queue_head_t *wq = bh_waitq_head(bh);
100
101 if (waitqueue_active(wq))
102 wake_up_all(wq);
103}
104EXPORT_SYMBOL(wake_up_buffer);
105
106void unlock_buffer(struct buffer_head *bh)
107{
108
109
110
111
112
113
114 if (atomic_read(&bh->b_count) == 0 &&
115 !PageLocked(bh->b_page) &&
116 !PageWriteback(bh->b_page))
117 buffer_error();
118
119 clear_buffer_locked(bh);
120 smp_mb__after_clear_bit();
121 wake_up_buffer(bh);
122}
123
124
125
126
127
128
129void __wait_on_buffer(struct buffer_head * bh)
130{
131 wait_queue_head_t *wq = bh_waitq_head(bh);
132 struct task_struct *tsk = current;
133 DECLARE_WAITQUEUE(wait, tsk);
134
135 get_bh(bh);
136 add_wait_queue(wq, &wait);
137 do {
138 blk_run_queues();
139 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
140 if (!buffer_locked(bh))
141 break;
142 schedule();
143 } while (buffer_locked(bh));
144 tsk->state = TASK_RUNNING;
145 remove_wait_queue(wq, &wait);
146 put_bh(bh);
147}
148
149static inline void
150__set_page_buffers(struct page *page, struct buffer_head *head)
151{
152 if (page_has_buffers(page))
153 buffer_error();
154 page_cache_get(page);
155 SetPagePrivate(page);
156 page->private = (unsigned long)head;
157}
158
159static inline void
160__clear_page_buffers(struct page *page)
161{
162 ClearPagePrivate(page);
163 page->private = 0;
164 page_cache_release(page);
165}
166
167static void buffer_io_error(struct buffer_head *bh)
168{
169 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
170 bdevname(bh->b_bdev),
171 (unsigned long long)bh->b_blocknr);
172}
173
174
175
176
177
/*
 * I/O completion handler for synchronous buffer I/O.
 *
 * Records the outcome in the buffer's uptodate bit, unlocks the buffer
 * (waking waiters) and drops one buffer reference.  Submitters in this
 * file take that reference with get_bh() before submit_bh().
 */
void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
192
193
194
195
196
197int sync_blockdev(struct block_device *bdev)
198{
199 int ret = 0;
200
201 if (bdev) {
202 int err;
203
204 ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
205 err = filemap_fdatawait(bdev->bd_inode->i_mapping);
206 if (!ret)
207 ret = err;
208 }
209 return ret;
210}
211EXPORT_SYMBOL(sync_blockdev);
212
213
214
215
216
217
218int fsync_super(struct super_block *sb)
219{
220 sync_inodes_sb(sb, 0);
221 DQUOT_SYNC(sb);
222 lock_super(sb);
223 if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
224 sb->s_op->write_super(sb);
225 unlock_super(sb);
226 sync_blockdev(sb->s_bdev);
227 sync_inodes_sb(sb, 1);
228
229 return sync_blockdev(sb->s_bdev);
230}
231
232
233
234
235
236
/*
 * Sync a block device.  If get_super() finds a superblock for @bdev the
 * whole filesystem is synced through fsync_super(); otherwise only the
 * device's own pages are written with sync_blockdev().
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = fsync_super(sb);
	drop_super(sb);
	return res;
}
247
248
249
250
251asmlinkage long sys_sync(void)
252{
253 sync_inodes(0);
254 DQUOT_SYNC(NULL);
255 sync_supers();
256 sync_inodes(1);
257 return 0;
258}
259
260
261
262
263
264
265
266int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
267{
268 struct inode * inode = dentry->d_inode;
269 struct super_block * sb;
270 int ret;
271
272
273 write_inode_now(inode, 0);
274
275
276 sb = inode->i_sb;
277 lock_super(sb);
278 if (sb->s_op && sb->s_op->write_super)
279 sb->s_op->write_super(sb);
280 unlock_super(sb);
281
282
283 ret = sync_blockdev(sb->s_bdev);
284 return ret;
285}
286
287asmlinkage long sys_fsync(unsigned int fd)
288{
289 struct file * file;
290 struct dentry * dentry;
291 struct inode * inode;
292 int ret, err;
293
294 ret = -EBADF;
295 file = fget(fd);
296 if (!file)
297 goto out;
298
299 dentry = file->f_dentry;
300 inode = dentry->d_inode;
301
302 ret = -EINVAL;
303 if (!file->f_op || !file->f_op->fsync) {
304
305 goto out_putf;
306 }
307
308
309 down(&inode->i_sem);
310 ret = filemap_fdatawrite(inode->i_mapping);
311 err = file->f_op->fsync(file, dentry, 0);
312 if (!ret)
313 ret = err;
314 err = filemap_fdatawait(inode->i_mapping);
315 if (!ret)
316 ret = err;
317 up(&inode->i_sem);
318
319out_putf:
320 fput(file);
321out:
322 return ret;
323}
324
325asmlinkage long sys_fdatasync(unsigned int fd)
326{
327 struct file * file;
328 struct dentry * dentry;
329 struct inode * inode;
330 int ret, err;
331
332 ret = -EBADF;
333 file = fget(fd);
334 if (!file)
335 goto out;
336
337 dentry = file->f_dentry;
338 inode = dentry->d_inode;
339
340 ret = -EINVAL;
341 if (!file->f_op || !file->f_op->fsync)
342 goto out_putf;
343
344 down(&inode->i_sem);
345 ret = filemap_fdatawrite(inode->i_mapping);
346 err = file->f_op->fsync(file, dentry, 1);
347 if (!ret)
348 ret = err;
349 err = filemap_fdatawait(inode->i_mapping);
350 if (!ret)
351 ret = err;
352 up(&inode->i_sem);
353
354out_putf:
355 fput(file);
356out:
357 return ret;
358}
359
360
361
362
363
364
365
366
367
368
369
370
371struct buffer_head *
372__find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
373{
374 struct inode *bd_inode = bdev->bd_inode;
375 struct address_space *bd_mapping = bd_inode->i_mapping;
376 struct buffer_head *ret = NULL;
377 unsigned long index;
378 struct buffer_head *bh;
379 struct buffer_head *head;
380 struct page *page;
381
382 index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
383 page = find_get_page(bd_mapping, index);
384 if (!page)
385 goto out;
386
387 spin_lock(&bd_mapping->private_lock);
388 if (!page_has_buffers(page))
389 goto out_unlock;
390 head = page_buffers(page);
391 bh = head;
392 do {
393 if (bh->b_blocknr == block) {
394 ret = bh;
395 get_bh(bh);
396 goto out_unlock;
397 }
398 bh = bh->b_this_page;
399 } while (bh != head);
400 buffer_error();
401out_unlock:
402 spin_unlock(&bd_mapping->private_lock);
403 page_cache_release(page);
404out:
405 return ret;
406}
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
441{
442 invalidate_bh_lrus();
443
444
445
446
447
448 invalidate_inode_pages(bdev->bd_inode);
449}
450
451void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
452{
453 struct block_device *bdev = bdget(kdev_t_to_nr(dev));
454 if (bdev) {
455 invalidate_bdev(bdev, destroy_dirty_buffers);
456 bdput(bdev);
457 }
458}
459
460
461
462
463
464static void free_more_memory(void)
465{
466 struct zone *zone;
467
468 zone = contig_page_data.node_zonelists[GFP_NOFS & GFP_ZONEMASK].zones[0];
469
470 wakeup_bdflush();
471 try_to_free_pages(zone, GFP_NOFS, 0);
472 blk_run_queues();
473 yield();
474}
475
476
477
478
479
480static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
481{
482 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
483 unsigned long flags;
484 struct buffer_head *tmp;
485 struct page *page;
486 int page_uptodate = 1;
487
488 BUG_ON(!buffer_async_read(bh));
489
490 page = bh->b_page;
491 if (uptodate) {
492 set_buffer_uptodate(bh);
493 } else {
494 clear_buffer_uptodate(bh);
495 buffer_io_error(bh);
496 SetPageError(page);
497 }
498
499
500
501
502
503
504 spin_lock_irqsave(&page_uptodate_lock, flags);
505 clear_buffer_async_read(bh);
506 unlock_buffer(bh);
507 tmp = bh;
508 do {
509 if (!buffer_uptodate(tmp))
510 page_uptodate = 0;
511 if (buffer_async_read(tmp)) {
512 BUG_ON(!buffer_locked(tmp));
513 goto still_busy;
514 }
515 tmp = tmp->b_this_page;
516 } while (tmp != bh);
517 spin_unlock_irqrestore(&page_uptodate_lock, flags);
518
519
520
521
522
523 if (page_uptodate && !PageError(page))
524 SetPageUptodate(page);
525 unlock_page(page);
526 return;
527
528still_busy:
529 spin_unlock_irqrestore(&page_uptodate_lock, flags);
530 return;
531}
532
533
534
535
536
537static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
538{
539 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
540 unsigned long flags;
541 struct buffer_head *tmp;
542 struct page *page;
543
544 BUG_ON(!buffer_async_write(bh));
545
546 page = bh->b_page;
547 if (uptodate) {
548 set_buffer_uptodate(bh);
549 } else {
550 buffer_io_error(bh);
551 clear_buffer_uptodate(bh);
552 SetPageError(page);
553 }
554
555 spin_lock_irqsave(&page_uptodate_lock, flags);
556 clear_buffer_async_write(bh);
557 unlock_buffer(bh);
558 tmp = bh->b_this_page;
559 while (tmp != bh) {
560 if (buffer_async_write(tmp)) {
561 BUG_ON(!buffer_locked(tmp));
562 goto still_busy;
563 }
564 tmp = tmp->b_this_page;
565 }
566 spin_unlock_irqrestore(&page_uptodate_lock, flags);
567 end_page_writeback(page);
568 return;
569
570still_busy:
571 spin_unlock_irqrestore(&page_uptodate_lock, flags);
572 return;
573}
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596inline void mark_buffer_async_read(struct buffer_head *bh)
597{
598 bh->b_end_io = end_buffer_async_read;
599 set_buffer_async_read(bh);
600}
601EXPORT_SYMBOL(mark_buffer_async_read);
602
603inline void mark_buffer_async_write(struct buffer_head *bh)
604{
605 bh->b_end_io = end_buffer_async_write;
606 set_buffer_async_write(bh);
607}
608EXPORT_SYMBOL(mark_buffer_async_write);
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660void buffer_insert_list(spinlock_t *lock,
661 struct buffer_head *bh, struct list_head *list)
662{
663 spin_lock(lock);
664 list_del(&bh->b_assoc_buffers);
665 list_add(&bh->b_assoc_buffers, list);
666 spin_unlock(lock);
667}
668
669
670
671
672static inline void __remove_assoc_queue(struct buffer_head *bh)
673{
674 list_del_init(&bh->b_assoc_buffers);
675}
676
677int inode_has_buffers(struct inode *inode)
678{
679 return !list_empty(&inode->i_mapping->private_list);
680}
681
682
683
684
685
686
687
688
689
690
691
692static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
693{
694 struct buffer_head *bh;
695 struct list_head *p;
696 int err = 0;
697
698 spin_lock(lock);
699repeat:
700 list_for_each_prev(p, list) {
701 bh = BH_ENTRY(p);
702 if (buffer_locked(bh)) {
703 get_bh(bh);
704 spin_unlock(lock);
705 wait_on_buffer(bh);
706 if (!buffer_uptodate(bh))
707 err = -EIO;
708 brelse(bh);
709 spin_lock(lock);
710 goto repeat;
711 }
712 }
713 spin_unlock(lock);
714 return err;
715}
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730int sync_mapping_buffers(struct address_space *mapping)
731{
732 struct address_space *buffer_mapping = mapping->assoc_mapping;
733
734 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
735 return 0;
736
737 return fsync_buffers_list(&buffer_mapping->private_lock,
738 &mapping->private_list);
739}
740EXPORT_SYMBOL(sync_mapping_buffers);
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767int write_mapping_buffers(struct address_space *mapping)
768{
769 spinlock_t *lock;
770 struct address_space *buffer_mapping;
771 unsigned nr_to_write;
772 struct list_head *lh;
773 int ret = 0;
774
775 if (list_empty(&mapping->private_list))
776 goto out;
777
778 buffer_mapping = mapping->assoc_mapping;
779 lock = &buffer_mapping->private_lock;
780 spin_lock(lock);
781 nr_to_write = 0;
782 lh = mapping->private_list.next;
783 while (lh != &mapping->private_list) {
784 lh = lh->next;
785 nr_to_write++;
786 }
787 nr_to_write *= 2;
788
789 while (nr_to_write-- && !list_empty(&mapping->private_list)) {
790 struct buffer_head *bh;
791
792 bh = BH_ENTRY(mapping->private_list.prev);
793 list_del_init(&bh->b_assoc_buffers);
794 if (!buffer_dirty(bh) && !buffer_locked(bh))
795 continue;
796
797 list_add(&bh->b_assoc_buffers, &mapping->private_list);
798 if (test_set_buffer_locked(bh))
799 continue;
800 get_bh(bh);
801 spin_unlock(lock);
802 if (test_clear_buffer_dirty(bh)) {
803 bh->b_end_io = end_buffer_io_sync;
804 submit_bh(WRITE, bh);
805 } else {
806 unlock_buffer(bh);
807 put_bh(bh);
808 }
809 spin_lock(lock);
810 }
811 spin_unlock(lock);
812out:
813 return ret;
814}
815EXPORT_SYMBOL(write_mapping_buffers);
816
817void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
818{
819 struct address_space *mapping = inode->i_mapping;
820 struct address_space *buffer_mapping = bh->b_page->mapping;
821
822 mark_buffer_dirty(bh);
823 if (!mapping->assoc_mapping) {
824 mapping->assoc_mapping = buffer_mapping;
825 } else {
826 if (mapping->assoc_mapping != buffer_mapping)
827 BUG();
828 }
829 if (list_empty(&bh->b_assoc_buffers))
830 buffer_insert_list(&buffer_mapping->private_lock,
831 bh, &mapping->private_list);
832}
833EXPORT_SYMBOL(mark_buffer_dirty_inode);
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
855{
856 struct buffer_head *bh;
857 struct list_head tmp;
858 int err = 0, err2;
859
860 INIT_LIST_HEAD(&tmp);
861
862 spin_lock(lock);
863 while (!list_empty(list)) {
864 bh = BH_ENTRY(list->next);
865 list_del_init(&bh->b_assoc_buffers);
866 if (buffer_dirty(bh) || buffer_locked(bh)) {
867 list_add(&bh->b_assoc_buffers, &tmp);
868 if (buffer_dirty(bh)) {
869 get_bh(bh);
870 spin_unlock(lock);
871 ll_rw_block(WRITE, 1, &bh);
872 brelse(bh);
873 spin_lock(lock);
874 }
875 }
876 }
877
878 while (!list_empty(&tmp)) {
879 bh = BH_ENTRY(tmp.prev);
880 __remove_assoc_queue(bh);
881 get_bh(bh);
882 spin_unlock(lock);
883 wait_on_buffer(bh);
884 if (!buffer_uptodate(bh))
885 err = -EIO;
886 brelse(bh);
887 spin_lock(lock);
888 }
889
890 spin_unlock(lock);
891 err2 = osync_buffers_list(lock, list);
892 if (err)
893 return err;
894 else
895 return err2;
896}
897
898
899
900
901
902
903
904
905
906
907void invalidate_inode_buffers(struct inode *inode)
908{
909 if (inode_has_buffers(inode)) {
910 struct address_space *mapping = inode->i_mapping;
911 struct list_head *list = &mapping->private_list;
912 struct address_space *buffer_mapping = mapping->assoc_mapping;
913
914 spin_lock(&buffer_mapping->private_lock);
915 while (!list_empty(list))
916 __remove_assoc_queue(BH_ENTRY(list->next));
917 spin_unlock(&buffer_mapping->private_lock);
918 }
919}
920
921
922
923
924
925
926
927
928
929
930static struct buffer_head *
931create_buffers(struct page * page, unsigned long size, int retry)
932{
933 struct buffer_head *bh, *head;
934 long offset;
935
936try_again:
937 head = NULL;
938 offset = PAGE_SIZE;
939 while ((offset -= size) >= 0) {
940 int pf_flags = current->flags;
941
942 current->flags |= PF_NOWARN;
943 bh = alloc_buffer_head();
944 current->flags = pf_flags;
945 if (!bh)
946 goto no_grow;
947
948 bh->b_bdev = NULL;
949 bh->b_this_page = head;
950 bh->b_blocknr = -1;
951 head = bh;
952
953 bh->b_state = 0;
954 atomic_set(&bh->b_count, 0);
955 bh->b_size = size;
956
957
958 set_bh_page(bh, page, offset);
959
960 bh->b_end_io = NULL;
961 }
962 return head;
963
964
965
966no_grow:
967 if (head) {
968 do {
969 bh = head;
970 head = head->b_this_page;
971 free_buffer_head(bh);
972 } while (head);
973 }
974
975
976
977
978
979
980
981 if (!retry)
982 return NULL;
983
984
985
986
987
988
989
990 blk_run_queues();
991
992 free_more_memory();
993 goto try_again;
994}
995
996static inline void
997link_dev_buffers(struct page *page, struct buffer_head *head)
998{
999 struct buffer_head *bh, *tail;
1000
1001 bh = head;
1002 do {
1003 tail = bh;
1004 bh = bh->b_this_page;
1005 } while (bh);
1006 tail->b_this_page = head;
1007 __set_page_buffers(page, head);
1008}
1009
1010
1011
1012
1013static void
1014init_page_buffers(struct page *page, struct block_device *bdev,
1015 int block, int size)
1016{
1017 struct buffer_head *head = page_buffers(page);
1018 struct buffer_head *bh = head;
1019 unsigned int b_state;
1020
1021 b_state = 1 << BH_Mapped;
1022 if (PageUptodate(page))
1023 b_state |= 1 << BH_Uptodate;
1024
1025 do {
1026 if (!(bh->b_state & (1 << BH_Mapped))) {
1027 init_buffer(bh, NULL, NULL);
1028 bh->b_bdev = bdev;
1029 bh->b_blocknr = block;
1030 bh->b_state = b_state;
1031 }
1032 block++;
1033 bh = bh->b_this_page;
1034 } while (bh != head);
1035}
1036
1037
1038
1039
1040
1041
1042static struct page *
1043grow_dev_page(struct block_device *bdev, unsigned long block,
1044 unsigned long index, int size)
1045{
1046 struct inode *inode = bdev->bd_inode;
1047 struct page *page;
1048 struct buffer_head *bh;
1049
1050 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
1051 if (!page)
1052 return NULL;
1053
1054 if (!PageLocked(page))
1055 BUG();
1056
1057 if (page_has_buffers(page)) {
1058 bh = page_buffers(page);
1059 if (bh->b_size == size)
1060 return page;
1061 if (!try_to_free_buffers(page))
1062 goto failed;
1063 }
1064
1065
1066
1067
1068 bh = create_buffers(page, size, 0);
1069 if (!bh)
1070 goto failed;
1071
1072
1073
1074
1075
1076
1077 spin_lock(&inode->i_mapping->private_lock);
1078 link_dev_buffers(page, bh);
1079 init_page_buffers(page, bdev, block, size);
1080 spin_unlock(&inode->i_mapping->private_lock);
1081 return page;
1082
1083failed:
1084 buffer_error();
1085 unlock_page(page);
1086 page_cache_release(page);
1087 return NULL;
1088}
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099static inline int
1100grow_buffers(struct block_device *bdev, unsigned long block, int size)
1101{
1102 struct page *page;
1103 unsigned long index;
1104 int sizebits;
1105
1106
1107 if (size & (bdev_hardsect_size(bdev)-1))
1108 BUG();
1109 if (size < 512 || size > PAGE_SIZE)
1110 BUG();
1111
1112 sizebits = -1;
1113 do {
1114 sizebits++;
1115 } while ((size << sizebits) < PAGE_SIZE);
1116
1117 index = block >> sizebits;
1118 block = index << sizebits;
1119
1120
1121 page = grow_dev_page(bdev, block, index, size);
1122 if (!page)
1123 return 0;
1124 unlock_page(page);
1125 page_cache_release(page);
1126 return 1;
1127}
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141struct buffer_head *
1142__getblk_slow(struct block_device *bdev, sector_t block, int size)
1143{
1144 for (;;) {
1145 struct buffer_head * bh;
1146
1147 bh = __find_get_block(bdev, block, size);
1148 if (bh) {
1149 touch_buffer(bh);
1150 return bh;
1151 }
1152
1153 if (!grow_buffers(bdev, block, size))
1154 free_more_memory();
1155 }
1156}
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192void mark_buffer_dirty(struct buffer_head *bh)
1193{
1194 if (!buffer_uptodate(bh))
1195 buffer_error();
1196 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
1197 __set_page_dirty_nobuffers(bh->b_page);
1198}
1199
1200
1201
1202
1203
1204
1205
1206
1207void __brelse(struct buffer_head * buf)
1208{
1209 if (atomic_read(&buf->b_count)) {
1210 put_bh(buf);
1211 return;
1212 }
1213 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1214 buffer_error();
1215}
1216
1217
1218
1219
1220
1221void __bforget(struct buffer_head *bh)
1222{
1223 clear_buffer_dirty(bh);
1224 if (!list_empty(&bh->b_assoc_buffers)) {
1225 struct address_space *buffer_mapping = bh->b_page->mapping;
1226
1227 spin_lock(&buffer_mapping->private_lock);
1228 list_del_init(&bh->b_assoc_buffers);
1229 spin_unlock(&buffer_mapping->private_lock);
1230 }
1231 __brelse(bh);
1232}
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242struct buffer_head *
1243__bread_slow(struct block_device *bdev, sector_t block, int size)
1244{
1245 struct buffer_head *bh = __getblk(bdev, block, size);
1246
1247 if (buffer_uptodate(bh))
1248 return bh;
1249 lock_buffer(bh);
1250 if (buffer_uptodate(bh)) {
1251 unlock_buffer(bh);
1252 return bh;
1253 } else {
1254 if (buffer_dirty(bh))
1255 buffer_error();
1256 get_bh(bh);
1257 bh->b_end_io = end_buffer_io_sync;
1258 submit_bh(READ, bh);
1259 wait_on_buffer(bh);
1260 if (buffer_uptodate(bh))
1261 return bh;
1262 }
1263 brelse(bh);
1264 return NULL;
1265}
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281#define BH_LRU_SIZE 8
1282
1283static struct bh_lru {
1284 struct buffer_head *bhs[BH_LRU_SIZE];
1285} ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
1286
1287#ifdef CONFIG_SMP
1288#define bh_lru_lock() local_irq_disable()
1289#define bh_lru_unlock() local_irq_enable()
1290#else
1291#define bh_lru_lock() preempt_disable()
1292#define bh_lru_unlock() preempt_enable()
1293#endif
1294
/*
 * Assert that interrupts are enabled.  Compiles to nothing when
 * irqs_disabled is not available as a macro.
 */
static inline void check_irqs_on(void)
{
#if defined(irqs_disabled)
	BUG_ON(irqs_disabled());
#endif
}
1301
1302
1303
1304
1305static void bh_lru_install(struct buffer_head *bh)
1306{
1307 struct buffer_head *evictee = NULL;
1308 struct bh_lru *lru;
1309
1310 if (bh == NULL)
1311 return;
1312
1313 check_irqs_on();
1314 bh_lru_lock();
1315 lru = &bh_lrus[smp_processor_id()];
1316 if (lru->bhs[0] != bh) {
1317 struct buffer_head *bhs[BH_LRU_SIZE];
1318 int in;
1319 int out = 0;
1320
1321 get_bh(bh);
1322 bhs[out++] = bh;
1323 for (in = 0; in < BH_LRU_SIZE; in++) {
1324 struct buffer_head *bh2 = lru->bhs[in];
1325
1326 if (bh2 == bh) {
1327 __brelse(bh2);
1328 } else {
1329 if (out >= BH_LRU_SIZE) {
1330 BUG_ON(evictee != NULL);
1331 evictee = bh2;
1332 } else {
1333 bhs[out++] = bh2;
1334 }
1335 }
1336 }
1337 while (out < BH_LRU_SIZE)
1338 bhs[out++] = NULL;
1339 memcpy(lru->bhs, bhs, sizeof(bhs));
1340 }
1341 bh_lru_unlock();
1342
1343 if (evictee) {
1344 touch_buffer(evictee);
1345 __brelse(evictee);
1346 }
1347}
1348
1349static inline struct buffer_head *
1350lookup_bh(struct block_device *bdev, sector_t block, int size)
1351{
1352 struct buffer_head *ret = NULL;
1353 struct bh_lru *lru;
1354 int i;
1355
1356 check_irqs_on();
1357 bh_lru_lock();
1358 lru = &bh_lrus[smp_processor_id()];
1359 for (i = 0; i < BH_LRU_SIZE; i++) {
1360 struct buffer_head *bh = lru->bhs[i];
1361
1362 if (bh && bh->b_bdev == bdev &&
1363 bh->b_blocknr == block && bh->b_size == size) {
1364 if (i) {
1365 while (i) {
1366 lru->bhs[i] = lru->bhs[i - 1];
1367 i--;
1368 }
1369 lru->bhs[0] = bh;
1370 }
1371 get_bh(bh);
1372 ret = bh;
1373 break;
1374 }
1375 }
1376 bh_lru_unlock();
1377 return ret;
1378}
1379
1380struct buffer_head *
1381__find_get_block(struct block_device *bdev, sector_t block, int size)
1382{
1383 struct buffer_head *bh = lookup_bh(bdev, block, size);
1384
1385 if (bh == NULL) {
1386 bh = __find_get_block_slow(bdev, block, size);
1387 bh_lru_install(bh);
1388 }
1389 return bh;
1390}
1391EXPORT_SYMBOL(__find_get_block);
1392
1393struct buffer_head *
1394__getblk(struct block_device *bdev, sector_t block, int size)
1395{
1396 struct buffer_head *bh = __find_get_block(bdev, block, size);
1397
1398 if (bh == NULL) {
1399 bh = __getblk_slow(bdev, block, size);
1400 bh_lru_install(bh);
1401 }
1402 return bh;
1403}
1404EXPORT_SYMBOL(__getblk);
1405
1406struct buffer_head *
1407__bread(struct block_device *bdev, sector_t block, int size)
1408{
1409 struct buffer_head *bh = __getblk(bdev, block, size);
1410
1411 if (bh) {
1412 if (buffer_uptodate(bh))
1413 return bh;
1414 __brelse(bh);
1415 }
1416 bh = __bread_slow(bdev, block, size);
1417 bh_lru_install(bh);
1418 return bh;
1419}
1420EXPORT_SYMBOL(__bread);
1421
1422
1423
1424
1425
1426
1427
1428static void invalidate_bh_lru(void *arg)
1429{
1430 const int cpu = get_cpu();
1431 int i;
1432
1433 for (i = 0; i < BH_LRU_SIZE; i++) {
1434 brelse(bh_lrus[cpu].bhs[i]);
1435 bh_lrus[cpu].bhs[i] = NULL;
1436 }
1437 put_cpu();
1438}
1439
1440static void invalidate_bh_lrus(void)
1441{
1442 preempt_disable();
1443 invalidate_bh_lru(NULL);
1444 smp_call_function(invalidate_bh_lru, NULL, 1, 1);
1445 preempt_enable();
1446}
1447
1448
1449
1450void set_bh_page(struct buffer_head *bh,
1451 struct page *page, unsigned long offset)
1452{
1453 bh->b_page = page;
1454 if (offset >= PAGE_SIZE)
1455 BUG();
1456 if (PageHighMem(page))
1457
1458
1459
1460 bh->b_data = (char *)(0 + offset);
1461 else
1462 bh->b_data = page_address(page) + offset;
1463}
1464EXPORT_SYMBOL(set_bh_page);
1465
1466
1467
1468
1469static void discard_buffer(struct buffer_head * bh)
1470{
1471 lock_buffer(bh);
1472 clear_buffer_dirty(bh);
1473 bh->b_bdev = NULL;
1474 clear_buffer_mapped(bh);
1475 clear_buffer_req(bh);
1476 clear_buffer_new(bh);
1477 unlock_buffer(bh);
1478}
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495int try_to_release_page(struct page *page, int gfp_mask)
1496{
1497 struct address_space * const mapping = page->mapping;
1498
1499 if (!PageLocked(page))
1500 BUG();
1501 if (PageWriteback(page))
1502 return 0;
1503
1504 if (mapping && mapping->a_ops->releasepage)
1505 return mapping->a_ops->releasepage(page, gfp_mask);
1506 return try_to_free_buffers(page);
1507}
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524int block_invalidatepage(struct page *page, unsigned long offset)
1525{
1526 struct buffer_head *head, *bh, *next;
1527 unsigned int curr_off = 0;
1528 int ret = 1;
1529
1530 BUG_ON(!PageLocked(page));
1531 if (!page_has_buffers(page))
1532 goto out;
1533
1534 head = page_buffers(page);
1535 bh = head;
1536 do {
1537 unsigned int next_off = curr_off + bh->b_size;
1538 next = bh->b_this_page;
1539
1540
1541
1542
1543 if (offset <= curr_off)
1544 discard_buffer(bh);
1545 curr_off = next_off;
1546 bh = next;
1547 } while (bh != head);
1548
1549
1550
1551
1552
1553
1554 if (offset == 0)
1555 ret = try_to_release_page(page, 0);
1556out:
1557 return ret;
1558}
1559EXPORT_SYMBOL(block_invalidatepage);
1560
1561
1562
1563
1564
1565
1566void create_empty_buffers(struct page *page,
1567 unsigned long blocksize, unsigned long b_state)
1568{
1569 struct buffer_head *bh, *head, *tail;
1570
1571 head = create_buffers(page, blocksize, 1);
1572 bh = head;
1573 do {
1574 bh->b_state |= b_state;
1575 tail = bh;
1576 bh = bh->b_this_page;
1577 } while (bh);
1578 tail->b_this_page = head;
1579
1580 spin_lock(&page->mapping->private_lock);
1581 if (PageUptodate(page) || PageDirty(page)) {
1582 bh = head;
1583 do {
1584 if (PageDirty(page))
1585 set_buffer_dirty(bh);
1586 if (PageUptodate(page))
1587 set_buffer_uptodate(bh);
1588 bh = bh->b_this_page;
1589 } while (bh != head);
1590 }
1591 __set_page_buffers(page, head);
1592 spin_unlock(&page->mapping->private_lock);
1593}
1594EXPORT_SYMBOL(create_empty_buffers);
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1613{
1614 struct buffer_head *old_bh;
1615
1616 old_bh = __find_get_block(bdev, block, 0);
1617 if (old_bh) {
1618#if 0
1619 if (buffer_dirty(old_bh))
1620 buffer_error();
1621#endif
1622 clear_buffer_dirty(old_bh);
1623 wait_on_buffer(old_bh);
1624 clear_buffer_req(old_bh);
1625 __brelse(old_bh);
1626 }
1627}
1628EXPORT_SYMBOL(unmap_underlying_metadata);
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
/*
 * Start write-out of all dirty buffers of a locked page.
 *
 * @inode:     inode the page belongs to
 * @page:      the page; must be locked on entry, is unlocked before return
 * @get_block: filesystem callback used to map buffers that are dirty but
 *             not yet mapped to a disk block
 *
 * Returns 0 on success, -EAGAIN if some buffer could not be locked in a
 * non-sync call (that buffer is left alone), or the error returned by
 * @get_block.
 *
 * The work is done in phases:
 *   1. map every dirty, unmapped buffer via @get_block;
 *   2. lock the dirty, mapped buffers and mark them for async write;
 *   3. set page writeback, unlock the page, submit the marked buffers.
 * If nothing was submitted, page writeback is ended here because no
 * completion handler will run to do it.
 */
static int __block_write_full_page(struct inode *inode,
			struct page *page, get_block_t *get_block)
{
	int err;
	int ret = 0;
	unsigned long block;
	unsigned long last_block;
	struct buffer_head *bh, *head;
	int nr_underway = 0;

	BUG_ON(!PageLocked(page));

	/* Index of the last block that lies within i_size. */
	last_block = (inode->i_size - 1) >> inode->i_blkbits;

	if (!page_has_buffers(page)) {
		/*
		 * A bufferless page is unexpected for a block device inode
		 * or when the page is not uptodate; both are reported but
		 * not treated as fatal.
		 */
		if (S_ISBLK(inode->i_mode))
			buffer_error();
		if (!PageUptodate(page))
			buffer_error();
		/* New buffers start out dirty and uptodate. */
		create_empty_buffers(page, 1 << inode->i_blkbits,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}

	/* Block number of the page's first buffer. */
	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	head = page_buffers(page);
	bh = head;

	/*
	 * Phase 1: get a disk mapping for every dirty buffer that lacks
	 * one.
	 */
	do {
		if (block > last_block) {
			/*
			 * The buffer lies wholly beyond i_size: there is
			 * nothing to write, so make it clean and uptodate.
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
			if (buffer_new(bh))
				buffer_error();
			err = get_block(inode, block, bh, 1);
			if (err)
				goto recover;
			if (buffer_new(bh)) {
				/* get_block reported the block as new */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	/*
	 * Phase 2: take a reference on every buffer and lock + mark the
	 * ones that will be written.  The "continue" jumps to the loop
	 * condition, i.e. it advances to the next buffer; the reference
	 * taken on the skipped buffer is dropped in phase 3 like all others.
	 */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			if (called_for_sync()) {
				lock_buffer(bh);
			} else {
				/* Non-sync callers do not block on the lock. */
				if (test_set_buffer_locked(bh)) {
					ret = -EAGAIN;
					continue;
				}
			}
			if (test_clear_buffer_dirty(bh)) {
				if (!buffer_uptodate(bh))
					buffer_error();
				mark_buffer_async_write(bh);
			} else {
				/* Cleaned by someone else in the meantime. */
				unlock_buffer(bh);
			}
		}
	} while ((bh = bh->b_this_page) != head);

	BUG_ON(PageWriteback(page));
	SetPageWriteback(page);
	unlock_page(page);

	/*
	 * Phase 3: submit.  The successor is read before submit_bh()/put_bh()
	 * because the reference on the current buffer is given up here.
	 */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	err = 0;
done:
	if (nr_underway == 0) {
		/*
		 * No I/O was started, so end_buffer_async_write() will never
		 * run for this page: finish writeback here, marking the page
		 * uptodate if all of its buffers are.
		 */
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (uptodate)
			SetPageUptodate(page);
		end_page_writeback(page);
	}
	if (err == 0)
		return ret;
	return err;

recover:
	/*
	 * get_block() failed.  Buffers that are both mapped and dirty are
	 * still written out; every other buffer has its dirty bit cleared.
	 * The page is flagged with an error and is no longer uptodate.
	 */
	ClearPageUptodate(page);
	bh = head;
	/* Recovery: lock and submit the mapped buffers */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			mark_buffer_async_write(bh);
		} else {
			/* Not going to be written: drop the dirty state. */
			clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	SetPageWriteback(page);
	unlock_page(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(WRITE, bh);
			nr_underway++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);
	/* err still holds the get_block() error for the return at "done". */
	goto done;
}
1830
1831static int __block_prepare_write(struct inode *inode, struct page *page,
1832 unsigned from, unsigned to, get_block_t *get_block)
1833{
1834 unsigned block_start, block_end;
1835 unsigned long block;
1836 int err = 0;
1837 unsigned blocksize, bbits;
1838 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1839
1840 BUG_ON(!PageLocked(page));
1841 BUG_ON(from > PAGE_CACHE_SIZE);
1842 BUG_ON(to > PAGE_CACHE_SIZE);
1843 BUG_ON(from > to);
1844
1845 blocksize = 1 << inode->i_blkbits;
1846 if (!page_has_buffers(page))
1847 create_empty_buffers(page, blocksize, 0);
1848 head = page_buffers(page);
1849
1850 bbits = inode->i_blkbits;
1851 block = page->index << (PAGE_CACHE_SHIFT - bbits);
1852
1853 for(bh = head, block_start = 0; bh != head || !block_start;
1854 block++, block_start=block_end, bh = bh->b_this_page) {
1855 block_end = block_start + blocksize;
1856 if (block_end <= from || block_start >= to) {
1857 if (PageUptodate(page)) {
1858 if (!buffer_uptodate(bh))
1859 set_buffer_uptodate(bh);
1860 }
1861 continue;
1862 }
1863 if (buffer_new(bh))
1864 clear_buffer_new(bh);
1865 if (!buffer_mapped(bh)) {
1866 err = get_block(inode, block, bh, 1);
1867 if (err)
1868 goto out;
1869 if (buffer_new(bh)) {
1870 clear_buffer_new(bh);
1871 unmap_underlying_metadata(bh->b_bdev,
1872 bh->b_blocknr);
1873 if (PageUptodate(page)) {
1874 if (!buffer_mapped(bh))
1875 buffer_error();
1876 set_buffer_uptodate(bh);
1877 continue;
1878 }
1879 if (block_end > to || block_start < from) {
1880 void *kaddr;
1881
1882 kaddr = kmap_atomic(page, KM_USER0);
1883 if (block_end > to)
1884 memset(kaddr+to, 0,
1885 block_end-to);
1886 if (block_start < from)
1887 memset(kaddr+block_start,
1888 0, from-block_start);
1889 flush_dcache_page(page);
1890 kunmap_atomic(kaddr, KM_USER0);
1891 }
1892 continue;
1893 }
1894 }
1895 if (PageUptodate(page)) {
1896 if (!buffer_uptodate(bh))
1897 set_buffer_uptodate(bh);
1898 continue;
1899 }
1900 if (!buffer_uptodate(bh) &&
1901 (block_start < from || block_end > to)) {
1902 ll_rw_block(READ, 1, &bh);
1903 *wait_bh++=bh;
1904 }
1905 }
1906
1907
1908
1909 while(wait_bh > wait) {
1910 wait_on_buffer(*--wait_bh);
1911 if (!buffer_uptodate(*wait_bh))
1912 return -EIO;
1913 }
1914 return 0;
1915out:
1916
1917
1918
1919
1920
1921 bh = head;
1922 block_start = 0;
1923 do {
1924 block_end = block_start+blocksize;
1925 if (block_end <= from)
1926 goto next_bh;
1927 if (block_start >= to)
1928 break;
1929 if (buffer_new(bh)) {
1930 void *kaddr;
1931
1932 clear_buffer_new(bh);
1933 if (buffer_uptodate(bh))
1934 buffer_error();
1935 kaddr = kmap_atomic(page, KM_USER0);
1936 memset(kaddr+block_start, 0, bh->b_size);
1937 kunmap_atomic(kaddr, KM_USER0);
1938 set_buffer_uptodate(bh);
1939 mark_buffer_dirty(bh);
1940 }
1941next_bh:
1942 block_start = block_end;
1943 bh = bh->b_this_page;
1944 } while (bh != head);
1945 return err;
1946}
1947
1948static int __block_commit_write(struct inode *inode, struct page *page,
1949 unsigned from, unsigned to)
1950{
1951 unsigned block_start, block_end;
1952 int partial = 0;
1953 unsigned blocksize;
1954 struct buffer_head *bh, *head;
1955
1956 blocksize = 1 << inode->i_blkbits;
1957
1958 for(bh = head = page_buffers(page), block_start = 0;
1959 bh != head || !block_start;
1960 block_start=block_end, bh = bh->b_this_page) {
1961 block_end = block_start + blocksize;
1962 if (block_end <= from || block_start >= to) {
1963 if (!buffer_uptodate(bh))
1964 partial = 1;
1965 } else {
1966 set_buffer_uptodate(bh);
1967 mark_buffer_dirty(bh);
1968 }
1969 }
1970
1971
1972
1973
1974
1975
1976
1977 if (!partial)
1978 SetPageUptodate(page);
1979 return 0;
1980}
1981
1982
1983
1984
1985
1986
1987
1988
1989int block_read_full_page(struct page *page, get_block_t *get_block)
1990{
1991 struct inode *inode = page->mapping->host;
1992 unsigned long iblock, lblock;
1993 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
1994 unsigned int blocksize, blocks;
1995 int nr, i;
1996
1997 if (!PageLocked(page))
1998 PAGE_BUG(page);
1999 if (PageUptodate(page))
2000 buffer_error();
2001 blocksize = 1 << inode->i_blkbits;
2002 if (!page_has_buffers(page))
2003 create_empty_buffers(page, blocksize, 0);
2004 head = page_buffers(page);
2005
2006 blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
2007 iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2008 lblock = (inode->i_size+blocksize-1) >> inode->i_blkbits;
2009 bh = head;
2010 nr = 0;
2011 i = 0;
2012
2013 do {
2014 if (buffer_uptodate(bh))
2015 continue;
2016
2017 if (!buffer_mapped(bh)) {
2018 if (iblock < lblock) {
2019 if (get_block(inode, iblock, bh, 0))
2020 SetPageError(page);
2021 }
2022 if (!buffer_mapped(bh)) {
2023 void *kaddr = kmap_atomic(page, KM_USER0);
2024 memset(kaddr + i * blocksize, 0, blocksize);
2025 flush_dcache_page(page);
2026 kunmap_atomic(kaddr, KM_USER0);
2027 set_buffer_uptodate(bh);
2028 continue;
2029 }
2030
2031
2032
2033
2034 if (buffer_uptodate(bh))
2035 continue;
2036 }
2037 arr[nr++] = bh;
2038 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2039
2040 if (!nr) {
2041
2042
2043
2044
2045 if (!PageError(page))
2046 SetPageUptodate(page);
2047 unlock_page(page);
2048 return 0;
2049 }
2050
2051
2052 for (i = 0; i < nr; i++) {
2053 bh = arr[i];
2054 lock_buffer(bh);
2055 mark_buffer_async_read(bh);
2056 }
2057
2058
2059
2060
2061
2062
2063 for (i = 0; i < nr; i++) {
2064 bh = arr[i];
2065 if (buffer_uptodate(bh))
2066 end_buffer_async_read(bh, 1);
2067 else
2068 submit_bh(READ, bh);
2069 }
2070 return 0;
2071}
2072
2073
2074
2075
2076
2077int generic_cont_expand(struct inode *inode, loff_t size)
2078{
2079 struct address_space *mapping = inode->i_mapping;
2080 struct page *page;
2081 unsigned long index, offset, limit;
2082 int err;
2083
2084 err = -EFBIG;
2085 limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
2086 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2087 send_sig(SIGXFSZ, current, 0);
2088 goto out;
2089 }
2090 if (size > inode->i_sb->s_maxbytes)
2091 goto out;
2092
2093 offset = (size & (PAGE_CACHE_SIZE-1));
2094
2095
2096
2097
2098
2099 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2100 offset++;
2101 }
2102 index = size >> PAGE_CACHE_SHIFT;
2103 err = -ENOMEM;
2104 page = grab_cache_page(mapping, index);
2105 if (!page)
2106 goto out;
2107 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2108 if (!err) {
2109 err = mapping->a_ops->commit_write(NULL, page, offset, offset);
2110 }
2111 unlock_page(page);
2112 page_cache_release(page);
2113 if (err > 0)
2114 err = 0;
2115out:
2116 return err;
2117}
2118
2119
2120
2121
2122
2123
2124int cont_prepare_write(struct page *page, unsigned offset,
2125 unsigned to, get_block_t *get_block, loff_t *bytes)
2126{
2127 struct address_space *mapping = page->mapping;
2128 struct inode *inode = mapping->host;
2129 struct page *new_page;
2130 unsigned long pgpos;
2131 long status;
2132 unsigned zerofrom;
2133 unsigned blocksize = 1 << inode->i_blkbits;
2134 void *kaddr;
2135
2136 while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
2137 status = -ENOMEM;
2138 new_page = grab_cache_page(mapping, pgpos);
2139 if (!new_page)
2140 goto out;
2141
2142 if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
2143 unlock_page(new_page);
2144 page_cache_release(new_page);
2145 continue;
2146 }
2147 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2148 if (zerofrom & (blocksize-1)) {
2149 *bytes |= (blocksize-1);
2150 (*bytes)++;
2151 }
2152 status = __block_prepare_write(inode, new_page, zerofrom,
2153 PAGE_CACHE_SIZE, get_block);
2154 if (status)
2155 goto out_unmap;
2156 kaddr = kmap_atomic(new_page, KM_USER0);
2157 memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
2158 flush_dcache_page(new_page);
2159 kunmap_atomic(kaddr, KM_USER0);
2160 __block_commit_write(inode, new_page,
2161 zerofrom, PAGE_CACHE_SIZE);
2162 unlock_page(new_page);
2163 page_cache_release(new_page);
2164 }
2165
2166 if (page->index < pgpos) {
2167
2168 zerofrom = offset;
2169 } else {
2170
2171 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2172
2173
2174 if (to > zerofrom && (zerofrom & (blocksize-1))) {
2175 *bytes |= (blocksize-1);
2176 (*bytes)++;
2177 }
2178
2179
2180 if (offset <= zerofrom)
2181 zerofrom = offset;
2182 }
2183 status = __block_prepare_write(inode, page, zerofrom, to, get_block);
2184 if (status)
2185 goto out1;
2186 if (zerofrom < offset) {
2187 kaddr = kmap_atomic(page, KM_USER0);
2188 memset(kaddr+zerofrom, 0, offset-zerofrom);
2189 flush_dcache_page(page);
2190 kunmap_atomic(kaddr, KM_USER0);
2191 __block_commit_write(inode, page, zerofrom, offset);
2192 }
2193 return 0;
2194out1:
2195 ClearPageUptodate(page);
2196 return status;
2197
2198out_unmap:
2199 ClearPageUptodate(new_page);
2200 unlock_page(new_page);
2201 page_cache_release(new_page);
2202out:
2203 return status;
2204}
2205
2206int block_prepare_write(struct page *page, unsigned from, unsigned to,
2207 get_block_t *get_block)
2208{
2209 struct inode *inode = page->mapping->host;
2210 int err = __block_prepare_write(inode, page, from, to, get_block);
2211 if (err)
2212 ClearPageUptodate(page);
2213 return err;
2214}
2215
2216int block_commit_write(struct page *page, unsigned from, unsigned to)
2217{
2218 struct inode *inode = page->mapping->host;
2219 __block_commit_write(inode,page,from,to);
2220 return 0;
2221}
2222
2223int generic_commit_write(struct file *file, struct page *page,
2224 unsigned from, unsigned to)
2225{
2226 struct inode *inode = page->mapping->host;
2227 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2228 __block_commit_write(inode,page,from,to);
2229 if (pos > inode->i_size) {
2230 inode->i_size = pos;
2231 mark_inode_dirty(inode);
2232 }
2233 return 0;
2234}
2235
2236int block_truncate_page(struct address_space *mapping,
2237 loff_t from, get_block_t *get_block)
2238{
2239 unsigned long index = from >> PAGE_CACHE_SHIFT;
2240 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2241 unsigned blocksize, iblock, length, pos;
2242 struct inode *inode = mapping->host;
2243 struct page *page;
2244 struct buffer_head *bh;
2245 void *kaddr;
2246 int err;
2247
2248 blocksize = 1 << inode->i_blkbits;
2249 length = offset & (blocksize - 1);
2250
2251
2252 if (!length)
2253 return 0;
2254
2255 length = blocksize - length;
2256 iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2257
2258 page = grab_cache_page(mapping, index);
2259 err = -ENOMEM;
2260 if (!page)
2261 goto out;
2262
2263 if (!page_has_buffers(page))
2264 create_empty_buffers(page, blocksize, 0);
2265
2266
2267 bh = page_buffers(page);
2268 pos = blocksize;
2269 while (offset >= pos) {
2270 bh = bh->b_this_page;
2271 iblock++;
2272 pos += blocksize;
2273 }
2274
2275 err = 0;
2276 if (!buffer_mapped(bh)) {
2277 err = get_block(inode, iblock, bh, 0);
2278 if (err)
2279 goto unlock;
2280
2281 if (!buffer_mapped(bh))
2282 goto unlock;
2283 }
2284
2285
2286 if (PageUptodate(page))
2287 set_buffer_uptodate(bh);
2288
2289 if (!buffer_uptodate(bh)) {
2290 err = -EIO;
2291 ll_rw_block(READ, 1, &bh);
2292 wait_on_buffer(bh);
2293
2294 if (!buffer_uptodate(bh))
2295 goto unlock;
2296 }
2297
2298 kaddr = kmap_atomic(page, KM_USER0);
2299 memset(kaddr + offset, 0, length);
2300 flush_dcache_page(page);
2301 kunmap_atomic(kaddr, KM_USER0);
2302
2303 mark_buffer_dirty(bh);
2304 err = 0;
2305
2306unlock:
2307 unlock_page(page);
2308 page_cache_release(page);
2309out:
2310 return err;
2311}
2312
2313
2314
2315
2316int block_write_full_page(struct page *page, get_block_t *get_block)
2317{
2318 struct inode * const inode = page->mapping->host;
2319 const unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2320 unsigned offset;
2321 void *kaddr;
2322
2323
2324 if (page->index < end_index)
2325 return __block_write_full_page(inode, page, get_block);
2326
2327
2328 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
2329 if (page->index >= end_index+1 || !offset) {
2330 unlock_page(page);
2331 return -EIO;
2332 }
2333
2334
2335
2336
2337
2338
2339
2340
2341 kaddr = kmap_atomic(page, KM_USER0);
2342 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2343 flush_dcache_page(page);
2344 kunmap_atomic(kaddr, KM_USER0);
2345 return __block_write_full_page(inode, page, get_block);
2346}
2347
2348sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2349 get_block_t *get_block)
2350{
2351 struct buffer_head tmp;
2352 struct inode *inode = mapping->host;
2353 tmp.b_state = 0;
2354 tmp.b_blocknr = 0;
2355 get_block(inode, block, &tmp, 0);
2356 return tmp.b_blocknr;
2357}
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
2370 struct block_device *bdev, sector_t b[], int size)
2371{
2372 int transferred;
2373 int i;
2374 int err;
2375 struct kiobuf * iobuf;
2376
2377 if (!nr)
2378 return 0;
2379
2380
2381
2382
2383 for (i = 0; i < nr; i++) {
2384 iobuf = iovec[i];
2385 if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1)))
2386 return -EINVAL;
2387 if (!iobuf->nr_pages)
2388 panic("brw_kiovec: iobuf not initialised");
2389 }
2390
2391
2392
2393
2394 for (i = 0; i < nr; i++) {
2395 iobuf = iovec[i];
2396 iobuf->errno = 0;
2397
2398 ll_rw_kio(rw, iobuf, bdev, b[i] * (size >> 9));
2399 }
2400
2401
2402
2403
2404 transferred = 0;
2405 err = 0;
2406 for (i = 0; i < nr; i++) {
2407 iobuf = iovec[i];
2408 kiobuf_wait_for_io(iobuf);
2409 if (iobuf->errno && !err)
2410 err = iobuf->errno;
2411 if (!err)
2412 transferred += iobuf->length;
2413 }
2414
2415 return err ? err : transferred;
2416}
2417
2418static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
2419{
2420 struct buffer_head *bh = bio->bi_private;
2421
2422 if (bio->bi_size)
2423 return 1;
2424
2425 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2426 bio_put(bio);
2427 return 0;
2428}
2429
2430int submit_bh(int rw, struct buffer_head * bh)
2431{
2432 struct bio *bio;
2433
2434 BUG_ON(!buffer_locked(bh));
2435 BUG_ON(!buffer_mapped(bh));
2436 BUG_ON(!bh->b_end_io);
2437
2438 if ((rw == READ || rw == READA) && buffer_uptodate(bh))
2439 buffer_error();
2440 if (rw == WRITE && !buffer_uptodate(bh))
2441 buffer_error();
2442 if (rw == READ && buffer_dirty(bh))
2443 buffer_error();
2444
2445 set_buffer_req(bh);
2446
2447
2448
2449
2450
2451 bio = bio_alloc(GFP_NOIO, 1);
2452
2453 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2454 bio->bi_bdev = bh->b_bdev;
2455 bio->bi_io_vec[0].bv_page = bh->b_page;
2456 bio->bi_io_vec[0].bv_len = bh->b_size;
2457 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2458
2459 bio->bi_vcnt = 1;
2460 bio->bi_idx = 0;
2461 bio->bi_size = bh->b_size;
2462
2463 bio->bi_end_io = end_bio_bh_io_sync;
2464 bio->bi_private = bh;
2465
2466 return submit_bio(rw, bio);
2467}
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
2495{
2496 unsigned int major;
2497 int correct_size;
2498 int i;
2499
2500 if (!nr)
2501 return;
2502
2503 major = major(to_kdev_t(bhs[0]->b_bdev->bd_dev));
2504
2505
2506 correct_size = bdev_hardsect_size(bhs[0]->b_bdev);
2507
2508
2509 for (i = 0; i < nr; i++) {
2510 struct buffer_head *bh = bhs[i];
2511 if (bh->b_size & (correct_size - 1)) {
2512 printk(KERN_NOTICE "ll_rw_block: device %s: "
2513 "only %d-char blocks implemented (%u)\n",
2514 bdevname(bhs[0]->b_bdev),
2515 correct_size, bh->b_size);
2516 goto sorry;
2517 }
2518 }
2519
2520 if ((rw & WRITE) && bdev_read_only(bhs[0]->b_bdev)) {
2521 printk(KERN_NOTICE "Can't write to read-only device %s\n",
2522 bdevname(bhs[0]->b_bdev));
2523 goto sorry;
2524 }
2525
2526 for (i = 0; i < nr; i++) {
2527 struct buffer_head *bh = bhs[i];
2528
2529
2530 if (test_set_buffer_locked(bh))
2531 continue;
2532
2533
2534 atomic_inc(&bh->b_count);
2535 bh->b_end_io = end_buffer_io_sync;
2536
2537 switch(rw) {
2538 case WRITE:
2539 if (!test_clear_buffer_dirty(bh))
2540
2541 goto end_io;
2542 break;
2543
2544 case READA:
2545 case READ:
2546 if (buffer_uptodate(bh))
2547
2548 goto end_io;
2549 break;
2550 default:
2551 BUG();
2552 end_io:
2553 bh->b_end_io(bh, buffer_uptodate(bh));
2554 continue;
2555 }
2556
2557 submit_bh(rw, bh);
2558 }
2559 return;
2560
2561sorry:
2562
2563 for (i = 0; i < nr; i++)
2564 clear_buffer_dirty(bhs[i]);
2565}
2566
2567
2568
2569
2570static void check_ttfb_buffer(struct page *page, struct buffer_head *bh)
2571{
2572 if (!buffer_uptodate(bh)) {
2573 if (PageUptodate(page) && page->mapping
2574 && buffer_mapped(bh)
2575 && S_ISBLK(page->mapping->host->i_mode))
2576 {
2577 buffer_error();
2578 }
2579 }
2580}
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602static inline int buffer_busy(struct buffer_head *bh)
2603{
2604 return atomic_read(&bh->b_count) |
2605 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
2606}
2607
2608static inline int
2609drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
2610{
2611 struct buffer_head *head = page_buffers(page);
2612 struct buffer_head *bh;
2613 int was_uptodate = 1;
2614
2615 bh = head;
2616 do {
2617 check_ttfb_buffer(page, bh);
2618 if (buffer_busy(bh))
2619 goto failed;
2620 if (!buffer_uptodate(bh))
2621 was_uptodate = 0;
2622 bh = bh->b_this_page;
2623 } while (bh != head);
2624
2625 if (!was_uptodate && PageUptodate(page))
2626 buffer_error();
2627
2628 do {
2629 struct buffer_head *next = bh->b_this_page;
2630
2631 if (!list_empty(&bh->b_assoc_buffers))
2632 __remove_assoc_queue(bh);
2633 bh = next;
2634 } while (bh != head);
2635 *buffers_to_free = head;
2636 __clear_page_buffers(page);
2637 return 1;
2638failed:
2639 return 0;
2640}
2641
2642int try_to_free_buffers(struct page *page)
2643{
2644 struct address_space * const mapping = page->mapping;
2645 struct buffer_head *buffers_to_free = NULL;
2646 int ret = 0;
2647
2648 BUG_ON(!PageLocked(page));
2649 if (PageWriteback(page))
2650 return 0;
2651
2652 if (mapping == NULL) {
2653 ret = drop_buffers(page, &buffers_to_free);
2654 goto out;
2655 }
2656
2657 spin_lock(&mapping->private_lock);
2658 ret = drop_buffers(page, &buffers_to_free);
2659 if (ret && !PageSwapCache(page)) {
2660
2661
2662
2663
2664
2665
2666
2667
2668 clear_page_dirty(page);
2669 }
2670 spin_unlock(&mapping->private_lock);
2671out:
2672 if (buffers_to_free) {
2673 struct buffer_head *bh = buffers_to_free;
2674
2675 do {
2676 struct buffer_head *next = bh->b_this_page;
2677 free_buffer_head(bh);
2678 bh = next;
2679 } while (bh != buffers_to_free);
2680 }
2681 return ret;
2682}
2683EXPORT_SYMBOL(try_to_free_buffers);
2684
/*
 * sync_page implementation for block-backed address spaces: calls
 * blk_run_queues() and returns 0.  'page' is not looked at.
 */
int block_sync_page(struct page *page)
{
	blk_run_queues();

	return 0;
}
2690
2691
2692
2693
2694
2695asmlinkage long sys_bdflush(int func, long data)
2696{
2697 if (!capable(CAP_SYS_ADMIN))
2698 return -EPERM;
2699 if (func == 1)
2700 do_exit(0);
2701 return 0;
2702}
2703
2704
2705
2706
2707static kmem_cache_t *bh_cachep;
2708static mempool_t *bh_mempool;
2709
2710
2711
2712
2713
2714static int max_buffer_heads;
2715
2716int buffer_heads_over_limit;
2717
2718struct bh_accounting {
2719 int nr;
2720 int ratelimit;
2721};
2722
2723static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
2724
2725static void recalc_bh_state(void)
2726{
2727 int i;
2728 int tot = 0;
2729
2730 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
2731 return;
2732 __get_cpu_var(bh_accounting).ratelimit = 0;
2733 for (i = 0; i < NR_CPUS; i++)
2734 tot += per_cpu(bh_accounting, i).nr;
2735 buffer_heads_over_limit = (tot > max_buffer_heads);
2736}
2737
2738struct buffer_head *alloc_buffer_head(void)
2739{
2740 struct buffer_head *ret = mempool_alloc(bh_mempool, GFP_NOFS);
2741 if (ret) {
2742 preempt_disable();
2743 __get_cpu_var(bh_accounting).nr++;
2744 recalc_bh_state();
2745 preempt_enable();
2746 }
2747 return ret;
2748}
2749EXPORT_SYMBOL(alloc_buffer_head);
2750
2751void free_buffer_head(struct buffer_head *bh)
2752{
2753 BUG_ON(!list_empty(&bh->b_assoc_buffers));
2754 mempool_free(bh, bh_mempool);
2755 preempt_disable();
2756 __get_cpu_var(bh_accounting).nr--;
2757 recalc_bh_state();
2758 preempt_enable();
2759}
2760EXPORT_SYMBOL(free_buffer_head);
2761
2762static void init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
2763{
2764 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2765 SLAB_CTOR_CONSTRUCTOR) {
2766 struct buffer_head * bh = (struct buffer_head *)data;
2767
2768 memset(bh, 0, sizeof(*bh));
2769 INIT_LIST_HEAD(&bh->b_assoc_buffers);
2770 }
2771}
2772
2773static void *bh_mempool_alloc(int gfp_mask, void *pool_data)
2774{
2775 return kmem_cache_alloc(bh_cachep, gfp_mask);
2776}
2777
2778static void bh_mempool_free(void *element, void *pool_data)
2779{
2780 return kmem_cache_free(bh_cachep, element);
2781}
2782
/*
 * Sizing constants for the buffer_head mempool.  Both expansions are
 * fully parenthesized so they stay correct inside larger expressions.
 */
#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
#define MAX_UNUSED_BUFFERS (NR_RESERVED+20)
2785
2786void __init buffer_init(void)
2787{
2788 int i;
2789 int nrpages;
2790
2791 bh_cachep = kmem_cache_create("buffer_head",
2792 sizeof(struct buffer_head), 0,
2793 0, init_buffer_head, NULL);
2794 bh_mempool = mempool_create(MAX_UNUSED_BUFFERS, bh_mempool_alloc,
2795 bh_mempool_free, NULL);
2796 for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++)
2797 init_waitqueue_head(&bh_wait_queue_heads[i].wqh);
2798
2799
2800
2801
2802 nrpages = (nr_free_buffer_pages() * 10) / 100;
2803 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
2804}
2805