1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/spinlock.h>
16#include <linux/fs.h>
17#include <linux/mm.h>
18#include <linux/swap.h>
19#include <linux/slab.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/init.h>
23#include <linux/backing-dev.h>
24#include <linux/task_io_accounting_ops.h>
25#include <linux/blkdev.h>
26#include <linux/mpage.h>
27#include <linux/rmap.h>
28#include <linux/percpu.h>
29#include <linux/notifier.h>
30#include <linux/smp.h>
31#include <linux/sysctl.h>
32#include <linux/cpu.h>
33#include <linux/syscalls.h>
34#include <linux/buffer_head.h>
35#include <linux/pagevec.h>
36
37
38
39
40
41
42
43
/*
 * Upper bound on the number of pages requested from writeback_inodes()
 * in a single pass.  background_writeout() and wb_kupdate() both reload
 * wbc.nr_to_write with this value on every loop iteration.
 */
#define MAX_WRITEBACK_PAGES	1024
45
46
47
48
49
50static long ratelimit_pages = 32;
51
52static int dirty_exceeded __cacheline_aligned_in_smp;
53
54
55
56
57
58
59
60static inline long sync_writeback_pages(void)
61{
62 return ratelimit_pages + ratelimit_pages / 2;
63}
64
65
66
67
68
69
70int dirty_background_ratio = 5;
71
72
73
74
75int vm_dirty_ratio = 10;
76
77
78
79
80int dirty_writeback_interval = 5 * HZ;
81
82
83
84
85int dirty_expire_interval = 30 * HZ;
86
87
88
89
90int block_dump;
91
92
93
94
95
96int laptop_mode;
97
98EXPORT_SYMBOL(laptop_mode);
99
100
101
102
103static void background_writeout(unsigned long _min_pages);
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Count the free, inactive and active pages that live in ZONE_HIGHMEM
 * across all online nodes.
 *
 * @total: upper bound for the result.
 *
 * Returns the highmem page count clamped to @total, or 0 when the kernel
 * is built without CONFIG_HIGHMEM.  The clamp means the caller's
 * subtraction from @total cannot underflow.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	unsigned long highmem_pages = 0;
	int nid;

	for_each_online_node(nid) {
		struct zone *hz = &NODE_DATA(nid)->node_zones[ZONE_HIGHMEM];

		highmem_pages += zone_page_state(hz, NR_FREE_PAGES);
		highmem_pages += zone_page_state(hz, NR_INACTIVE);
		highmem_pages += zone_page_state(hz, NR_ACTIVE);
	}

	return min(highmem_pages, total);
#else
	return 0;
#endif
}
148
149static unsigned long determine_dirtyable_memory(void)
150{
151 unsigned long x;
152
153 x = global_page_state(NR_FREE_PAGES)
154 + global_page_state(NR_INACTIVE)
155 + global_page_state(NR_ACTIVE);
156 x -= highmem_dirtyable_memory(x);
157 return x + 1;
158}
159
160static void
161get_dirty_limits(long *pbackground, long *pdirty,
162 struct address_space *mapping)
163{
164 int background_ratio;
165 int dirty_ratio;
166 int unmapped_ratio;
167 long background;
168 long dirty;
169 unsigned long available_memory = determine_dirtyable_memory();
170 struct task_struct *tsk;
171
172 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
173 global_page_state(NR_ANON_PAGES)) * 100) /
174 available_memory;
175
176 dirty_ratio = vm_dirty_ratio;
177 if (dirty_ratio > unmapped_ratio / 2)
178 dirty_ratio = unmapped_ratio / 2;
179
180 if (dirty_ratio < 5)
181 dirty_ratio = 5;
182
183 background_ratio = dirty_background_ratio;
184 if (background_ratio >= dirty_ratio)
185 background_ratio = dirty_ratio / 2;
186
187 background = (background_ratio * available_memory) / 100;
188 dirty = (dirty_ratio * available_memory) / 100;
189 tsk = current;
190 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
191 background += background / 4;
192 dirty += dirty / 4;
193 }
194 *pbackground = background;
195 *pdirty = dirty;
196}
197
198
199
200
201
202
203
204
205static void balance_dirty_pages(struct address_space *mapping)
206{
207 long nr_reclaimable;
208 long background_thresh;
209 long dirty_thresh;
210 unsigned long pages_written = 0;
211 unsigned long write_chunk = sync_writeback_pages();
212
213 struct backing_dev_info *bdi = mapping->backing_dev_info;
214
215 for (;;) {
216 struct writeback_control wbc = {
217 .bdi = bdi,
218 .sync_mode = WB_SYNC_NONE,
219 .older_than_this = NULL,
220 .nr_to_write = write_chunk,
221 .range_cyclic = 1,
222 };
223
224 get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
225 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
226 global_page_state(NR_UNSTABLE_NFS);
227 if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
228 dirty_thresh)
229 break;
230
231 if (!dirty_exceeded)
232 dirty_exceeded = 1;
233
234
235
236
237
238
239
240 if (nr_reclaimable) {
241 writeback_inodes(&wbc);
242 get_dirty_limits(&background_thresh,
243 &dirty_thresh, mapping);
244 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
245 global_page_state(NR_UNSTABLE_NFS);
246 if (nr_reclaimable +
247 global_page_state(NR_WRITEBACK)
248 <= dirty_thresh)
249 break;
250 pages_written += write_chunk - wbc.nr_to_write;
251 if (pages_written >= write_chunk)
252 break;
253 }
254 congestion_wait(WRITE, HZ/10);
255 }
256
257 if (nr_reclaimable + global_page_state(NR_WRITEBACK)
258 <= dirty_thresh && dirty_exceeded)
259 dirty_exceeded = 0;
260
261 if (writeback_in_progress(bdi))
262 return;
263
264
265
266
267
268
269
270
271
272 if ((laptop_mode && pages_written) ||
273 (!laptop_mode && (nr_reclaimable > background_thresh)))
274 pdflush_operation(background_writeout, 0);
275}
276
/*
 * Dirty @page and, if set_page_dirty() returned non-zero and the page
 * has a mapping, run the rate-limited dirty balancing for that mapping.
 */
void set_page_dirty_balance(struct page *page)
{
	struct address_space *mapping;

	if (!set_page_dirty(page))
		return;

	mapping = page_mapping(page);
	if (mapping)
		balance_dirty_pages_ratelimited(mapping);
}
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
302 unsigned long nr_pages_dirtied)
303{
304 static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
305 unsigned long ratelimit;
306 unsigned long *p;
307
308 ratelimit = ratelimit_pages;
309 if (dirty_exceeded)
310 ratelimit = 8;
311
312
313
314
315
316 preempt_disable();
317 p = &__get_cpu_var(ratelimits);
318 *p += nr_pages_dirtied;
319 if (unlikely(*p >= ratelimit)) {
320 *p = 0;
321 preempt_enable();
322 balance_dirty_pages(mapping);
323 return;
324 }
325 preempt_enable();
326}
327EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
328
329void throttle_vm_writeout(gfp_t gfp_mask)
330{
331 long background_thresh;
332 long dirty_thresh;
333
334 if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
335
336
337
338
339
340 congestion_wait(WRITE, HZ/10);
341 return;
342 }
343
344 for ( ; ; ) {
345 get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
346
347
348
349
350
351 dirty_thresh += dirty_thresh / 10;
352
353 if (global_page_state(NR_UNSTABLE_NFS) +
354 global_page_state(NR_WRITEBACK) <= dirty_thresh)
355 break;
356 congestion_wait(WRITE, HZ/10);
357 }
358}
359
360
361
362
363
364static void background_writeout(unsigned long _min_pages)
365{
366 long min_pages = _min_pages;
367 struct writeback_control wbc = {
368 .bdi = NULL,
369 .sync_mode = WB_SYNC_NONE,
370 .older_than_this = NULL,
371 .nr_to_write = 0,
372 .nonblocking = 1,
373 .range_cyclic = 1,
374 };
375
376 for ( ; ; ) {
377 long background_thresh;
378 long dirty_thresh;
379
380 get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
381 if (global_page_state(NR_FILE_DIRTY) +
382 global_page_state(NR_UNSTABLE_NFS) < background_thresh
383 && min_pages <= 0)
384 break;
385 wbc.encountered_congestion = 0;
386 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
387 wbc.pages_skipped = 0;
388 writeback_inodes(&wbc);
389 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
390 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
391
392 congestion_wait(WRITE, HZ/10);
393 if (!wbc.encountered_congestion)
394 break;
395 }
396 }
397}
398
399
400
401
402
403
404int wakeup_pdflush(long nr_pages)
405{
406 if (nr_pages == 0)
407 nr_pages = global_page_state(NR_FILE_DIRTY) +
408 global_page_state(NR_UNSTABLE_NFS);
409 return pdflush_operation(background_writeout, nr_pages);
410}
411
412static void wb_timer_fn(unsigned long unused);
413static void laptop_timer_fn(unsigned long unused);
414
415static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
416static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433static void wb_kupdate(unsigned long arg)
434{
435 unsigned long oldest_jif;
436 unsigned long start_jif;
437 unsigned long next_jif;
438 long nr_to_write;
439 struct writeback_control wbc = {
440 .bdi = NULL,
441 .sync_mode = WB_SYNC_NONE,
442 .older_than_this = &oldest_jif,
443 .nr_to_write = 0,
444 .nonblocking = 1,
445 .for_kupdate = 1,
446 .range_cyclic = 1,
447 };
448
449 sync_supers();
450
451 oldest_jif = jiffies - dirty_expire_interval;
452 start_jif = jiffies;
453 next_jif = start_jif + dirty_writeback_interval;
454 nr_to_write = global_page_state(NR_FILE_DIRTY) +
455 global_page_state(NR_UNSTABLE_NFS) +
456 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
457 while (nr_to_write > 0) {
458 wbc.encountered_congestion = 0;
459 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
460 writeback_inodes(&wbc);
461 if (wbc.nr_to_write > 0) {
462 if (wbc.encountered_congestion)
463 congestion_wait(WRITE, HZ/10);
464 else
465 break;
466 }
467 nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
468 }
469 if (time_before(next_jif, jiffies + HZ))
470 next_jif = jiffies + HZ;
471 if (dirty_writeback_interval)
472 mod_timer(&wb_timer, next_jif);
473}
474
475
476
477
478int dirty_writeback_centisecs_handler(ctl_table *table, int write,
479 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
480{
481 proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos);
482 if (dirty_writeback_interval) {
483 mod_timer(&wb_timer,
484 jiffies + dirty_writeback_interval);
485 } else {
486 del_timer(&wb_timer);
487 }
488 return 0;
489}
490
491static void wb_timer_fn(unsigned long unused)
492{
493 if (pdflush_operation(wb_kupdate, 0) < 0)
494 mod_timer(&wb_timer, jiffies + HZ);
495}
496
/* pdflush callback used in laptop mode: perform a full sys_sync(). */
static void laptop_flush(unsigned long unused)
{
	sys_sync();
}
501
502static void laptop_timer_fn(unsigned long unused)
503{
504 pdflush_operation(laptop_flush, 0);
505}
506
507
508
509
510
511
512void laptop_io_completion(void)
513{
514 mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode);
515}
516
517
518
519
520
521
522void laptop_sync_completion(void)
523{
524 del_timer(&laptop_mode_wb_timer);
525}
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544void writeback_set_ratelimit(void)
545{
546 ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
547 if (ratelimit_pages < 16)
548 ratelimit_pages = 16;
549 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
550 ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
551}
552
553static int __cpuinit
554ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
555{
556 writeback_set_ratelimit();
557 return NOTIFY_DONE;
558}
559
560static struct notifier_block __cpuinitdata ratelimit_nb = {
561 .notifier_call = ratelimit_handler,
562 .next = NULL,
563};
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583void __init page_writeback_init(void)
584{
585 mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
586 writeback_set_ratelimit();
587 register_cpu_notifier(&ratelimit_nb);
588}
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605int write_cache_pages(struct address_space *mapping,
606 struct writeback_control *wbc, writepage_t writepage,
607 void *data)
608{
609 struct backing_dev_info *bdi = mapping->backing_dev_info;
610 int ret = 0;
611 int done = 0;
612 struct pagevec pvec;
613 int nr_pages;
614 pgoff_t index;
615 pgoff_t end;
616 int scanned = 0;
617 int range_whole = 0;
618
619 if (wbc->nonblocking && bdi_write_congested(bdi)) {
620 wbc->encountered_congestion = 1;
621 return 0;
622 }
623
624 pagevec_init(&pvec, 0);
625 if (wbc->range_cyclic) {
626 index = mapping->writeback_index;
627 end = -1;
628 } else {
629 index = wbc->range_start >> PAGE_CACHE_SHIFT;
630 end = wbc->range_end >> PAGE_CACHE_SHIFT;
631 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
632 range_whole = 1;
633 scanned = 1;
634 }
635retry:
636 while (!done && (index <= end) &&
637 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
638 PAGECACHE_TAG_DIRTY,
639 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
640 unsigned i;
641
642 scanned = 1;
643 for (i = 0; i < nr_pages; i++) {
644 struct page *page = pvec.pages[i];
645
646
647
648
649
650
651
652
653 lock_page(page);
654
655 if (unlikely(page->mapping != mapping)) {
656 unlock_page(page);
657 continue;
658 }
659
660 if (!wbc->range_cyclic && page->index > end) {
661 done = 1;
662 unlock_page(page);
663 continue;
664 }
665
666 if (wbc->sync_mode != WB_SYNC_NONE)
667 wait_on_page_writeback(page);
668
669 if (PageWriteback(page) ||
670 !clear_page_dirty_for_io(page)) {
671 unlock_page(page);
672 continue;
673 }
674
675 ret = (*writepage)(page, wbc, data);
676
677 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
678 unlock_page(page);
679 ret = 0;
680 }
681 if (ret || (--(wbc->nr_to_write) <= 0))
682 done = 1;
683 if (wbc->nonblocking && bdi_write_congested(bdi)) {
684 wbc->encountered_congestion = 1;
685 done = 1;
686 }
687 }
688 pagevec_release(&pvec);
689 cond_resched();
690 }
691 if (!scanned && !done) {
692
693
694
695
696 scanned = 1;
697 index = 0;
698 goto retry;
699 }
700 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
701 mapping->writeback_index = index;
702 return ret;
703}
704EXPORT_SYMBOL(write_cache_pages);
705
706
707
708
709
710static int __writepage(struct page *page, struct writeback_control *wbc,
711 void *data)
712{
713 struct address_space *mapping = data;
714 int ret = mapping->a_ops->writepage(page, wbc);
715 mapping_set_error(mapping, ret);
716 return ret;
717}
718
719
720
721
722
723
724
725
726
727int generic_writepages(struct address_space *mapping,
728 struct writeback_control *wbc)
729{
730
731 if (!mapping->a_ops->writepage)
732 return 0;
733
734 return write_cache_pages(mapping, wbc, __writepage, mapping);
735}
736
737EXPORT_SYMBOL(generic_writepages);
738
739int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
740{
741 int ret;
742
743 if (wbc->nr_to_write <= 0)
744 return 0;
745 wbc->for_writepages = 1;
746 if (mapping->a_ops->writepages)
747 ret = mapping->a_ops->writepages(mapping, wbc);
748 else
749 ret = generic_writepages(mapping, wbc);
750 wbc->for_writepages = 0;
751 return ret;
752}
753
754
755
756
757
758
759
760
761
762
763int write_one_page(struct page *page, int wait)
764{
765 struct address_space *mapping = page->mapping;
766 int ret = 0;
767 struct writeback_control wbc = {
768 .sync_mode = WB_SYNC_ALL,
769 .nr_to_write = 1,
770 };
771
772 BUG_ON(!PageLocked(page));
773
774 if (wait)
775 wait_on_page_writeback(page);
776
777 if (clear_page_dirty_for_io(page)) {
778 page_cache_get(page);
779 ret = mapping->a_ops->writepage(page, &wbc);
780 if (ret == 0 && wait) {
781 wait_on_page_writeback(page);
782 if (PageError(page))
783 ret = -EIO;
784 }
785 page_cache_release(page);
786 } else {
787 unlock_page(page);
788 }
789 return ret;
790}
791EXPORT_SYMBOL(write_one_page);
792
793
794
795
/*
 * Set the dirty flag on @page if it is not already set, with no
 * accounting or radix-tree tagging.  Always returns 0.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (PageDirty(page))
		return 0;

	SetPageDirty(page);
	return 0;
}
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818int __set_page_dirty_nobuffers(struct page *page)
819{
820 if (!TestSetPageDirty(page)) {
821 struct address_space *mapping = page_mapping(page);
822 struct address_space *mapping2;
823
824 if (!mapping)
825 return 1;
826
827 write_lock_irq(&mapping->tree_lock);
828 mapping2 = page_mapping(page);
829 if (mapping2) {
830 BUG_ON(mapping2 != mapping);
831 if (mapping_cap_account_dirty(mapping)) {
832 __inc_zone_page_state(page, NR_FILE_DIRTY);
833 task_io_account_write(PAGE_CACHE_SIZE);
834 }
835 radix_tree_tag_set(&mapping->page_tree,
836 page_index(page), PAGECACHE_TAG_DIRTY);
837 }
838 write_unlock_irq(&mapping->tree_lock);
839 if (mapping->host) {
840
841 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
842 }
843 return 1;
844 }
845 return 0;
846}
847EXPORT_SYMBOL(__set_page_dirty_nobuffers);
848
849
850
851
852
853
854int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
855{
856 wbc->pages_skipped++;
857 return __set_page_dirty_nobuffers(page);
858}
859EXPORT_SYMBOL(redirty_page_for_writepage);
860
861
862
863
864
865int fastcall set_page_dirty(struct page *page)
866{
867 struct address_space *mapping = page_mapping(page);
868
869 if (likely(mapping)) {
870 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
871#ifdef CONFIG_BLOCK
872 if (!spd)
873 spd = __set_page_dirty_buffers;
874#endif
875 return (*spd)(page);
876 }
877 if (!PageDirty(page)) {
878 if (!TestSetPageDirty(page))
879 return 1;
880 }
881 return 0;
882}
883EXPORT_SYMBOL(set_page_dirty);
884
885
886
887
888
889
890
891
892
893
894
/*
 * Dirty @page with the page lock held: take the lock with
 * lock_page_nosync(), call set_page_dirty(), unlock, and return the
 * result of set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	lock_page_nosync(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);

	return dirtied;
}
EXPORT_SYMBOL(set_page_dirty_lock);
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920int clear_page_dirty_for_io(struct page *page)
921{
922 struct address_space *mapping = page_mapping(page);
923
924 if (mapping && mapping_cap_account_dirty(mapping)) {
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955 if (page_mkclean(page))
956 set_page_dirty(page);
957 if (TestClearPageDirty(page)) {
958 dec_zone_page_state(page, NR_FILE_DIRTY);
959 return 1;
960 }
961 return 0;
962 }
963 return TestClearPageDirty(page);
964}
965EXPORT_SYMBOL(clear_page_dirty_for_io);
966
967int test_clear_page_writeback(struct page *page)
968{
969 struct address_space *mapping = page_mapping(page);
970 int ret;
971
972 if (mapping) {
973 unsigned long flags;
974
975 write_lock_irqsave(&mapping->tree_lock, flags);
976 ret = TestClearPageWriteback(page);
977 if (ret)
978 radix_tree_tag_clear(&mapping->page_tree,
979 page_index(page),
980 PAGECACHE_TAG_WRITEBACK);
981 write_unlock_irqrestore(&mapping->tree_lock, flags);
982 } else {
983 ret = TestClearPageWriteback(page);
984 }
985 return ret;
986}
987
988int test_set_page_writeback(struct page *page)
989{
990 struct address_space *mapping = page_mapping(page);
991 int ret;
992
993 if (mapping) {
994 unsigned long flags;
995
996 write_lock_irqsave(&mapping->tree_lock, flags);
997 ret = TestSetPageWriteback(page);
998 if (!ret)
999 radix_tree_tag_set(&mapping->page_tree,
1000 page_index(page),
1001 PAGECACHE_TAG_WRITEBACK);
1002 if (!PageDirty(page))
1003 radix_tree_tag_clear(&mapping->page_tree,
1004 page_index(page),
1005 PAGECACHE_TAG_DIRTY);
1006 write_unlock_irqrestore(&mapping->tree_lock, flags);
1007 } else {
1008 ret = TestSetPageWriteback(page);
1009 }
1010 return ret;
1011
1012}
1013EXPORT_SYMBOL(test_set_page_writeback);
1014
1015
1016
1017
1018
1019int mapping_tagged(struct address_space *mapping, int tag)
1020{
1021 unsigned long flags;
1022 int ret;
1023
1024 read_lock_irqsave(&mapping->tree_lock, flags);
1025 ret = radix_tree_tagged(&mapping->page_tree, tag);
1026 read_unlock_irqrestore(&mapping->tree_lock, flags);
1027 return ret;
1028}
1029EXPORT_SYMBOL(mapping_tagged);
1030