/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <trace/events/writeback.h>

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many
 * pages by raising pause time to the gap between periodic global dirty
 * limit checks.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will check whether the dirtying task needs to be throttled.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 * of dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode".  Doubles as a timeout in
 * jiffies: when I/O completes, a timer is armed for laptop_mode jiffies
 * and then writeback of all dirty pages is started on the just-spun-up disk.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

unsigned long global_dirty_limit;

/*
 * Scale the writeback cache size proportional to the relative writeout speed.
 *
 * We do this by keeping a floating proportion between BDIs, based on page
 * writeback completions [end_page_writeback()].  Those devices that write
 * out pages fastest will get the larger share, while the slower ones will
 * get a smaller share.
 *
 * We use page writeout completions because we are interested in getting rid
 * of dirty pages.  Having them written out is the primary goal.
 */
static struct fprop_global writeout_completions;

static void writeout_period(unsigned long t);
/* Timer for aging of writeout_completions */
static struct timer_list writeout_period_timer =
		TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
static unsigned long writeout_period_time = 0;

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
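
/*
 * Returns the amount of highmem that may be dirtied, clamped to @total so
 * that highmem never accounts for more than the total dirtyable memory.
 * Returns 0 on !CONFIG_HIGHMEM configurations.
 */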
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z =
			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_page_state(z, NR_FREE_PAGES) +
		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
	}
	/*
	 * The subtraction above can underflow when the dirty balance
	 * reserve exceeds the free + reclaimable pages.  Clamp to zero.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Never report more highmem than the total dirtyable memory.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
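
/*
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */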
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
	x -= min(x, dirty_balance_reserve);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	/* Leave the min_free_kbytes reserve out of the dirtyable memory. */
	x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10));

	return x + 1;	/* Ensure that we never return 0 */
}
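
/*
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 *
 * Calculate the dirty thresholds based on sysctl parameters
 * - vm.dirty_background_ratio  or  vm.dirty_background_bytes
 * - vm.dirty_ratio             or  vm.dirty_bytes
 * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
 * real-time tasks.
 */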
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	unsigned long background;
	unsigned long dirty;
	unsigned long uninitialized_var(available_memory);
	struct task_struct *tsk;

	if (!vm_dirty_bytes || !dirty_background_bytes)
		available_memory = global_dirtyable_memory();

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else
		dirty = (vm_dirty_ratio * available_memory) / 100;

	if (dirty_background_bytes)
		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		background = (dirty_background_ratio * available_memory) / 100;

	if (background >= dirty)
		background = dirty / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;
	trace_global_dirty_state(background, dirty);
}
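
/*
 * zone_dirtyable_memory - number of dirtyable pages in a zone
 * @zone: the zone
 *
 * Returns the zone's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-zone dirty limits.
 */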
static unsigned long zone_dirtyable_memory(struct zone *zone)
{
	/*
	 * The global dirtyable memory may exclude highmem as a big-picture
	 * measure, but this function is purely about the individual zone's
	 * dirtyable memory, so no highmem exclusion here.
	 */
	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
		zone_reclaimable_pages(zone);

	/* don't allow this to underflow */
	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
	return nr_pages;
}
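
/*
 * zone_dirty_limit - maximum number of dirty pages allowed in a zone
 * @zone: the zone
 *
 * Returns the maximum number of dirty pages allowed in a zone, based
 * on the zone's dirtyable memory.
 */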
static unsigned long zone_dirty_limit(struct zone *zone)
{
	unsigned long zone_memory = zone_dirtyable_memory(zone);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			zone_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * zone_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}
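
/*
 * zone_dirty_ok - tells whether a zone is within its dirty limits
 * @zone: the zone to check
 *
 * Returns %true when the dirty pages in @zone are within the zone's
 * dirty limit, %false if the limit is exceeded.
 */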
bool zone_dirty_ok(struct zone *zone)
{
	unsigned long limit = zone_dirty_limit(zone);

	return zone_page_state(zone, NR_FILE_DIRTY) +
	       zone_page_state(zone, NR_UNSTABLE_NFS) +
	       zone_page_state(zone, NR_WRITEBACK) <= limit;
}

int dirty_background_ratio_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		writeback_set_ratelimit();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		writeback_set_ratelimit();
		vm_dirty_ratio = 0;
	}
	return ret;
}

static unsigned long wp_next_time(unsigned long cur_time)
{
	cur_time += VM_COMPLETIONS_PERIOD_LEN;
	/* 0 has a special meaning (timer not armed), avoid returning it */
	if (!cur_time)
		return 1;
	return cur_time;
}

/*
 * Increment the BDI's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
	__inc_bdi_stat(bdi, BDI_WRITTEN);
	__fprop_inc_percpu_max(&writeout_completions, &bdi->completions,
			       bdi->max_prop_frac);
	/* First event after period switching was turned off? */
	if (!unlikely(writeout_period_time)) {
		/*
		 * We can race with other __bdi_writeout_inc calls here but
		 * it does not cause any harm since the resulting period
		 * transition is a noop anyway.  We can also race with timer
		 * deactivation, which only results in one extra (harmless)
		 * timer firing.
		 */
		writeout_period_time = wp_next_time(jiffies);
		mod_timer(&writeout_period_timer, writeout_period_time);
	}
}

void bdi_writeout_inc(struct backing_dev_info *bdi)
{
	unsigned long flags;

	local_irq_save(flags);
	__bdi_writeout_inc(bdi);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

/*
 * Obtain an accurate fraction of the BDI's portion.
 */
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
		long *numerator, long *denominator)
{
	fprop_fraction_percpu(&writeout_completions, &bdi->completions,
				numerator, denominator);
}

/*
 * On an idle system we can be called long after we scheduled because we use
 * deferred timers, so count in the missed periods.
 */
static void writeout_period(unsigned long t)
{
	int miss_periods = (jiffies - writeout_period_time) /
						 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&writeout_completions, miss_periods + 1)) {
		writeout_period_time = wp_next_time(writeout_period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&writeout_period_timer, writeout_period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		writeout_period_time = 0;
	}
}

/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices, which, for obvious reasons, can not
 * exceed 100%.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

static unsigned long hard_dirty_limit(unsigned long thresh)
{
	return max(thresh, global_dirty_limit);
}
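
/*
 * bdi_dirty_limit - @bdi's share of the dirty throttling threshold
 * @bdi: the backing_dev_info to query
 * @dirty: global dirty limit in pages
 *
 * Returns @bdi's dirty limit in pages, allocating higher limits to faster
 * devices based on their recent writeout completions, and bounding the
 * result by bdi->min_ratio and bdi->max_ratio when those are set.
 */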
unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
{
	u64 bdi_dirty;
	long numerator, denominator;

	/*
	 * Calculate this BDI's share of the dirty ratio.
	 */
	bdi_writeout_fraction(bdi, &numerator, &denominator);

	bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
	bdi_dirty *= numerator;
	do_div(bdi_dirty, denominator);

	bdi_dirty += (dirty * bdi->min_ratio) / 100;
	if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
		bdi_dirty = dirty * bdi->max_ratio / 100;

	return bdi_dirty;
}
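
/*
 * bdi_position_ratio - scale the base dirty throttle rate by dirty position
 *
 * The returned ratio (in units of 1 << RATELIMIT_CALC_SHIFT) scales
 * bdi->dirty_ratelimit up while the number of dirty pages is below the
 * setpoint (midway between the freerun ceiling and the hard limit) and
 * down as it approaches the limit, dropping to 0 at or above the limit.
 * A second, bdi-local control line applies the same idea to the bdi's own
 * dirty page count, its threshold and its write bandwidth.
 */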
static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
					unsigned long thresh,
					unsigned long bg_thresh,
					unsigned long dirty,
					unsigned long bdi_thresh,
					unsigned long bdi_dirty)
{
	unsigned long write_bw = bdi->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
	unsigned long limit = hard_dirty_limit(thresh);
	unsigned long x_intercept;
	unsigned long setpoint;
	unsigned long bdi_setpoint;
	unsigned long span;
	long long pos_ratio;
	long x;

	if (unlikely(dirty >= limit))
		return 0;

	/*
	 * global setpoint: a cubic curve through (freerun, 2.0),
	 * (setpoint, 1.0) and (limit, 0), scaled by RATELIMIT_CALC_SHIFT.
	 */
	setpoint = (freerun + limit) / 2;
	x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		    limit - setpoint + 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	/*
	 * bdi setpoint: apply a linear correction based on this bdi's own
	 * dirty page count relative to its setpoint and threshold.
	 */
	if (unlikely(bdi_thresh > thresh))
		bdi_thresh = thresh;
	/*
	 * bdi_thresh can be close to 0 simply because the bdi has been
	 * inactive.  Honour a minimum threshold so that such a bdi can
	 * ramp up quickly.
	 */
	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);

	/*
	 * Scale the global setpoint down to the bdi's share:
	 * bdi_setpoint = setpoint * bdi_thresh / thresh
	 */
	x = div_u64((u64)bdi_thresh << 16, thresh + 1);
	bdi_setpoint = setpoint * (u64)x >> 16;
	/*
	 * The bdi control line runs from bdi_setpoint down to x_intercept,
	 * with a span that scales with the bdi's share of the threshold and
	 * its write bandwidth.
	 */
	span = (thresh - bdi_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = bdi_setpoint + span;

	if (bdi_dirty < x_intercept - span / 4) {
		pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
				    x_intercept - bdi_setpoint + 1);
	} else
		pos_ratio /= 4;

	/*
	 * bdi reserve area: safeguard against dirty pool underrun and disk
	 * idle by boosting the rate for nearly-idle bdis.
	 */
	x_intercept = bdi_thresh / 2;
	if (bdi_dirty < x_intercept) {
		if (bdi_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept, bdi_dirty);
		else
			pos_ratio *= 8;
	}

	return pos_ratio;
}

static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
				       unsigned long elapsed,
				       unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = bdi->avg_write_bandwidth;
	unsigned long old = bdi->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 * then blended with the old bandwidth, weighted by elapsed/period:
	 * write_bandwidth = (bw * elapsed +
	 *		      write_bandwidth * (period - elapsed)) / period
	 */
	bw = written - bdi->written_stamp;
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		do_div(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)bdi->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	bdi->write_bandwidth = bw;
	bdi->avg_write_bandwidth = avg;
}

/*
 * The global dirty threshold can drop suddenly (e.g. when lots of anonymous
 * memory is allocated on a swapless system).  Track it in global_dirty_limit,
 * which follows increases immediately but decays slowly, so that dirtier
 * tasks are not all thrown into deep throttling at once.
 */
static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
{
	unsigned long limit = global_dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly.  Use the higher one as the target, because
	 * thresh may drop below dirty; global_dirty_limit is guaranteed to
	 * stay above the number of dirty pages.
	 */
	thresh = max(thresh, dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	global_dirty_limit = limit;
}

static void global_update_bandwidth(unsigned long thresh,
				    unsigned long dirty,
				    unsigned long now)
{
	static DEFINE_SPINLOCK(dirty_lock);
	static unsigned long update_time;

	/*
	 * check locklessly first to optimize away locking for most of the time
	 */
	if (time_before(now, update_time + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dirty_lock);
	if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(thresh, dirty);
		update_time = now;
	}
	spin_unlock(&dirty_lock);
}
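
/*
 * Maintain bdi->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal bdi tasks will be curbed at or below it in the long term.
 * It should be around (write_bw / N) when there are N dd tasks.
 */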
static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
				       unsigned long thresh,
				       unsigned long bg_thresh,
				       unsigned long dirty,
				       unsigned long bdi_thresh,
				       unsigned long bdi_dirty,
				       unsigned long dirtied,
				       unsigned long elapsed)
{
	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
	unsigned long limit = hard_dirty_limit(thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = bdi->avg_write_bandwidth;
	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long pos_ratio;
	unsigned long step;
	unsigned long x;

	/*
	 * The dirty rate will match the writeout rate in the long term,
	 * except when dirty pages are truncated or re-dirtied by the FS.
	 */
	dirty_rate = (dirtied - bdi->dirtied_stamp) * HZ / elapsed;

	pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
				       bdi_thresh, bdi_dirty);
	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* helps ramp up dirty_ratelimit when dirty_rate is 0 */

	/*
	 * A linear estimation of the "balanced" throttle rate: if there are
	 * N dd tasks each throttled at task_ratelimit, the bdi's dirty_rate
	 * will be measured as roughly N * task_ratelimit, so
	 *
	 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);
	/*
	 * balanced_dirty_ratelimit ~ write_bw / N, so it is reasonable to
	 * limit it to the bdi's write bandwidth.
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * Only step dirty_ratelimit towards balanced_dirty_ratelimit when
	 * they lie on the same side of the setpoint, so that estimation
	 * errors in balanced_dirty_ratelimit and task_ratelimit tend to
	 * cancel out rather than accumulate.
	 */
	step = 0;
	if (dirty < setpoint) {
		x = min(bdi->balanced_dirty_ratelimit,
			 min(balanced_dirty_ratelimit, task_ratelimit));
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max(bdi->balanced_dirty_ratelimit,
			 max(balanced_dirty_ratelimit, task_ratelimit));
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching.  The balanced rate itself is
	 * constantly fluctuating, so decrease the tracking speed as it gets
	 * close to the target to eliminate pointless tremors.
	 */
	step >>= dirty_ratelimit / (2 * step + 1);
	/*
	 * Limit the tracking speed to avoid overshooting.
	 */
	step = (step + 7) / 8;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	bdi->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	bdi->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(bdi, dirty_rate, task_ratelimit);
}

void __bdi_update_bandwidth(struct backing_dev_info *bdi,
			    unsigned long thresh,
			    unsigned long bg_thresh,
			    unsigned long dirty,
			    unsigned long bdi_thresh,
			    unsigned long bdi_dirty,
			    unsigned long start_time)
{
	unsigned long now = jiffies;
	unsigned long elapsed = now - bdi->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&bdi->bdi_stat[BDI_DIRTIED]);
	written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized
	 * (at least 1s idle time between two flusher runs).
	 */
	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
		goto snapshot;

	if (thresh) {
		global_update_bandwidth(thresh, dirty, now);
		bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
					   bdi_thresh, bdi_dirty,
					   dirtied, elapsed);
	}
	bdi_update_write_bandwidth(bdi, elapsed, written);

snapshot:
	bdi->dirtied_stamp = dirtied;
	bdi->written_stamp = written;
	bdi->bw_time_stamp = now;
}

static void bdi_update_bandwidth(struct backing_dev_info *bdi,
				 unsigned long thresh,
				 unsigned long bg_thresh,
				 unsigned long dirty,
				 unsigned long bdi_thresh,
				 unsigned long bdi_dirty,
				 unsigned long start_time)
{
	if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
		return;
	spin_lock(&bdi->wb.list_lock);
	__bdi_update_bandwidth(bdi, thresh, bg_thresh, dirty,
			       bdi_thresh, bdi_dirty, start_time);
	spin_unlock(&bdi->wb.list_lock);
}
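
/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look at the dirty limits again.  When dirty pages are well within
 * the limits the interval is large, and it shrinks (down to 1) as the
 * dirty count approaches the threshold.
 */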
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}

static long bdi_max_pause(struct backing_dev_info *bdi,
			  unsigned long bdi_dirty)
{
	long bw = bdi->avg_write_bandwidth;
	long t;

	/*
	 * Limit pause time for small memory systems.  If sleeping for too
	 * long, the small pool of dirty/writeback pages may go empty and
	 * the disk go idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(long, t, MAX_PAUSE);
}

static long bdi_min_pause(struct backing_dev_info *bdi,
			  long max_pause,
			  unsigned long task_ratelimit,
			  unsigned long dirty_ratelimit,
			  int *nr_dirtied_pause)
{
	long hi = ilog2(bdi->avg_write_bandwidth);
	long lo = ilog2(bdi->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * Base the next nr_dirtied_pause on the more stable dirty_ratelimit,
	 * but first cap the target pause at half of max_pause so that the
	 * pause computed from task_ratelimit below cannot wildly exceed it.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * A tiny nr_dirtied_pause hurts I/O performance with some workloads
	 * and I/O schedulers, so push it up to at least DIRTY_POLL_THRESH
	 * pages when that can be done within max_pause.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}
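
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2
 * freerun ceiling.  If we're over `background_thresh' then the writeback
 * threads are woken to perform some writeout.
 */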
static void balance_dirty_pages(struct address_space *mapping,
				unsigned long pages_dirtied)
{
	unsigned long nr_reclaimable;	/* = file_dirty + unstable_nfs */
	unsigned long bdi_reclaimable;
	unsigned long nr_dirty;
	unsigned long bdi_dirty;
	unsigned long freerun;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	unsigned long pos_ratio;
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;

		/*
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);

		global_dirty_limits(&background_thresh, &dirty_thresh);

		/*
		 * Throttle it only when the background writeback cannot
		 * catch up.  This avoids (excessively) small writeouts
		 * when the bdi limits are ramping up.
		 */
		freerun = dirty_freerun_ceiling(dirty_thresh,
						background_thresh);
		if (nr_dirty <= freerun) {
			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			current->nr_dirtied_pause =
				dirty_poll_interval(nr_dirty, dirty_thresh);
			break;
		}

		if (unlikely(!writeback_in_progress(bdi)))
			bdi_start_background_writeback(bdi);

		/*
		 * bdi_thresh is not treated as a hard limit the way
		 * dirty_thresh is: in JBOD setups it can fluctuate a lot,
		 * and a slow device may start out with bdi_dirty far above
		 * its (low) bdi_thresh, which should not hard-throttle its
		 * dirtiers for a long time.
		 */
		bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);

		/*
		 * In order to avoid the stacked BDI deadlock we need
		 * to ensure we accurately count the 'dirty' pages when
		 * the threshold is low: use the (expensive) exact sums
		 * instead of the approximate per-cpu counters there.
		 */
		if (bdi_thresh < 2 * bdi_stat_error(bdi)) {
			bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
			bdi_dirty = bdi_reclaimable +
				    bdi_stat_sum(bdi, BDI_WRITEBACK);
		} else {
			bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
			bdi_dirty = bdi_reclaimable +
				    bdi_stat(bdi, BDI_WRITEBACK);
		}

		dirty_exceeded = (bdi_dirty > bdi_thresh) &&
				 (nr_dirty > dirty_thresh);
		if (dirty_exceeded && !bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		bdi_update_bandwidth(bdi, dirty_thresh, background_thresh,
				     nr_dirty, bdi_thresh, bdi_dirty,
				     start_time);

		dirty_ratelimit = bdi->dirty_ratelimit;
		pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
					       background_thresh, nr_dirty,
					       bdi_thresh, bdi_dirty);
		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = bdi_max_pause(bdi, bdi_dirty);
		min_pause = bdi_min_pause(bdi, max_pause,
					  task_ratelimit, dirty_ratelimit,
					  &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(bdi,
						  dirty_thresh,
						  background_thresh,
						  nr_dirty,
						  bdi_thresh,
						  bdi_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(bdi,
					  dirty_thresh,
					  background_thresh,
					  nr_dirty,
					  bdi_thresh,
					  bdi_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (nr_dirty < dirty_thresh) and can
		 * also keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * When e.g. an unresponsive NFS server pushes its dirty pages
		 * over dirty_thresh, give tasks on other, healthy bdis a way
		 * through so they remain responsive once their own bdi dirty
		 * count has dropped into the accounting error margin.
		 */
		if (bdi_dirty <= bdi_stat_error(bdi))
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > background_thresh)
		bdi_start_background_writeback(bdi);
}

void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	if (set_page_dirty(page) || page_mkwrite) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}

static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * Pages dirtied by tasks that never reach their next snap (for example
 * short-lived tasks) accumulate in dirty_throttle_leaks and are charged to
 * other running dirtier tasks in balance_dirty_pages_ratelimited().
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
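
/*
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Reading the global dirty counters is expensive on big machines, so try to
 * avoid calling balance_dirty_pages() too often (ratelimiting).  But once
 * we're over the dirty memory limit we decrease the ratelimiting by a lot, to
 * prevent individual processes from overshooting the limit by
 * (ratelimit_pages) each.
 */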
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	ratelimit = current->nr_dirtied_pause;
	if (bdi->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU from accumulating too many dirtied pages
	 * without calling into balance_dirty_pages(), which can happen when
	 * 1000+ tasks all start dirtying pages at exactly the same time and
	 * hence all honoured a too-large initial task->nr_dirtied_pause.
	 */
	p = &__get_cpu_var(bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the leaked page dirties of exited tasks, which cannot call
	 * balance_dirty_pages() themselves, and charge them to this task.
	 */
	p = &__get_cpu_var(dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(mapping, current->nr_dirtied);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

void throttle_vm_writeout(gfp_t gfp_mask)
{
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	for ( ; ; ) {
		global_dirty_limits(&background_thresh, &dirty_thresh);
		dirty_thresh = hard_dirty_limit(dirty_thresh);

		/*
		 * Boost the allowable dirty threshold a bit for page
		 * allocators so they don't get DoS'ed by heavy writers
		 */
		dirty_thresh += dirty_thresh / 10;

		if (global_page_state(NR_UNSTABLE_NFS) +
			global_page_state(NR_WRITEBACK) <= dirty_thresh)
			break;
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		/*
		 * The caller might hold locks which can prevent IO completion
		 * or progress in the filesystem.  So we cannot just sit here
		 * waiting forever for dirty pages to be cleaned.
		 */
		if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
			break;
	}
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
	void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, buffer, length, ppos);
	return 0;
}

#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	int nr_pages = global_page_state(NR_FILE_DIRTY) +
		global_page_state(NR_UNSTABLE_NFS);

	/*
	 * We want to write everything out, not just down to the dirty
	 * threshold
	 */
	if (bdi_has_dirty_io(&q->backing_dev_info))
		bdi_start_writeback(&q->backing_dev_info, nr_pages,
					WB_REASON_LAPTOP_TIMER);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced.  The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
#endif

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive) dirty limit
 * checks too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds before writeback cuts in.
 */
void writeback_set_ratelimit(void)
{
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	global_dirty_limits(&background_thresh, &dirty_thresh);
	global_dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}

static int
ratelimit_handler(struct notifier_block *self, unsigned long action,
		  void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DEAD:
		writeback_set_ratelimit();
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block ratelimit_nb = {
	.notifier_call	= ratelimit_handler,
	.next		= NULL,
};

/*
 * Called early on to tune the page writeback dirty limits.
 */
void __init page_writeback_init(void)
{
	writeback_set_ratelimit();
	register_cpu_notifier(&ratelimit_nb);

	fprop_global_init(&writeout_completions);
}
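
/*
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag.  The idea is
 * that write_cache_pages (or whoever calls this function) will then use the
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */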
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
	unsigned long tagged;

	do {
		spin_lock_irq(&mapping->tree_lock);
		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
				&start, end, WRITEBACK_TAG_BATCH,
				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
		spin_unlock_irq(&mapping->tree_lock);
		WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
		cond_resched();
		/* We check 'start' to handle wrapping when end == ~0UL */
	} while (tagged >= WRITEBACK_TAG_BATCH && start);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
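
/*
 * write_cache_pages - walk the list of dirty pages of the given address space
 * and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them.  For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set).  The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 */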
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * The page may have been truncated or invalidated
			 * (changing page->mapping to NULL) by now, but
			 * page->index will not change because we hold a
			 * reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated.  We can freely skip
			 * it then, even for data integrity operations: the
			 * page has disappeared concurrently, so there is no
			 * expectation that this data integrity operation
			 * covers any new, dirty page at the same pagecache
			 * address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, mapping->backing_dev_info);
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file.  This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync.  In case of integrity sync we have
			 * to keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);

/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/*
 * generic_writepages - walk the list of dirty pages of the given address
 * space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);

int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	if (mapping->a_ops->writepages)
		ret = mapping->a_ops->writepages(mapping, wbc);
	else
		ret = generic_writepages(mapping, wbc);
	return ret;
}

/*
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		page_cache_get(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0 && wait) {
			wait_on_page_writeback(page);
			if (PageError(page))
				ret = -EIO;
		}
		page_cache_release(page);
	} else {
		unlock_page(page);
	}
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}

/*
 * Helper function for set_page_dirty family.
 * NOTE: This relies on being atomic wrt interrupts.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		__inc_zone_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_DIRTIED);
		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
		task_io_account_write(PAGE_CACHE_SIZE);
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);
	}
}
EXPORT_SYMBOL(account_page_dirtied);

/*
 * Helper function for set_page_writeback family: account one page
 * entering writeback.
 */
void account_page_writeback(struct page *page)
{
	inc_zone_page_state(page, NR_WRITEBACK);
}
EXPORT_SYMBOL(account_page_writeback);
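
/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * Most callers have locked the page, which pins the address_space in memory.
 * We take care to handle the case where the page was truncated from the
 * mapping by re-checking page_mapping() inside tree_lock.
 */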
int __set_page_dirty_nobuffers(struct page *page)
{
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		struct address_space *mapping2;

		if (!mapping)
			return 1;

		spin_lock_irq(&mapping->tree_lock);
		mapping2 = page_mapping(page);
		if (mapping2) { /* Race with truncate? */
			BUG_ON(mapping2 != mapping);
			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
			account_page_dirtied(page, mapping);
			radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
		}
		spin_unlock_irq(&mapping->tree_lock);
		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, BDI_WRITTEN) in the long term.  Mismatches would lead
 * to systematic errors in balanced_dirty_ratelimit and the dirty pages
 * position control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	if (mapping && mapping_cap_account_dirty(mapping)) {
		current->nr_dirtied--;
		dec_zone_page_state(page, NR_DIRTIED);
		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
	}
}
EXPORT_SYMBOL(account_page_redirty);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	wbc->pages_skipped++;
	account_page_redirty(page);
	return __set_page_dirty_nobuffers(page);
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock
 * for the benefit of asynchronous memory errors which prefer a consistent
 * dirty state.  This rule can be broken in some special cases,
 * but should be better not to.
 *
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * Clear a stale PG_reclaim flag (which shares its bit with
		 * PG_readahead) possibly left over by reclaim or readahead,
		 * so that a redirtied page is not mistakenly treated as one
		 * that can be rotated to the tail of the LRU at writeback
		 * completion.
		 */
		ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);
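
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the radix tree so that a concurrent write-for-sync
 * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will run either set_page_writeback() or set_page_dirty(),
 * at which stage we bring the page's dirty flag and radix-tree dirty tag
 * back into sync.
 *
 * This incoherency between the page's dirty flag and radix-tree tag is
 * unfortunate, but it only exists while the page is locked.
 */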
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	BUG_ON(!PageLocked(page));

	if (mapping && mapping_cap_account_dirty(mapping)) {
		/*
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * The page's "master" dirty bit thereby serves as a
		 * serialization point for all the different threads
		 * manipulating the dirty state concurrently.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against
		 * installing a dirty pte and marking the page dirty
		 * at this point.  We do this by having them hold the
		 * page lock while dirtying the page, and pages are
		 * always locked coming in here, so we get the desired
		 * exclusion.
		 */
		if (TestClearPageDirty(page)) {
			dec_zone_page_state(page, NR_FILE_DIRTY);
			dec_bdi_stat(mapping->backing_dev_info,
					BDI_RECLAIMABLE);
			return 1;
		}
		return 0;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				__dec_bdi_stat(bdi, BDI_WRITEBACK);
				__bdi_writeout_inc(bdi);
			}
		}
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		dec_zone_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_WRITTEN);
	}
	return ret;
}

int test_set_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			radix_tree_tag_set(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_bdi_stat(bdi, BDI_WRITEBACK);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		radix_tree_tag_clear(&mapping->page_tree,
					page_index(page),
					PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret)
		account_page_writeback(page);
	return ret;

}
EXPORT_SYMBOL(test_set_page_writeback);

/*
 * Return true if any of the pages in the mapping are marked with the
 * passed tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	return radix_tree_tagged(&mapping->page_tree, tag);
}
EXPORT_SYMBOL(mapping_tagged);
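
/*
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */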
void wait_for_stable_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct backing_dev_info *bdi = mapping->backing_dev_info;

	if (!bdi_cap_stable_pages_required(bdi))
		return;

	wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);