1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18
/*
 * default_unplug_io_fn - unplug callback that does nothing.
 * @bdi:  backing device the callback is invoked for (ignored)
 * @page: page the callback is invoked for (ignored)
 *
 * Installed as ->unplug_io_fn of default_backing_dev_info in this file.
 */
void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
	/* Empty: this callback performs no work. */
}
EXPORT_SYMBOL(default_unplug_io_fn);
23
24
25
26
27
28
29#define MAX_RA_PAGES (VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE)
30#define MIN_RA_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
31
32struct backing_dev_info default_backing_dev_info = {
33 .ra_pages = MAX_RA_PAGES,
34 .state = 0,
35 .capabilities = BDI_CAP_MAP_COPY,
36 .unplug_io_fn = default_unplug_io_fn,
37};
38EXPORT_SYMBOL_GPL(default_backing_dev_info);
39
40
41
42
43
44void
45file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
46{
47 ra->ra_pages = mapping->backing_dev_info->ra_pages;
48 ra->prev_index = -1;
49}
50EXPORT_SYMBOL_GPL(file_ra_state_init);
51
52
53
54
55static inline unsigned long get_max_readahead(struct file_ra_state *ra)
56{
57 return ra->ra_pages;
58}
59
60static inline unsigned long get_min_readahead(struct file_ra_state *ra)
61{
62 return MIN_RA_PAGES;
63}
64
65static inline void reset_ahead_window(struct file_ra_state *ra)
66{
67
68
69
70
71
72
73 ra->ahead_size += ra->ahead_start;
74 ra->ahead_start = 0;
75}
76
77static inline void ra_off(struct file_ra_state *ra)
78{
79 ra->start = 0;
80 ra->flags = 0;
81 ra->size = 0;
82 reset_ahead_window(ra);
83 return;
84}
85
86
87
88
89
90
91
/*
 * get_init_ra_size - choose the size of the first readahead window.
 * @size: number of pages requested
 * @max:  upper bound for the window, in pages
 *
 * The request is rounded up to a power of two and then scaled:
 *   - larger than max/4      -> use max itself
 *   - larger than max/32     -> double it
 *   - otherwise              -> quadruple it
 *
 * Returns the chosen window size in pages.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long rounded = roundup_pow_of_two(size);

	if (rounded > max / 4)
		return max;

	if (rounded > max / 32)
		return rounded * 2;

	return rounded * 4;
}
104
105
106
107
108
109
110static inline unsigned long get_next_ra_size(struct file_ra_state *ra)
111{
112 unsigned long max = get_max_readahead(ra);
113 unsigned long min = get_min_readahead(ra);
114 unsigned long cur = ra->size;
115 unsigned long newsize;
116
117 if (ra->flags & RA_FLAG_MISS) {
118 ra->flags &= ~RA_FLAG_MISS;
119 newsize = max((cur - 2), min);
120 } else if (cur < max / 16) {
121 newsize = 4 * cur;
122 } else {
123 newsize = 2 * cur;
124 }
125 return min(newsize, max);
126}
127
128#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
129
130
131
132
133
134
135
136
137
138
139
140int read_cache_pages(struct address_space *mapping, struct list_head *pages,
141 int (*filler)(void *, struct page *), void *data)
142{
143 struct page *page;
144 struct pagevec lru_pvec;
145 int ret = 0;
146
147 pagevec_init(&lru_pvec, 0);
148
149 while (!list_empty(pages)) {
150 page = list_to_page(pages);
151 list_del(&page->lru);
152 if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
153 page_cache_release(page);
154 continue;
155 }
156 ret = filler(data, page);
157 if (!pagevec_add(&lru_pvec, page))
158 __pagevec_lru_add(&lru_pvec);
159 if (ret) {
160 put_pages_list(pages);
161 break;
162 }
163 task_io_account_read(PAGE_CACHE_SIZE);
164 }
165 pagevec_lru_add(&lru_pvec);
166 return ret;
167}
168
169EXPORT_SYMBOL(read_cache_pages);
170
171static int read_pages(struct address_space *mapping, struct file *filp,
172 struct list_head *pages, unsigned nr_pages)
173{
174 unsigned page_idx;
175 struct pagevec lru_pvec;
176 int ret;
177
178 if (mapping->a_ops->readpages) {
179 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
180
181 put_pages_list(pages);
182 goto out;
183 }
184
185 pagevec_init(&lru_pvec, 0);
186 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
187 struct page *page = list_to_page(pages);
188 list_del(&page->lru);
189 if (!add_to_page_cache(page, mapping,
190 page->index, GFP_KERNEL)) {
191 mapping->a_ops->readpage(filp, page);
192 if (!pagevec_add(&lru_pvec, page))
193 __pagevec_lru_add(&lru_pvec);
194 } else
195 page_cache_release(page);
196 }
197 pagevec_lru_add(&lru_pvec);
198 ret = 0;
199out:
200 return ret;
201}
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274static int
275__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
276 pgoff_t offset, unsigned long nr_to_read)
277{
278 struct inode *inode = mapping->host;
279 struct page *page;
280 unsigned long end_index;
281 LIST_HEAD(page_pool);
282 int page_idx;
283 int ret = 0;
284 loff_t isize = i_size_read(inode);
285
286 if (isize == 0)
287 goto out;
288
289 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
290
291
292
293
294 read_lock_irq(&mapping->tree_lock);
295 for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
296 pgoff_t page_offset = offset + page_idx;
297
298 if (page_offset > end_index)
299 break;
300
301 page = radix_tree_lookup(&mapping->page_tree, page_offset);
302 if (page)
303 continue;
304
305 read_unlock_irq(&mapping->tree_lock);
306 page = page_cache_alloc_cold(mapping);
307 read_lock_irq(&mapping->tree_lock);
308 if (!page)
309 break;
310 page->index = page_offset;
311 list_add(&page->lru, &page_pool);
312 ret++;
313 }
314 read_unlock_irq(&mapping->tree_lock);
315
316
317
318
319
320
321 if (ret)
322 read_pages(mapping, filp, &page_pool, ret);
323 BUG_ON(!list_empty(&page_pool));
324out:
325 return ret;
326}
327
328
329
330
331
332int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
333 pgoff_t offset, unsigned long nr_to_read)
334{
335 int ret = 0;
336
337 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
338 return -EINVAL;
339
340 while (nr_to_read) {
341 int err;
342
343 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
344
345 if (this_chunk > nr_to_read)
346 this_chunk = nr_to_read;
347 err = __do_page_cache_readahead(mapping, filp,
348 offset, this_chunk);
349 if (err < 0) {
350 ret = err;
351 break;
352 }
353 ret += err;
354 offset += this_chunk;
355 nr_to_read -= this_chunk;
356 }
357 return ret;
358}
359
360
361
362
363
364
365
366static inline int check_ra_success(struct file_ra_state *ra,
367 unsigned long nr_to_read, unsigned long actual)
368{
369 if (actual == 0) {
370 ra->cache_hit += nr_to_read;
371 if (ra->cache_hit >= VM_MAX_CACHE_HIT) {
372 ra_off(ra);
373 ra->flags |= RA_FLAG_INCACHE;
374 return 0;
375 }
376 } else {
377 ra->cache_hit=0;
378 }
379 return 1;
380}
381
382
383
384
385
386
387
388
389int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
390 pgoff_t offset, unsigned long nr_to_read)
391{
392 if (bdi_read_congested(mapping->backing_dev_info))
393 return -1;
394
395 return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
396}
397
398
399
400
401
402
403
404
405static int
406blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
407 pgoff_t offset, unsigned long nr_to_read,
408 struct file_ra_state *ra, int block)
409{
410 int actual;
411
412 if (!block && bdi_read_congested(mapping->backing_dev_info))
413 return 0;
414
415 actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
416
417 return check_ra_success(ra, nr_to_read, actual);
418}
419
420static int make_ahead_window(struct address_space *mapping, struct file *filp,
421 struct file_ra_state *ra, int force)
422{
423 int block, ret;
424
425 ra->ahead_size = get_next_ra_size(ra);
426 ra->ahead_start = ra->start + ra->size;
427
428 block = force || (ra->prev_index >= ra->ahead_start);
429 ret = blockable_page_cache_readahead(mapping, filp,
430 ra->ahead_start, ra->ahead_size, ra, block);
431
432 if (!ret && !force) {
433
434
435
436
437
438
439
440
441
442
443
444
445 reset_ahead_window(ra);
446 }
447
448 return ret;
449}
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469unsigned long
470page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
471 struct file *filp, pgoff_t offset, unsigned long req_size)
472{
473 unsigned long max, newsize;
474 int sequential;
475
476
477
478
479
480 if (offset == ra->prev_index && --req_size)
481 ++offset;
482
483
484 sequential = (offset == ra->prev_index + 1);
485 ra->prev_index = offset;
486 ra->prev_offset = 0;
487
488 max = get_max_readahead(ra);
489 newsize = min(req_size, max);
490
491
492 if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE))
493 goto out;
494
495 ra->prev_index += newsize - 1;
496
497
498
499
500
501
502 if (sequential && ra->size == 0) {
503 ra->size = get_init_ra_size(newsize, max);
504 ra->start = offset;
505 if (!blockable_page_cache_readahead(mapping, filp, offset,
506 ra->size, ra, 1))
507 goto out;
508
509
510
511
512
513
514
515
516
517 if (req_size >= max)
518 make_ahead_window(mapping, filp, ra, 1);
519
520 goto out;
521 }
522
523
524
525
526
527
528 if (!sequential) {
529 ra_off(ra);
530 blockable_page_cache_readahead(mapping, filp, offset,
531 newsize, ra, 1);
532 goto out;
533 }
534
535
536
537
538
539 if (ra->ahead_start == 0) {
540 if (!make_ahead_window(mapping, filp, ra, 0))
541 goto recheck;
542 }
543
544
545
546
547
548
549
550
551 if (ra->prev_index >= ra->ahead_start) {
552 ra->start = ra->ahead_start;
553 ra->size = ra->ahead_size;
554 make_ahead_window(mapping, filp, ra, 0);
555recheck:
556
557 ra->prev_index = min(ra->prev_index,
558 ra->ahead_start + ra->ahead_size - 1);
559 }
560
561out:
562 return ra->prev_index + 1;
563}
564EXPORT_SYMBOL_GPL(page_cache_readahead);
565
566
567
568
569
570
571
572
573
574
575void handle_ra_miss(struct address_space *mapping,
576 struct file_ra_state *ra, pgoff_t offset)
577{
578 ra->flags |= RA_FLAG_MISS;
579 ra->flags &= ~RA_FLAG_INCACHE;
580 ra->cache_hit = 0;
581}
582
583
584
585
586
587unsigned long max_sane_readahead(unsigned long nr)
588{
589 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
590 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
591}
592