1
2
3
4
5
6
7
8
9
10
11
12#include <linux/stat.h>
13#include <linux/sched.h>
14#include <linux/kernel.h>
15#include <linux/mm.h>
16#include <linux/shm.h>
17#include <linux/errno.h>
18#include <linux/mman.h>
19#include <linux/string.h>
20#include <linux/malloc.h>
21#include <linux/fs.h>
22#include <linux/locks.h>
23#include <linux/pagemap.h>
24#include <linux/swap.h>
25#include <linux/smp.h>
26#include <linux/smp_lock.h>
27
28#include <asm/system.h>
29#include <asm/pgtable.h>
30#include <asm/uaccess.h>
31
32
33
34
35
36
37
38
/*
 * Global page-cache bookkeeping.
 *
 * page_cache_size starts at zero; nothing in this part of the file updates
 * it, so it is presumably maintained by the hash-queue helpers declared
 * elsewhere (TODO confirm).
 */
unsigned long page_cache_size = 0;
/* Bucket heads of the page hash, PAGE_HASH_SIZE entries of struct page pointers. */
struct page * page_hash_table[PAGE_HASH_SIZE];
41
42
43
44
45
/*
 * release_page(page) is a plain alias for __free_page(page).  This file uses
 * it to give back the page reference obtained from a cache lookup.
 */
#define release_page(page) __free_page((page))
47
48
49
50
51
52
53void invalidate_inode_pages(struct inode * inode)
54{
55 struct page ** p;
56 struct page * page;
57
58 p = &inode->i_pages;
59 while ((page = *p) != NULL) {
60 if (PageLocked(page)) {
61 p = &page->next;
62 continue;
63 }
64 inode->i_nrpages--;
65 if ((*p = page->next) != NULL)
66 (*p)->prev = page->prev;
67 page->next = NULL;
68 page->prev = NULL;
69 remove_page_from_hash_queue(page);
70 page->inode = NULL;
71 __free_page(page);
72 continue;
73 }
74}
75
76
77
78
79
80void truncate_inode_pages(struct inode * inode, unsigned long start)
81{
82 struct page ** p;
83 struct page * page;
84
85repeat:
86 p = &inode->i_pages;
87 while ((page = *p) != NULL) {
88 unsigned long offset = page->offset;
89
90
91 if (offset >= start) {
92 if (PageLocked(page)) {
93 wait_on_page(page);
94 goto repeat;
95 }
96 inode->i_nrpages--;
97 if ((*p = page->next) != NULL)
98 (*p)->prev = page->prev;
99 page->next = NULL;
100 page->prev = NULL;
101 remove_page_from_hash_queue(page);
102 page->inode = NULL;
103 __free_page(page);
104 continue;
105 }
106 p = &page->next;
107 offset = start - offset;
108
109 if (offset < PAGE_SIZE) {
110 unsigned long address = page_address(page);
111 memset((void *) (offset + address), 0, PAGE_SIZE - offset);
112 flush_page_to_ram(address);
113 }
114 }
115}
116
117int shrink_mmap(int priority, int dma)
118{
119 static unsigned long clock = 0;
120 struct page * page;
121 unsigned long limit = num_physpages;
122 struct buffer_head *tmp, *bh;
123 int count_max, count_min;
124
125 count_max = (limit<<1) >> (priority>>1);
126 count_min = (limit<<1) >> (priority);
127
128 page = mem_map + clock;
129 do {
130 count_max--;
131 if (page->inode || page->buffers)
132 count_min--;
133
134 if (PageLocked(page))
135 goto next;
136 if (dma && !PageDMA(page))
137 goto next;
138
139
140 bh = page->buffers;
141 if (bh) {
142 tmp = bh;
143 do {
144 if (buffer_touched(tmp)) {
145 clear_bit(BH_Touched, &tmp->b_state);
146 set_bit(PG_referenced, &page->flags);
147 }
148 tmp = tmp->b_this_page;
149 } while (tmp != bh);
150 }
151
152
153
154
155
156
157
158 switch (atomic_read(&page->count)) {
159 case 1:
160
161 if (test_and_clear_bit(PG_referenced, &page->flags))
162 break;
163
164
165 if (page->inode) {
166 remove_page_from_hash_queue(page);
167 remove_page_from_inode_queue(page);
168 __free_page(page);
169 return 1;
170 }
171
172
173 if (bh && try_to_free_buffer(bh, &bh, 6))
174 return 1;
175 break;
176
177 default:
178
179 set_bit(PG_referenced, &page->flags);
180
181 case 0:
182
183 }
184next:
185 page++;
186 clock++;
187 if (clock >= limit) {
188 clock = 0;
189 page = mem_map;
190 }
191 } while (count_max > 0 && count_min > 0);
192 return 0;
193}
194
195
196
197
198
199
200
201unsigned long page_unuse(unsigned long page)
202{
203 struct page * p = mem_map + MAP_NR(page);
204 int count = atomic_read(&p->count);
205
206 if (count != 2)
207 return count;
208 if (!p->inode)
209 return count;
210 remove_page_from_hash_queue(p);
211 remove_page_from_inode_queue(p);
212 free_page(page);
213 return 1;
214}
215
216
217
218
219
220void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
221{
222 unsigned long offset, len;
223
224 offset = (pos & ~PAGE_MASK);
225 pos = pos & PAGE_MASK;
226 len = PAGE_SIZE - offset;
227 do {
228 struct page * page;
229
230 if (len > count)
231 len = count;
232 page = find_page(inode, pos);
233 if (page) {
234 wait_on_page(page);
235 memcpy((void *) (offset + page_address(page)), buf, len);
236 release_page(page);
237 }
238 count -= len;
239 buf += len;
240 len = PAGE_SIZE;
241 offset = 0;
242 pos += PAGE_SIZE;
243 } while (count);
244}
245
246static inline void add_to_page_cache(struct page * page,
247 struct inode * inode, unsigned long offset,
248 struct page **hash)
249{
250 atomic_inc(&page->count);
251 page->flags &= ~((1 << PG_uptodate) | (1 << PG_error));
252 page->offset = offset;
253 add_page_to_inode_queue(inode, page);
254 __add_page_to_hash_queue(page, hash);
255}
256
257
258
259
260
261
262static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
263{
264 struct page * page;
265 struct page ** hash;
266
267 offset &= PAGE_MASK;
268 switch (page_cache) {
269 case 0:
270 page_cache = __get_free_page(GFP_KERNEL);
271 if (!page_cache)
272 break;
273 default:
274 if (offset >= inode->i_size)
275 break;
276 hash = page_hash(inode, offset);
277 page = __find_page(inode, offset, *hash);
278 if (!page) {
279
280
281
282 page = mem_map + MAP_NR(page_cache);
283 add_to_page_cache(page, inode, offset, hash);
284 inode->i_op->readpage(inode, page);
285 page_cache = 0;
286 }
287 release_page(page);
288 }
289 return page_cache;
290}
291
292
293
294
295
296
297
298
299void __wait_on_page(struct page *page)
300{
301 struct wait_queue wait = { current, NULL };
302
303 add_wait_queue(&page->wait, &wait);
304repeat:
305 run_task_queue(&tq_disk);
306 current->state = TASK_UNINTERRUPTIBLE;
307 if (PageLocked(page)) {
308 schedule();
309 goto repeat;
310 }
311 remove_wait_queue(&page->wait, &wait);
312 current->state = TASK_RUNNING;
313}
314
/*
 * Read-ahead instrumentation switches.  Both are compiled out by the
 * "#if 0"; PROFILE_READAHEAD enables profile_readahead() and its counters,
 * DEBUG_READAHEAD adds a per-file snapshot line to its report.
 */
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
#endif
319
320
321
322
323
324
325
326
327
328
329
330
#ifdef PROFILE_READAHEAD

/* Number of samples to accumulate before a report is printed. */
#define PROFILE_MAXREADCOUNT 1000

/* Running totals since the last report. */
static unsigned long total_reada;
static unsigned long total_async;
static unsigned long total_ramax;
static unsigned long total_ralen;
static unsigned long total_rawin;

/*
 * Account one read-ahead operation on "filp" ("async" non-zero when it was
 * an asynchronous one).  Once more than PROFILE_MAXREADCOUNT samples have
 * been collected, print the averages and reset all totals.
 */
static void profile_readahead(int async, struct file *filp)
{
	unsigned long flags;

	total_reada++;
	if (async)
		total_async++;

	total_ramax += filp->f_ramax;
	total_ralen += filp->f_ralen;
	total_rawin += filp->f_rawin;

	if (total_reada <= PROFILE_MAXREADCOUNT)
		return;

	save_flags(flags);
	cli();
	/* Re-test with interrupts off: the totals may have been reset meanwhile. */
	if (total_reada > PROFILE_MAXREADCOUNT) {
		printk("Readahead average:  max=%ld, len=%ld, win=%ld, async=%ld%%\n",
			total_ramax/total_reada,
			total_ralen/total_reada,
			total_rawin/total_reada,
			(total_async*100)/total_reada);
#ifdef DEBUG_READAHEAD
		printk("Readahead snapshot: max=%ld, len=%ld, win=%ld, raend=%ld\n",
			filp->f_ramax, filp->f_ralen, filp->f_rawin, filp->f_raend);
#endif

		total_reada = 0;
		total_async = 0;
		total_ramax = 0;
		total_ralen = 0;
		total_rawin = 0;
	}
	restore_flags(flags);
}
#endif
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
/* Round "size" up to the next multiple of PAGE_SIZE. */
#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)

/*
 * Bounds for the per-file read-ahead size (f_ramax), in bytes.
 * The "#if 0" branch holds a smaller alternative pair that is compiled out.
 */
#if 0
#define MAX_READAHEAD PageAlignSize(4096*7)
#define MIN_READAHEAD PageAlignSize(4096*2)
#else
#define MAX_READAHEAD PageAlignSize(4096*18)
#define MIN_READAHEAD PageAlignSize(4096*3)
#endif
447
/*
 * Decide whether to read ahead for the page at file position "ppos" and,
 * if so, start the reads and update the read-ahead state kept in "filp".
 *
 * reada_ok:   non-zero when the caller considers read-ahead worthwhile; set
 *             to 2 locally to mark the read-ahead issued for an unlocked page.
 * filp:       supplies and receives the state: f_raend, f_ralen, f_rawin
 *             and f_ramax.
 * inode:      file being read; i_size bounds the read-ahead start.
 * ppos:       page-aligned position of "page" in the file.
 * page:       the page the caller is about to use.
 * page_cache: spare free page (or 0), passed through try_to_read_ahead().
 *
 * Returns the spare page that is left over, or 0 if none is left.
 */
static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode,
	unsigned long ppos, struct page * page,
	unsigned long page_cache)
{
	unsigned long max_ahead, ahead;
	unsigned long raend;

	raend = filp->f_raend & PAGE_MASK;
	max_ahead = 0;

	/*
	 * The page is locked, so I/O on it is in progress.  If there is no
	 * previous read-ahead, or ppos falls outside the last read-ahead
	 * span [raend - f_ralen, raend), restart the state at this page and
	 * read ahead up to f_ramax bytes past it (when inside the file).
	 */
	if (PageLocked(page)) {
		if (!filp->f_ralen || ppos >= raend || ppos + filp->f_ralen < raend) {
			raend = ppos;
			if (raend < inode->i_size)
				max_ahead = filp->f_ramax;
			filp->f_rawin = 0;
			filp->f_ralen = PAGE_SIZE;
			/* Nothing will be read ahead: record just the current page. */
			if (!max_ahead) {
				filp->f_raend  = ppos + filp->f_ralen;
				filp->f_rawin += filp->f_ralen;
			}
		}
	}
	/*
	 * The page is not locked.  If read-ahead is allowed and ppos lies in
	 * the last read-ahead span, issue the next read-ahead starting right
	 * after that span; the previous length becomes the window and the
	 * length restarts from zero.
	 */
	else if (reada_ok && filp->f_ramax && raend >= PAGE_SIZE &&
		 ppos <= raend && ppos + filp->f_ralen >= raend) {
		/*
		 * Step back one page: the loop below starts at raend + PAGE_SIZE,
		 * and one extra page is added to max_ahead to compensate.
		 */
		raend -= PAGE_SIZE;
		if (raend < inode->i_size)
			max_ahead = filp->f_ramax + PAGE_SIZE;

		if (max_ahead) {
			filp->f_rawin = filp->f_ralen;
			filp->f_ralen = 0;
			reada_ok      = 2;
		}
	}

	/* Start a read for each page of the chosen range. */
	ahead = 0;
	while (ahead < max_ahead) {
		ahead += PAGE_SIZE;
		page_cache = try_to_read_ahead(inode, raend + ahead, page_cache);
	}

	/*
	 * Something was requested: record the new span, then double f_ramax
	 * for next time, capped at MAX_READAHEAD.
	 */
	if (ahead) {
		/* Only in the unlocked-page case is the disk queue run right away. */
		if (reada_ok == 2) {
			run_task_queue(&tq_disk);
		}

		filp->f_ralen += ahead;
		filp->f_rawin += filp->f_ralen;
		filp->f_raend = raend + ahead + PAGE_SIZE;

		filp->f_ramax += filp->f_ramax;

		if (filp->f_ramax > MAX_READAHEAD)
			filp->f_ramax = MAX_READAHEAD;

#ifdef PROFILE_READAHEAD
		profile_readahead((reada_ok == 2), filp);
#endif
	}

	return page_cache;
}
547
548
549
550
551
552
553
554
555
556
557
558long generic_file_read(struct inode * inode, struct file * filp,
559 char * buf, unsigned long count)
560{
561 int error, read;
562 unsigned long pos, ppos, page_cache;
563 int reada_ok;
564
565 if (!access_ok(VERIFY_WRITE, buf, count))
566 return -EFAULT;
567 if (!count)
568 return 0;
569 error = 0;
570 read = 0;
571 page_cache = 0;
572
573 pos = filp->f_pos;
574 ppos = pos & PAGE_MASK;
575
576
577
578
579
580
581
582 if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) {
583 reada_ok = 0;
584 filp->f_raend = 0;
585 filp->f_ralen = 0;
586 filp->f_ramax = 0;
587 filp->f_rawin = 0;
588 } else {
589 reada_ok = 1;
590 }
591
592
593
594
595
596
597
598 if (pos + count <= (PAGE_SIZE >> 1)) {
599 filp->f_ramax = 0;
600 } else {
601 unsigned long needed;
602
603 needed = ((pos + count) & PAGE_MASK) - ppos;
604
605 if (filp->f_ramax < needed)
606 filp->f_ramax = needed;
607
608 if (reada_ok && filp->f_ramax < MIN_READAHEAD)
609 filp->f_ramax = MIN_READAHEAD;
610 if (filp->f_ramax > MAX_READAHEAD)
611 filp->f_ramax = MAX_READAHEAD;
612 }
613
614 for (;;) {
615 struct page *page, **hash;
616
617 if (pos >= inode->i_size)
618 break;
619
620
621
622
623 hash = page_hash(inode, pos & PAGE_MASK);
624 page = __find_page(inode, pos & PAGE_MASK, *hash);
625 if (!page)
626 goto no_cached_page;
627
628found_page:
629
630
631
632
633
634
635
636 if (PageUptodate(page) || PageLocked(page))
637 page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_MASK, page, page_cache);
638 else if (reada_ok && filp->f_ramax > MIN_READAHEAD)
639 filp->f_ramax = MIN_READAHEAD;
640
641 wait_on_page(page);
642
643 if (!PageUptodate(page))
644 goto page_read_error;
645
646success:
647
648
649
650
651 {
652 unsigned long offset, nr;
653
654 offset = pos & ~PAGE_MASK;
655 nr = PAGE_SIZE - offset;
656 if (nr > count)
657 nr = count;
658 if (nr > inode->i_size - pos)
659 nr = inode->i_size - pos;
660 nr -= copy_to_user(buf, (void *) (page_address(page) + offset), nr);
661 release_page(page);
662 error = -EFAULT;
663 if (!nr)
664 break;
665 buf += nr;
666 pos += nr;
667 read += nr;
668 count -= nr;
669 if (count)
670 continue;
671 break;
672 }
673
674no_cached_page:
675
676
677
678
679 if (!page_cache) {
680 page_cache = __get_free_page(GFP_KERNEL);
681
682
683
684
685 if (page_cache)
686 continue;
687 error = -ENOMEM;
688 break;
689 }
690
691
692
693
694 page = mem_map + MAP_NR(page_cache);
695 page_cache = 0;
696 add_to_page_cache(page, inode, pos & PAGE_MASK, hash);
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713 if (reada_ok && filp->f_ramax > MIN_READAHEAD)
714 filp->f_ramax = MIN_READAHEAD;
715
716 error = inode->i_op->readpage(inode, page);
717 if (!error)
718 goto found_page;
719 release_page(page);
720 break;
721
722page_read_error:
723
724
725
726
727
728 error = inode->i_op->readpage(inode, page);
729 if (!error) {
730 wait_on_page(page);
731 if (PageUptodate(page) && !PageError(page))
732 goto success;
733 error = -EIO;
734 }
735 release_page(page);
736 break;
737 }
738
739 filp->f_pos = pos;
740 filp->f_reada = 1;
741 if (page_cache)
742 free_page(page_cache);
743 UPDATE_ATIME(inode)
744 if (!read)
745 read = error;
746 return read;
747}
748
749
750
751
752
753
754
755
756
757
758
759
760
/*
 * Page-fault handler for file mappings: provide the page backing "address"
 * in "area".
 *
 * area:     the faulting mapping; its dentry gives the inode.
 * address:  faulting address; the file offset is derived from it together
 *           with vm_start and vm_offset.
 * no_share: when non-zero, the caller gets a private copy of the data in a
 *           freshly allocated page instead of the cached page itself.
 *
 * Returns the address of the page to map, or 0 on failure.
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
	unsigned long offset;
	struct page * page, **hash;
	struct inode * inode = area->vm_dentry->d_inode;
	unsigned long old_page, new_page;

	new_page = 0;
	offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
	/* A shared mapping of our own mm touched at or beyond end of file: no page. */
	if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
		goto no_page;

	/* Look the page up in the cache. */
	hash = page_hash(inode, offset);
	page = __find_page(inode, offset, *hash);
	if (!page)
		goto no_cached_page;

found_page:
	/*
	 * A private copy was asked for: make sure the destination page is
	 * allocated before we start waiting on the cached page.
	 */
	if (no_share && !new_page) {
		new_page = __get_free_page(GFP_KERNEL);
		if (!new_page)
			goto failure;
	}

	if (PageLocked(page))
		goto page_locked_wait;
	if (!PageUptodate(page))
		goto page_read_error;

success:
	/* The cached page holds valid data. */
	old_page = page_address(page);
	if (!no_share) {
		/*
		 * Sharing is fine: return the cached page itself and free any
		 * spare page allocated along the way.
		 */
		if (new_page)
			free_page(new_page);

		flush_page_to_ram(old_page);
		return old_page;
	}

	/* Private copy: duplicate the data and drop our cache reference. */
	copy_page(new_page, old_page);
	flush_page_to_ram(new_page);
	release_page(page);
	return new_page;

no_cached_page:
	new_page = __get_free_page(GFP_KERNEL);
	if (!new_page)
		goto no_page;

	/*
	 * Look again after the allocation; if the page has turned up in the
	 * cache meanwhile, use it (new_page then serves as the spare).
	 */
	page = find_page(inode, offset);
	if (page)
		goto found_page;

	/* Still not cached: insert the new page into the cache and read it. */
	page = mem_map + MAP_NR(new_page);
	new_page = 0;
	add_to_page_cache(page, inode, offset, hash);

	if (inode->i_op->readpage(inode, page) != 0)
		goto failure;

	/*
	 * While the read is still in flight, also start reading the next
	 * page; whatever spare page that leaves is kept in new_page.
	 */
	if (PageLocked(page))
		new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
	goto found_page;

page_locked_wait:
	__wait_on_page(page);
	if (PageUptodate(page))
		goto success;

page_read_error:
	/*
	 * The page is unlocked but not up to date: try to read it once more,
	 * synchronously, and fail if that does not help.
	 */
	if (inode->i_op->readpage(inode, page) != 0)
		goto failure;
	wait_on_page(page);
	if (PageError(page))
		goto failure;
	if (PageUptodate(page))
		goto success;

	/* Neither an error nor up to date: fall through and give up. */
failure:
	release_page(page);
	if (new_page)
		free_page(new_page);
no_page:
	return 0;
}
887
888
889
890
891
892static inline int do_write_page(struct inode * inode, struct file * file,
893 const char * page, unsigned long offset)
894{
895 int retval;
896 unsigned long size;
897 unsigned long old_fs;
898
899 size = offset + PAGE_SIZE;
900
901 if (S_ISREG(inode->i_mode)) {
902 if (size > inode->i_size)
903 size = inode->i_size;
904
905 if (size < offset)
906 return -EIO;
907 }
908 size -= offset;
909 old_fs = get_fs();
910 set_fs(KERNEL_DS);
911 retval = -EIO;
912 if (size == file->f_op->write(inode, file, (const char *) page, size))
913 retval = 0;
914 set_fs(old_fs);
915 return retval;
916}
917
918static int filemap_write_page(struct vm_area_struct * vma,
919 unsigned long offset,
920 unsigned long page)
921{
922 int result;
923 struct file file;
924 struct dentry * dentry;
925 struct inode * inode;
926 struct buffer_head * bh;
927
928 bh = mem_map[MAP_NR(page)].buffers;
929 if (bh) {
930
931 struct buffer_head * tmp = bh;
932 do {
933
934
935
936
937 mark_buffer_dirty(tmp, 0);
938 tmp = tmp->b_this_page;
939 } while (tmp != bh);
940 return 0;
941 }
942
943 dentry = vma->vm_dentry;
944 inode = dentry->d_inode;
945 file.f_op = inode->i_op->default_file_ops;
946 if (!file.f_op->write)
947 return -EIO;
948 file.f_mode = 3;
949 file.f_flags = 0;
950 file.f_count = 1;
951 file.f_dentry = dentry;
952 file.f_pos = offset;
953 file.f_reada = 0;
954
955
956
957
958
959 dget(dentry);
960 down(&inode->i_sem);
961 result = do_write_page(inode, &file, (const char *) page, offset);
962 up(&inode->i_sem);
963 dput(dentry);
964 return result;
965}
966
967
968
969
970
971
972
973
974
975
976
977
978int filemap_swapout(struct vm_area_struct * vma,
979 unsigned long offset,
980 pte_t *page_table)
981{
982 int error;
983 unsigned long page = pte_page(*page_table);
984 unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
985
986 flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
987 set_pte(page_table, __pte(entry));
988 flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
989 error = filemap_write_page(vma, offset, page);
990 if (pte_val(*page_table) == entry)
991 pte_clear(page_table);
992 return error;
993}
994
995
996
997
998
999
1000
1001static pte_t filemap_swapin(struct vm_area_struct * vma,
1002 unsigned long offset,
1003 unsigned long entry)
1004{
1005 unsigned long page = SWP_OFFSET(entry);
1006
1007 atomic_inc(&mem_map[page].count);
1008 page = (page << PAGE_SHIFT) + PAGE_OFFSET;
1009 return mk_pte(page,vma->vm_page_prot);
1010}
1011
1012
1013static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
1014 unsigned long address, unsigned int flags)
1015{
1016 pte_t pte = *ptep;
1017 unsigned long page;
1018 int error;
1019
1020 if (!(flags & MS_INVALIDATE)) {
1021 if (!pte_present(pte))
1022 return 0;
1023 if (!pte_dirty(pte))
1024 return 0;
1025 flush_page_to_ram(pte_page(pte));
1026 flush_cache_page(vma, address);
1027 set_pte(ptep, pte_mkclean(pte));
1028 flush_tlb_page(vma, address);
1029 page = pte_page(pte);
1030 atomic_inc(&mem_map[MAP_NR(page)].count);
1031 } else {
1032 if (pte_none(pte))
1033 return 0;
1034 flush_cache_page(vma, address);
1035 pte_clear(ptep);
1036 flush_tlb_page(vma, address);
1037 if (!pte_present(pte)) {
1038 swap_free(pte_val(pte));
1039 return 0;
1040 }
1041 page = pte_page(pte);
1042 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
1043 free_page(page);
1044 return 0;
1045 }
1046 }
1047 error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
1048 free_page(page);
1049 return error;
1050}
1051
1052static inline int filemap_sync_pte_range(pmd_t * pmd,
1053 unsigned long address, unsigned long size,
1054 struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
1055{
1056 pte_t * pte;
1057 unsigned long end;
1058 int error;
1059
1060 if (pmd_none(*pmd))
1061 return 0;
1062 if (pmd_bad(*pmd)) {
1063 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
1064 pmd_clear(pmd);
1065 return 0;
1066 }
1067 pte = pte_offset(pmd, address);
1068 offset += address & PMD_MASK;
1069 address &= ~PMD_MASK;
1070 end = address + size;
1071 if (end > PMD_SIZE)
1072 end = PMD_SIZE;
1073 error = 0;
1074 do {
1075 error |= filemap_sync_pte(pte, vma, address + offset, flags);
1076 address += PAGE_SIZE;
1077 pte++;
1078 } while (address < end);
1079 return error;
1080}
1081
1082static inline int filemap_sync_pmd_range(pgd_t * pgd,
1083 unsigned long address, unsigned long size,
1084 struct vm_area_struct *vma, unsigned int flags)
1085{
1086 pmd_t * pmd;
1087 unsigned long offset, end;
1088 int error;
1089
1090 if (pgd_none(*pgd))
1091 return 0;
1092 if (pgd_bad(*pgd)) {
1093 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
1094 pgd_clear(pgd);
1095 return 0;
1096 }
1097 pmd = pmd_offset(pgd, address);
1098 offset = address & PGDIR_MASK;
1099 address &= ~PGDIR_MASK;
1100 end = address + size;
1101 if (end > PGDIR_SIZE)
1102 end = PGDIR_SIZE;
1103 error = 0;
1104 do {
1105 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
1106 address = (address + PMD_SIZE) & PMD_MASK;
1107 pmd++;
1108 } while (address < end);
1109 return error;
1110}
1111
1112static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
1113 size_t size, unsigned int flags)
1114{
1115 pgd_t * dir;
1116 unsigned long end = address + size;
1117 int error = 0;
1118
1119 dir = pgd_offset(vma->vm_mm, address);
1120 flush_cache_range(vma->vm_mm, end - size, end);
1121 while (address < end) {
1122 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
1123 address = (address + PGDIR_SIZE) & PGDIR_MASK;
1124 dir++;
1125 }
1126 flush_tlb_range(vma->vm_mm, end - size, end);
1127 return error;
1128}
1129
1130
1131
1132
/*
 * Called when [start, start + len) of a shared file mapping goes away:
 * run filemap_sync() over the range with MS_ASYNC.  The result is ignored.
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
	filemap_sync(vma, start, len, MS_ASYNC);
}
1137
1138
1139
1140
1141
1142
/*
 * Operations for shared mappings that may be written (selected by
 * generic_file_mmap()).  The initializer is positional; the slot order is
 * defined by struct vm_operations_struct, declared elsewhere.  Only the
 * "sync" slot is confirmed by this file (msync_interval() calls
 * vm_ops->sync); the other slot names below are presumed from the handler
 * names -- TODO confirm against the header.
 */
static struct vm_operations_struct file_shared_mmap = {
	NULL,
	NULL,
	filemap_unmap,		/* presumably the unmap slot */
	NULL,
	filemap_sync,		/* sync slot */
	NULL,
	filemap_nopage,		/* presumably the nopage (fault) slot */
	NULL,
	filemap_swapout,	/* presumably the swapout slot */
	filemap_swapin,		/* presumably the swapin slot */
};
1155
1156
1157
1158
1159
1160
1161
/*
 * Operations for all other file mappings (selected by generic_file_mmap()).
 * Only the fault handler is provided, in the same position it occupies in
 * file_shared_mmap; every other slot is NULL.
 */
static struct vm_operations_struct file_private_mmap = {
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	filemap_nopage,		/* presumably the nopage (fault) slot -- TODO confirm */
	NULL,
	NULL,
	NULL,
};
1174
1175
1176
1177int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
1178{
1179 struct vm_operations_struct * ops;
1180 struct inode *inode = file->f_dentry->d_inode;
1181
1182 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1183 ops = &file_shared_mmap;
1184
1185
1186 if (vma->vm_offset & (PAGE_SIZE - 1))
1187 return -EINVAL;
1188 } else {
1189 ops = &file_private_mmap;
1190 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
1191 return -EINVAL;
1192 }
1193 if (!inode->i_sb || !S_ISREG(inode->i_mode))
1194 return -EACCES;
1195 if (!inode->i_op || !inode->i_op->readpage)
1196 return -ENOEXEC;
1197 UPDATE_ATIME(inode);
1198 vma->vm_dentry = dget(file->f_dentry);
1199 vma->vm_ops = ops;
1200 return 0;
1201}
1202
1203
1204
1205
1206
1207
1208static int msync_interval(struct vm_area_struct * vma,
1209 unsigned long start, unsigned long end, int flags)
1210{
1211 if (!vma->vm_dentry)
1212 return 0;
1213 if (vma->vm_ops->sync) {
1214 int error;
1215 error = vma->vm_ops->sync(vma, start, end-start, flags);
1216 if (!error && (flags & MS_SYNC)) {
1217 struct dentry * dentry = vma->vm_dentry;
1218 if (dentry) {
1219 struct inode * inode = dentry->d_inode;
1220 down(&inode->i_sem);
1221 error = file_fsync(NULL,dentry);
1222 up(&inode->i_sem);
1223 }
1224 }
1225 return error;
1226 }
1227 return 0;
1228}
1229
1230asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
1231{
1232 unsigned long end;
1233 struct vm_area_struct * vma;
1234 int unmapped_error, error = -EINVAL;
1235
1236 lock_kernel();
1237 if (start & ~PAGE_MASK)
1238 goto out;
1239 len = (len + ~PAGE_MASK) & PAGE_MASK;
1240 end = start + len;
1241 if (end < start)
1242 goto out;
1243 if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
1244 goto out;
1245 error = 0;
1246 if (end == start)
1247 goto out;
1248
1249
1250
1251
1252 vma = find_vma(current->mm, start);
1253 unmapped_error = 0;
1254 for (;;) {
1255
1256 error = -EFAULT;
1257 if (!vma)
1258 goto out;
1259
1260 if (start < vma->vm_start) {
1261 unmapped_error = -EFAULT;
1262 start = vma->vm_start;
1263 }
1264
1265 if (end <= vma->vm_end) {
1266 if (start < end) {
1267 error = msync_interval(vma, start, end, flags);
1268 if (error)
1269 goto out;
1270 }
1271 error = unmapped_error;
1272 goto out;
1273 }
1274
1275 error = msync_interval(vma, start, vma->vm_end, flags);
1276 if (error)
1277 goto out;
1278 start = vma->vm_end;
1279 vma = vma->vm_next;
1280 }
1281out:
1282 unlock_kernel();
1283 return error;
1284}
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
/*
 * Write to a file through the page cache.
 *
 * inode: file to write; must provide i_op->updatepage, else -EIO.
 * file:  open file; f_pos gives the start position (the current file size
 *        when O_APPEND is set) and is advanced; O_SYNC is handed on to
 *        updatepage.
 * buf:   source data, passed to updatepage unchanged.
 * count: number of bytes to write.
 *
 * The data is handled one page at a time: the page is found in, or added
 * to, the page cache and i_op->updatepage() stores the bytes.  i_size is
 * raised if the write ends beyond it.
 *
 * Returns the number of bytes written, or, if nothing was written, the last
 * status (a negative error, or 0 for a zero-length write).
 */
long
generic_file_write(struct inode *inode, struct file *file, const char *buf, unsigned long count)
{
	struct page	*page, **hash;
	unsigned long	page_cache = 0;
	unsigned long	ppos, offset;
	unsigned int	bytes, written;
	unsigned long	pos;
	int		status, sync, didread;

	if (!inode->i_op || !inode->i_op->updatepage)
		return -EIO;

	sync    = file->f_flags & O_SYNC;
	pos     = file->f_pos;
	written = 0;
	status  = 0;

	if (file->f_flags & O_APPEND)
		pos = inode->i_size;

	while (count) {
		/*
		 * Split the position into the page's start (ppos) and the byte
		 * offset inside it; "bytes" is how much of the request fits
		 * into this page.
		 */
		offset = (pos & ~PAGE_MASK);
		ppos = pos & PAGE_MASK;

		if ((bytes = PAGE_SIZE - offset) > count)
			bytes = count;

		hash = page_hash(inode, ppos);
		if (!(page = __find_page(inode, ppos, *hash))) {
			/*
			 * Not cached.  Without a spare page, allocate one and
			 * redo the lookup from the top of the loop before
			 * inserting it.
			 */
			if (!page_cache) {
				page_cache = __get_free_page(GFP_KERNEL);
				if (!page_cache) {
					status = -ENOMEM;
					break;
				}
				continue;
			}
			page = mem_map + MAP_NR(page_cache);
			add_to_page_cache(page, inode, ppos, hash);
			page_cache = 0;
		}

		/* Counts the read attempts made on this page. */
		didread = 0;
page_wait:
		wait_on_page(page);

		/*
		 * A page that is not up to date must be read in first when only
		 * part of it is being written and it starts inside the file.
		 * Two read attempts are made; if the page is still not up to
		 * date on the third pass the write fails with -EIO.  A full-page
		 * write, or a page starting at or beyond i_size, needs no read:
		 * the page is simply marked up to date.
		 */
		if (!PageUptodate(page)) {
			if (bytes < PAGE_SIZE && ppos < inode->i_size) {
				if (didread < 2)
					status = inode->i_op->readpage(inode, page);
				else
					status = -EIO;
				if (status < 0)
					goto done_with_page;
				didread++;
				goto page_wait;
			}
			set_bit(PG_uptodate, &page->flags);
		}

		/* Hand the data to the filesystem; status is bytes written or an error. */
		status = inode->i_op->updatepage(inode, page, buf,
							offset, bytes, sync);
done_with_page:
		/* Drop the reference from the lookup / cache insertion. */
		__free_page(page);
		if (status < 0)
			break;

		written += status;
		count -= status;
		pos += status;
		buf += status;
	}
	file->f_pos = pos;
	if (pos > inode->i_size)
		inode->i_size = pos;

	if (page_cache)
		free_page(page_cache);
	if (written)
		return written;
	return status;
}
1401