1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include <linux/fs.h>
21#include <linux/file.h>
22#include <linux/pagemap.h>
23#include <linux/splice.h>
24#include <linux/memcontrol.h>
25#include <linux/mm_inline.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/buffer_head.h>
29#include <linux/module.h>
30#include <linux/syscalls.h>
31#include <linux/uio.h>
32#include <linux/security.h>
33
34
35
36
37
38
39
40static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
41 struct pipe_buffer *buf)
42{
43 struct page *page = buf->page;
44 struct address_space *mapping;
45
46 lock_page(page);
47
48 mapping = page_mapping(page);
49 if (mapping) {
50 WARN_ON(!PageUptodate(page));
51
52
53
54
55
56
57
58
59
60 wait_on_page_writeback(page);
61
62 if (page_has_private(page) &&
63 !try_to_release_page(page, GFP_KERNEL))
64 goto out_unlock;
65
66
67
68
69
70 if (remove_mapping(mapping, page)) {
71 buf->flags |= PIPE_BUF_FLAG_LRU;
72 return 0;
73 }
74 }
75
76
77
78
79
80out_unlock:
81 unlock_page(page);
82 return 1;
83}
84
85static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
86 struct pipe_buffer *buf)
87{
88 page_cache_release(buf->page);
89 buf->flags &= ~PIPE_BUF_FLAG_LRU;
90}
91
92
93
94
95
96static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
97 struct pipe_buffer *buf)
98{
99 struct page *page = buf->page;
100 int err;
101
102 if (!PageUptodate(page)) {
103 lock_page(page);
104
105
106
107
108
109 if (!page->mapping) {
110 err = -ENODATA;
111 goto error;
112 }
113
114
115
116
117 if (!PageUptodate(page)) {
118 err = -EIO;
119 goto error;
120 }
121
122
123
124
125 unlock_page(page);
126 }
127
128 return 0;
129error:
130 unlock_page(page);
131 return err;
132}
133
/*
 * Buffer operations for pipe buffers that reference page-cache pages.
 * __generic_file_splice_read() installs this table through its
 * splice_pipe_desc.  Merging is disabled; confirm, release and steal use
 * the page_cache_pipe_buf_*() helpers, the rest are generic pipe helpers.
 */
static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = page_cache_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = page_cache_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
143
144static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
145 struct pipe_buffer *buf)
146{
147 if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
148 return 1;
149
150 buf->flags |= PIPE_BUF_FLAG_LRU;
151 return generic_pipe_buf_steal(pipe, buf);
152}
153
/*
 * Buffer operations for pipe buffers that reference user pages;
 * vmsplice_to_pipe() installs this table.  Stealing is restricted to
 * gifted buffers by user_page_pipe_buf_steal(); release shares the
 * page-cache release helper.
 */
static const struct pipe_buf_operations user_page_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = user_page_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
163
164
165
166
167
168
169
170
171
172
173
174
175ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
176 struct splice_pipe_desc *spd)
177{
178 unsigned int spd_pages = spd->nr_pages;
179 int ret, do_wakeup, page_nr;
180
181 ret = 0;
182 do_wakeup = 0;
183 page_nr = 0;
184
185 pipe_lock(pipe);
186
187 for (;;) {
188 if (!pipe->readers) {
189 send_sig(SIGPIPE, current, 0);
190 if (!ret)
191 ret = -EPIPE;
192 break;
193 }
194
195 if (pipe->nrbufs < PIPE_BUFFERS) {
196 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
197 struct pipe_buffer *buf = pipe->bufs + newbuf;
198
199 buf->page = spd->pages[page_nr];
200 buf->offset = spd->partial[page_nr].offset;
201 buf->len = spd->partial[page_nr].len;
202 buf->private = spd->partial[page_nr].private;
203 buf->ops = spd->ops;
204 if (spd->flags & SPLICE_F_GIFT)
205 buf->flags |= PIPE_BUF_FLAG_GIFT;
206
207 pipe->nrbufs++;
208 page_nr++;
209 ret += buf->len;
210
211 if (pipe->inode)
212 do_wakeup = 1;
213
214 if (!--spd->nr_pages)
215 break;
216 if (pipe->nrbufs < PIPE_BUFFERS)
217 continue;
218
219 break;
220 }
221
222 if (spd->flags & SPLICE_F_NONBLOCK) {
223 if (!ret)
224 ret = -EAGAIN;
225 break;
226 }
227
228 if (signal_pending(current)) {
229 if (!ret)
230 ret = -ERESTARTSYS;
231 break;
232 }
233
234 if (do_wakeup) {
235 smp_mb();
236 if (waitqueue_active(&pipe->wait))
237 wake_up_interruptible_sync(&pipe->wait);
238 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
239 do_wakeup = 0;
240 }
241
242 pipe->waiting_writers++;
243 pipe_wait(pipe);
244 pipe->waiting_writers--;
245 }
246
247 pipe_unlock(pipe);
248
249 if (do_wakeup) {
250 smp_mb();
251 if (waitqueue_active(&pipe->wait))
252 wake_up_interruptible(&pipe->wait);
253 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
254 }
255
256 while (page_nr < spd_pages)
257 spd->spd_release(spd, page_nr++);
258
259 return ret;
260}
261
262static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
263{
264 page_cache_release(spd->pages[i]);
265}
266
/*
 * Gather up to PIPE_BUFFERS page-cache pages of @in starting at *@ppos,
 * make sure they contain valid data, and pass them to splice_to_pipe().
 *
 * @in:    file to read from (its f_mapping supplies the pages)
 * @ppos:  file position to start at; only read here, never updated
 * @pipe:  destination pipe
 * @len:   number of bytes requested
 * @flags: splice flags, stored in the splice_pipe_desc
 *
 * Returns the result of splice_to_pipe() when at least one page was
 * prepared, otherwise the last error recorded (0 if none).
 */
static int
__generic_file_splice_read(struct file *in, loff_t *ppos,
			   struct pipe_inode_info *pipe, size_t len,
			   unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	unsigned int loff, nr_pages, req_pages;
	struct page *pages[PIPE_BUFFERS];
	struct partial_page partial[PIPE_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	/* Page index and in-page offset of the start position. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	/* Never handle more pages than fit in the local arrays. */
	nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);

	/*
	 * Grab the run of contiguous pages already present in the page
	 * cache, then advance index past them.
	 */
	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
	index += spd.nr_pages;

	/*
	 * Not everything was cached: start synchronous readahead for the
	 * remainder of the request.
	 */
	if (spd.nr_pages < nr_pages)
		page_cache_sync_readahead(mapping, &in->f_ra, in,
				index, req_pages - spd.nr_pages);

	error = 0;
	while (spd.nr_pages < nr_pages) {
		/*
		 * Look the page up individually; readahead may have
		 * brought it in.
		 */
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * Still absent: allocate a page and insert it into
			 * the page cache ourselves.
			 */
			page = page_cache_alloc_cold(mapping);
			if (!page)
				break;

			error = add_to_page_cache_lru(page, mapping, index,
						mapping_gfp_mask(mapping));
			if (unlikely(error)) {
				page_cache_release(page);
				/* Someone else inserted it first: retry lookup. */
				if (error == -EEXIST)
					continue;
				break;
			}
			/*
			 * The new page is unlocked here; it is re-locked
			 * below when its contents are checked.
			 */
			unlock_page(page);
		}

		pages[spd.nr_pages++] = page;
		index++;
	}

	/*
	 * Second pass: walk the collected pages from the start position,
	 * validating each one and filling in the partial[] map.
	 * spd.nr_pages is reset and now counts validated pages only.
	 */
	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;
	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		/* Bytes to take from this page. */
		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = pages[page_nr];

		if (PageReadahead(page))
			page_cache_async_readahead(mapping, &in->f_ra, in,
					page, index, req_pages - page_nr);

		/*
		 * Page has no valid data yet: lock it and either find it
		 * became uptodate meanwhile or read it in.
		 */
		if (!PageUptodate(page)) {
			lock_page(page);

			/*
			 * The page lost its mapping while we were not
			 * holding the lock.  Replace it with whatever
			 * find_or_create_page() gives us for this index
			 * (NOTE(review): presumably returned locked, as the
			 * code below unlocks it -- confirm).
			 */
			if (!page->mapping) {
				unlock_page(page);
				page = find_or_create_page(mapping, index,
						mapping_gfp_mask(mapping));

				if (!page) {
					error = -ENOMEM;
					break;
				}
				page_cache_release(pages[page_nr]);
				pages[page_nr] = page;
			}

			/* Became uptodate while we waited for the lock. */
			if (PageUptodate(page)) {
				unlock_page(page);
				goto fill_it;
			}

			/*
			 * Read the page in.  The page is not unlocked here
			 * afterwards (NOTE(review): presumably ->readpage()
			 * is responsible for unlocking -- confirm).
			 */
			error = mapping->a_ops->readpage(in, page);
			if (unlikely(error)) {
				/*
				 * AOP_TRUNCATED_PAGE is not reported as an
				 * error; we simply stop at this page and
				 * return what was gathered so far.
				 */
				if (error == AOP_TRUNCATED_PAGE)
					error = 0;

				break;
			}
		}
fill_it:
		/* Re-read i_size for every page and stop at end of file. */
		isize = i_size_read(mapping->host);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index))
			break;

		/*
		 * This is the last page of the file: clamp the length to
		 * the bytes that actually exist in it.
		 */
		if (end_index == index) {
			unsigned int plen;

			/* Number of valid bytes in the last page. */
			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				break;

			/*
			 * Setting len = this_len makes it reach zero below,
			 * which ends the loop on the next iteration.
			 */
			this_len = min(this_len, plen - loff);
			len = this_len;
		}

		partial[page_nr].offset = loff;
		partial[page_nr].len = this_len;
		len -= this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	/*
	 * Drop the references on pages that were collected but not
	 * validated, and record how far the read got for readahead.
	 */
	while (page_nr < nr_pages)
		page_cache_release(pages[page_nr++]);
	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;

	if (spd.nr_pages)
		return splice_to_pipe(pipe, &spd);

	return error;
}
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
480 struct pipe_inode_info *pipe, size_t len,
481 unsigned int flags)
482{
483 loff_t isize, left;
484 int ret;
485
486 isize = i_size_read(in->f_mapping->host);
487 if (unlikely(*ppos >= isize))
488 return 0;
489
490 left = isize - *ppos;
491 if (unlikely(left < len))
492 len = left;
493
494 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
495 if (ret > 0) {
496 *ppos += ret;
497 file_accessed(in);
498 }
499
500 return ret;
501}
502EXPORT_SYMBOL(generic_file_splice_read);
503
/*
 * Buffer operations for the privately allocated pages that
 * default_file_splice_read() puts into a pipe.  Every callback is the
 * generic pipe helper; merging is disabled.
 */
static const struct pipe_buf_operations default_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
513
514static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
515 unsigned long vlen, loff_t offset)
516{
517 mm_segment_t old_fs;
518 loff_t pos = offset;
519 ssize_t res;
520
521 old_fs = get_fs();
522 set_fs(get_ds());
523
524 res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
525 set_fs(old_fs);
526
527 return res;
528}
529
530static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
531 loff_t pos)
532{
533 mm_segment_t old_fs;
534 ssize_t res;
535
536 old_fs = get_fs();
537 set_fs(get_ds());
538
539 res = vfs_write(file, (const char __user *)buf, count, &pos);
540 set_fs(old_fs);
541
542 return res;
543}
544
545ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
546 struct pipe_inode_info *pipe, size_t len,
547 unsigned int flags)
548{
549 unsigned int nr_pages;
550 unsigned int nr_freed;
551 size_t offset;
552 struct page *pages[PIPE_BUFFERS];
553 struct partial_page partial[PIPE_BUFFERS];
554 struct iovec vec[PIPE_BUFFERS];
555 pgoff_t index;
556 ssize_t res;
557 size_t this_len;
558 int error;
559 int i;
560 struct splice_pipe_desc spd = {
561 .pages = pages,
562 .partial = partial,
563 .flags = flags,
564 .ops = &default_pipe_buf_ops,
565 .spd_release = spd_release_page,
566 };
567
568 index = *ppos >> PAGE_CACHE_SHIFT;
569 offset = *ppos & ~PAGE_CACHE_MASK;
570 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
571
572 for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
573 struct page *page;
574
575 page = alloc_page(GFP_USER);
576 error = -ENOMEM;
577 if (!page)
578 goto err;
579
580 this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
581 vec[i].iov_base = (void __user *) page_address(page);
582 vec[i].iov_len = this_len;
583 pages[i] = page;
584 spd.nr_pages++;
585 len -= this_len;
586 offset = 0;
587 }
588
589 res = kernel_readv(in, vec, spd.nr_pages, *ppos);
590 if (res < 0) {
591 error = res;
592 goto err;
593 }
594
595 error = 0;
596 if (!res)
597 goto err;
598
599 nr_freed = 0;
600 for (i = 0; i < spd.nr_pages; i++) {
601 this_len = min_t(size_t, vec[i].iov_len, res);
602 partial[i].offset = 0;
603 partial[i].len = this_len;
604 if (!this_len) {
605 __free_page(pages[i]);
606 pages[i] = NULL;
607 nr_freed++;
608 }
609 res -= this_len;
610 }
611 spd.nr_pages -= nr_freed;
612
613 res = splice_to_pipe(pipe, &spd);
614 if (res > 0)
615 *ppos += res;
616
617 return res;
618
619err:
620 for (i = 0; i < spd.nr_pages; i++)
621 __free_page(pages[i]);
622
623 return error;
624}
625EXPORT_SYMBOL(default_file_splice_read);
626
627
628
629
630
631static int pipe_to_sendpage(struct pipe_inode_info *pipe,
632 struct pipe_buffer *buf, struct splice_desc *sd)
633{
634 struct file *file = sd->u.file;
635 loff_t pos = sd->pos;
636 int ret, more;
637
638 ret = buf->ops->confirm(pipe, buf);
639 if (!ret) {
640 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
641 if (file->f_op && file->f_op->sendpage)
642 ret = file->f_op->sendpage(file, buf->page, buf->offset,
643 sd->len, &pos, more);
644 else
645 ret = -EINVAL;
646 }
647
648 return ret;
649}
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
672 struct splice_desc *sd)
673{
674 struct file *file = sd->u.file;
675 struct address_space *mapping = file->f_mapping;
676 unsigned int offset, this_len;
677 struct page *page;
678 void *fsdata;
679 int ret;
680
681
682
683
684 ret = buf->ops->confirm(pipe, buf);
685 if (unlikely(ret))
686 return ret;
687
688 offset = sd->pos & ~PAGE_CACHE_MASK;
689
690 this_len = sd->len;
691 if (this_len + offset > PAGE_CACHE_SIZE)
692 this_len = PAGE_CACHE_SIZE - offset;
693
694 ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
695 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
696 if (unlikely(ret))
697 goto out;
698
699 if (buf->page != page) {
700
701
702
703 char *src = buf->ops->map(pipe, buf, 1);
704 char *dst = kmap_atomic(page, KM_USER1);
705
706 memcpy(dst + offset, src + buf->offset, this_len);
707 flush_dcache_page(page);
708 kunmap_atomic(dst, KM_USER1);
709 buf->ops->unmap(pipe, buf, src);
710 }
711 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
712 page, fsdata);
713out:
714 return ret;
715}
716EXPORT_SYMBOL(pipe_to_file);
717
718static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
719{
720 smp_mb();
721 if (waitqueue_active(&pipe->wait))
722 wake_up_interruptible(&pipe->wait);
723 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
724}
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
747 splice_actor *actor)
748{
749 int ret;
750
751 while (pipe->nrbufs) {
752 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
753 const struct pipe_buf_operations *ops = buf->ops;
754
755 sd->len = buf->len;
756 if (sd->len > sd->total_len)
757 sd->len = sd->total_len;
758
759 ret = actor(pipe, buf, sd);
760 if (ret <= 0) {
761 if (ret == -ENODATA)
762 ret = 0;
763 return ret;
764 }
765 buf->offset += ret;
766 buf->len -= ret;
767
768 sd->num_spliced += ret;
769 sd->len -= ret;
770 sd->pos += ret;
771 sd->total_len -= ret;
772
773 if (!buf->len) {
774 buf->ops = NULL;
775 ops->release(pipe, buf);
776 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
777 pipe->nrbufs--;
778 if (pipe->inode)
779 sd->need_wakeup = true;
780 }
781
782 if (!sd->total_len)
783 return 0;
784 }
785
786 return 1;
787}
788EXPORT_SYMBOL(splice_from_pipe_feed);
789
790
791
792
793
794
795
796
797
798
799
800int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
801{
802 while (!pipe->nrbufs) {
803 if (!pipe->writers)
804 return 0;
805
806 if (!pipe->waiting_writers && sd->num_spliced)
807 return 0;
808
809 if (sd->flags & SPLICE_F_NONBLOCK)
810 return -EAGAIN;
811
812 if (signal_pending(current))
813 return -ERESTARTSYS;
814
815 if (sd->need_wakeup) {
816 wakeup_pipe_writers(pipe);
817 sd->need_wakeup = false;
818 }
819
820 pipe_wait(pipe);
821 }
822
823 return 1;
824}
825EXPORT_SYMBOL(splice_from_pipe_next);
826
827
828
829
830
831
832
833
834
835
836void splice_from_pipe_begin(struct splice_desc *sd)
837{
838 sd->num_spliced = 0;
839 sd->need_wakeup = false;
840}
841EXPORT_SYMBOL(splice_from_pipe_begin);
842
843
844
845
846
847
848
849
850
851
852
853void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
854{
855 if (sd->need_wakeup)
856 wakeup_pipe_writers(pipe);
857}
858EXPORT_SYMBOL(splice_from_pipe_end);
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
874 splice_actor *actor)
875{
876 int ret;
877
878 splice_from_pipe_begin(sd);
879 do {
880 ret = splice_from_pipe_next(pipe, sd);
881 if (ret > 0)
882 ret = splice_from_pipe_feed(pipe, sd, actor);
883 } while (ret > 0);
884 splice_from_pipe_end(pipe, sd);
885
886 return sd->num_spliced ? sd->num_spliced : ret;
887}
888EXPORT_SYMBOL(__splice_from_pipe);
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
905 loff_t *ppos, size_t len, unsigned int flags,
906 splice_actor *actor)
907{
908 ssize_t ret;
909 struct splice_desc sd = {
910 .total_len = len,
911 .flags = flags,
912 .pos = *ppos,
913 .u.file = out,
914 };
915
916 pipe_lock(pipe);
917 ret = __splice_from_pipe(pipe, &sd, actor);
918 pipe_unlock(pipe);
919
920 return ret;
921}
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936ssize_t
937generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
938 loff_t *ppos, size_t len, unsigned int flags)
939{
940 struct address_space *mapping = out->f_mapping;
941 struct inode *inode = mapping->host;
942 struct splice_desc sd = {
943 .total_len = len,
944 .flags = flags,
945 .pos = *ppos,
946 .u.file = out,
947 };
948 ssize_t ret;
949
950 pipe_lock(pipe);
951
952 splice_from_pipe_begin(&sd);
953 do {
954 ret = splice_from_pipe_next(pipe, &sd);
955 if (ret <= 0)
956 break;
957
958 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
959 ret = file_remove_suid(out);
960 if (!ret) {
961 file_update_time(out);
962 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
963 }
964 mutex_unlock(&inode->i_mutex);
965 } while (ret > 0);
966 splice_from_pipe_end(pipe, &sd);
967
968 pipe_unlock(pipe);
969
970 if (sd.num_spliced)
971 ret = sd.num_spliced;
972
973 if (ret > 0) {
974 unsigned long nr_pages;
975 int err;
976
977 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
978
979 err = generic_write_sync(out, *ppos, ret);
980 if (err)
981 ret = err;
982 else
983 *ppos += ret;
984 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
985 }
986
987 return ret;
988}
989
990EXPORT_SYMBOL(generic_file_splice_write);
991
992static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
993 struct splice_desc *sd)
994{
995 int ret;
996 void *data;
997
998 ret = buf->ops->confirm(pipe, buf);
999 if (ret)
1000 return ret;
1001
1002 data = buf->ops->map(pipe, buf, 0);
1003 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
1004 buf->ops->unmap(pipe, buf, data);
1005
1006 return ret;
1007}
1008
1009static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
1010 struct file *out, loff_t *ppos,
1011 size_t len, unsigned int flags)
1012{
1013 ssize_t ret;
1014
1015 ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
1016 if (ret > 0)
1017 *ppos += ret;
1018
1019 return ret;
1020}
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
1036 loff_t *ppos, size_t len, unsigned int flags)
1037{
1038 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
1039}
1040
1041EXPORT_SYMBOL(generic_splice_sendpage);
1042
1043
1044
1045
1046static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1047 loff_t *ppos, size_t len, unsigned int flags)
1048{
1049 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
1050 loff_t *, size_t, unsigned int);
1051 int ret;
1052
1053 if (unlikely(!(out->f_mode & FMODE_WRITE)))
1054 return -EBADF;
1055
1056 if (unlikely(out->f_flags & O_APPEND))
1057 return -EINVAL;
1058
1059 ret = rw_verify_area(WRITE, out, ppos, len);
1060 if (unlikely(ret < 0))
1061 return ret;
1062
1063 if (out->f_op && out->f_op->splice_write)
1064 splice_write = out->f_op->splice_write;
1065 else
1066 splice_write = default_file_splice_write;
1067
1068 return splice_write(pipe, out, ppos, len, flags);
1069}
1070
1071
1072
1073
1074static long do_splice_to(struct file *in, loff_t *ppos,
1075 struct pipe_inode_info *pipe, size_t len,
1076 unsigned int flags)
1077{
1078 ssize_t (*splice_read)(struct file *, loff_t *,
1079 struct pipe_inode_info *, size_t, unsigned int);
1080 int ret;
1081
1082 if (unlikely(!(in->f_mode & FMODE_READ)))
1083 return -EBADF;
1084
1085 ret = rw_verify_area(READ, in, ppos, len);
1086 if (unlikely(ret < 0))
1087 return ret;
1088
1089 if (in->f_op && in->f_op->splice_read)
1090 splice_read = in->f_op->splice_read;
1091 else
1092 splice_read = default_file_splice_read;
1093
1094 return splice_read(in, ppos, pipe, len, flags);
1095}
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
/**
 * splice_direct_to_actor - splice data from a file through an internal pipe
 * @in:		file to splice from (must be a regular file or block device)
 * @sd:		splice state: total_len is the amount wanted, pos the
 *		input position, flags the splice flags
 * @actor:	consumer called with the internal pipe after each fill
 *
 * Uses the pipe cached in current->splice_pipe, allocating it on first
 * use.  Repeatedly fills the pipe from @in with do_splice_to() and lets
 * @actor drain it, keeping @sd->pos in step with what the actor
 * actually consumed.
 *
 * Returns the number of bytes the actor consumed, or, if that is zero,
 * the last status from do_splice_to()/@actor.  Returns -EINVAL for
 * unsupported file types and -ENOMEM if the pipe cannot be allocated.
 */
ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
			       splice_direct_actor *actor)
{
	struct pipe_inode_info *pipe;
	long ret, bytes;
	umode_t i_mode;
	size_t len;
	int i, flags;

	/*
	 * Only regular files and block devices are accepted as the
	 * input side.
	 */
	i_mode = in->f_path.dentry->d_inode->i_mode;
	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
		return -EINVAL;

	/*
	 * The internal pipe is cached per task in current->splice_pipe
	 * and created lazily here.
	 */
	pipe = current->splice_pipe;
	if (unlikely(!pipe)) {
		pipe = alloc_pipe_info(NULL);
		if (!pipe)
			return -ENOMEM;

		/*
		 * Pretend there is a reader: splice_to_pipe() fails with
		 * -EPIPE/SIGPIPE when pipe->readers is zero.
		 */
		pipe->readers = 1;

		current->splice_pipe = pipe;
	}

	/*
	 * Do the splice.
	 */
	ret = 0;
	bytes = 0;
	len = sd->total_len;
	flags = sd->flags;

	/*
	 * The saved 'flags' copy (used for the input side below) keeps
	 * SPLICE_F_NONBLOCK as supplied; the flag is cleared only in
	 * sd->flags, which is what the actor sees.
	 */
	sd->flags &= ~SPLICE_F_NONBLOCK;

	while (len) {
		size_t read_len;
		loff_t pos = sd->pos, prev_pos = pos;

		/* Fill the pipe; do_splice_to() advances the local 'pos'. */
		ret = do_splice_to(in, &pos, pipe, len, flags);
		if (unlikely(ret <= 0))
			goto out_release;

		read_len = ret;
		sd->total_len = read_len;

		/*
		 * Hand exactly what was read to the actor.  If it consumes
		 * nothing, rewind sd->pos to where this round started.
		 */
		ret = actor(pipe, sd);
		if (unlikely(ret <= 0)) {
			sd->pos = prev_pos;
			goto out_release;
		}

		bytes += ret;
		len -= ret;
		sd->pos = pos;

		/*
		 * Short output: position the input right after the bytes
		 * that were consumed and stop.
		 */
		if (ret < read_len) {
			sd->pos = prev_pos + ret;
			goto out_release;
		}
	}

done:
	/* Reset the ring so the cached pipe starts out empty next time. */
	pipe->nrbufs = pipe->curbuf = 0;
	file_accessed(in);
	return bytes;

out_release:
	/*
	 * Early exit: release every buffer that is still attached to the
	 * pipe so nothing is left behind in the cached pipe.
	 */
	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;

		if (buf->ops) {
			buf->ops->release(pipe, buf);
			buf->ops = NULL;
		}
	}

	/* Nothing consumed at all: report the last status instead. */
	if (!bytes)
		bytes = ret;

	goto done;
}
EXPORT_SYMBOL(splice_direct_to_actor);
1218
1219static int direct_splice_actor(struct pipe_inode_info *pipe,
1220 struct splice_desc *sd)
1221{
1222 struct file *file = sd->u.file;
1223
1224 return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
1225 sd->flags);
1226}
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1244 size_t len, unsigned int flags)
1245{
1246 struct splice_desc sd = {
1247 .len = len,
1248 .total_len = len,
1249 .flags = flags,
1250 .pos = *ppos,
1251 .u.file = out,
1252 };
1253 long ret;
1254
1255 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1256 if (ret > 0)
1257 *ppos = sd.pos;
1258
1259 return ret;
1260}
1261
/*
 * Forward declaration: do_splice() calls this for the pipe-to-pipe
 * case, and the definition comes later in the file.
 */
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
			       struct pipe_inode_info *opipe,
			       size_t len, unsigned int flags);
1265
1266
1267
1268
1269
1270static inline struct pipe_inode_info *pipe_info(struct inode *inode)
1271{
1272 if (S_ISFIFO(inode->i_mode))
1273 return inode->i_pipe;
1274
1275 return NULL;
1276}
1277
1278
1279
1280
1281static long do_splice(struct file *in, loff_t __user *off_in,
1282 struct file *out, loff_t __user *off_out,
1283 size_t len, unsigned int flags)
1284{
1285 struct pipe_inode_info *ipipe;
1286 struct pipe_inode_info *opipe;
1287 loff_t offset, *off;
1288 long ret;
1289
1290 ipipe = pipe_info(in->f_path.dentry->d_inode);
1291 opipe = pipe_info(out->f_path.dentry->d_inode);
1292
1293 if (ipipe && opipe) {
1294 if (off_in || off_out)
1295 return -ESPIPE;
1296
1297 if (!(in->f_mode & FMODE_READ))
1298 return -EBADF;
1299
1300 if (!(out->f_mode & FMODE_WRITE))
1301 return -EBADF;
1302
1303
1304 if (ipipe == opipe)
1305 return -EINVAL;
1306
1307 return splice_pipe_to_pipe(ipipe, opipe, len, flags);
1308 }
1309
1310 if (ipipe) {
1311 if (off_in)
1312 return -ESPIPE;
1313 if (off_out) {
1314 if (!out->f_op || !out->f_op->llseek ||
1315 out->f_op->llseek == no_llseek)
1316 return -EINVAL;
1317 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1318 return -EFAULT;
1319 off = &offset;
1320 } else
1321 off = &out->f_pos;
1322
1323 ret = do_splice_from(ipipe, out, off, len, flags);
1324
1325 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
1326 ret = -EFAULT;
1327
1328 return ret;
1329 }
1330
1331 if (opipe) {
1332 if (off_out)
1333 return -ESPIPE;
1334 if (off_in) {
1335 if (!in->f_op || !in->f_op->llseek ||
1336 in->f_op->llseek == no_llseek)
1337 return -EINVAL;
1338 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1339 return -EFAULT;
1340 off = &offset;
1341 } else
1342 off = &in->f_pos;
1343
1344 ret = do_splice_to(in, off, opipe, len, flags);
1345
1346 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
1347 ret = -EFAULT;
1348
1349 return ret;
1350 }
1351
1352 return -EINVAL;
1353}
1354
1355
1356
1357
1358
1359
1360
1361
/*
 * Pin the user pages described by an iovec array and build the
 * matching pages[]/partial[] maps for splice_to_pipe().
 *
 * @iov:      user-space iovec array
 * @nr_vecs:  number of entries in @iov
 * @pages:    output array of pinned pages (at most PIPE_BUFFERS)
 * @partial:  output array of per-page offset/length
 * @aligned:  if non-zero, every base and length must be page aligned
 *
 * Returns the number of pages mapped if that is non-zero; otherwise the
 * last value of 'error' (0 for an empty first segment, or a negative
 * error code).
 */
static int get_iovec_page_array(const struct iovec __user *iov,
				unsigned int nr_vecs, struct page **pages,
				struct partial_page *partial, int aligned)
{
	int buffers = 0, error = 0;

	while (nr_vecs) {
		unsigned long off, npages;
		struct iovec entry;
		void __user *base;
		size_t len;
		int i;

		/* 'error' is preset before each check that may break out. */
		error = -EFAULT;
		if (copy_from_user(&entry, iov, sizeof(entry)))
			break;

		base = entry.iov_base;
		len = entry.iov_len;

		/*
		 * A zero-length segment ends processing without an error.
		 */
		error = 0;
		if (unlikely(!len))
			break;
		error = -EFAULT;
		if (!access_ok(VERIFY_READ, base, len))
			break;

		/*
		 * Offset of the buffer start within its first page.
		 */
		off = (unsigned long) base & ~PAGE_MASK;

		/*
		 * In aligned mode both the start address and the length
		 * must be multiples of the page size.
		 */
		error = -EINVAL;
		if (aligned && (off || len & ~PAGE_MASK))
			break;

		/* Pages spanned by this segment, capped to the room left. */
		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (npages > PIPE_BUFFERS - buffers)
			npages = PIPE_BUFFERS - buffers;

		/*
		 * Pin the pages.  NOTE(review): the third argument (0) is
		 * presumably the "write" flag, i.e. pages are pinned for
		 * reading only -- confirm against get_user_pages_fast().
		 */
		error = get_user_pages_fast((unsigned long)base, npages,
					0, &pages[buffers]);

		if (unlikely(error <= 0))
			break;

		/*
		 * Fill in the offset/length for each page that was pinned
		 * ('error' holds the count here).
		 */
		for (i = 0; i < error; i++) {
			const int plen = min_t(size_t, len, PAGE_SIZE - off);

			partial[buffers].offset = off;
			partial[buffers].len = plen;

			off = 0;
			len -= plen;
			buffers++;
		}

		/*
		 * Part of this segment is still unmapped (fewer pages were
		 * pinned than it spans): stop with what we have.
		 */
		if (len)
			break;

		/*
		 * Stop as well on a short pin or once the page arrays are
		 * full.
		 */
		if (error < npages || buffers == PIPE_BUFFERS)
			break;

		nr_vecs--;
		iov++;
	}

	/* Any mapped pages take precedence over a later error. */
	if (buffers)
		return buffers;

	return error;
}
1455
1456static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1457 struct splice_desc *sd)
1458{
1459 char *src;
1460 int ret;
1461
1462 ret = buf->ops->confirm(pipe, buf);
1463 if (unlikely(ret))
1464 return ret;
1465
1466
1467
1468
1469
1470 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1471 src = buf->ops->map(pipe, buf, 1);
1472 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1473 sd->len);
1474 buf->ops->unmap(pipe, buf, src);
1475 if (!ret) {
1476 ret = sd->len;
1477 goto out;
1478 }
1479 }
1480
1481
1482
1483
1484 src = buf->ops->map(pipe, buf, 0);
1485
1486 ret = sd->len;
1487 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1488 ret = -EFAULT;
1489
1490 buf->ops->unmap(pipe, buf, src);
1491out:
1492 if (ret > 0)
1493 sd->u.userptr += ret;
1494 return ret;
1495}
1496
1497
1498
1499
1500
1501static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1502 unsigned long nr_segs, unsigned int flags)
1503{
1504 struct pipe_inode_info *pipe;
1505 struct splice_desc sd;
1506 ssize_t size;
1507 int error;
1508 long ret;
1509
1510 pipe = pipe_info(file->f_path.dentry->d_inode);
1511 if (!pipe)
1512 return -EBADF;
1513
1514 pipe_lock(pipe);
1515
1516 error = ret = 0;
1517 while (nr_segs) {
1518 void __user *base;
1519 size_t len;
1520
1521
1522
1523
1524 error = get_user(base, &iov->iov_base);
1525 if (unlikely(error))
1526 break;
1527 error = get_user(len, &iov->iov_len);
1528 if (unlikely(error))
1529 break;
1530
1531
1532
1533
1534 if (unlikely(!len))
1535 break;
1536 if (unlikely(!base)) {
1537 error = -EFAULT;
1538 break;
1539 }
1540
1541 if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
1542 error = -EFAULT;
1543 break;
1544 }
1545
1546 sd.len = 0;
1547 sd.total_len = len;
1548 sd.flags = flags;
1549 sd.u.userptr = base;
1550 sd.pos = 0;
1551
1552 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1553 if (size < 0) {
1554 if (!ret)
1555 ret = size;
1556
1557 break;
1558 }
1559
1560 ret += size;
1561
1562 if (size < len)
1563 break;
1564
1565 nr_segs--;
1566 iov++;
1567 }
1568
1569 pipe_unlock(pipe);
1570
1571 if (!ret)
1572 ret = error;
1573
1574 return ret;
1575}
1576
1577
1578
1579
1580
1581
1582static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1583 unsigned long nr_segs, unsigned int flags)
1584{
1585 struct pipe_inode_info *pipe;
1586 struct page *pages[PIPE_BUFFERS];
1587 struct partial_page partial[PIPE_BUFFERS];
1588 struct splice_pipe_desc spd = {
1589 .pages = pages,
1590 .partial = partial,
1591 .flags = flags,
1592 .ops = &user_page_pipe_buf_ops,
1593 .spd_release = spd_release_page,
1594 };
1595
1596 pipe = pipe_info(file->f_path.dentry->d_inode);
1597 if (!pipe)
1598 return -EBADF;
1599
1600 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1601 flags & SPLICE_F_GIFT);
1602 if (spd.nr_pages <= 0)
1603 return spd.nr_pages;
1604
1605 return splice_to_pipe(pipe, &spd);
1606}
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1625 unsigned long, nr_segs, unsigned int, flags)
1626{
1627 struct file *file;
1628 long error;
1629 int fput;
1630
1631 if (unlikely(nr_segs > UIO_MAXIOV))
1632 return -EINVAL;
1633 else if (unlikely(!nr_segs))
1634 return 0;
1635
1636 error = -EBADF;
1637 file = fget_light(fd, &fput);
1638 if (file) {
1639 if (file->f_mode & FMODE_WRITE)
1640 error = vmsplice_to_pipe(file, iov, nr_segs, flags);
1641 else if (file->f_mode & FMODE_READ)
1642 error = vmsplice_to_user(file, iov, nr_segs, flags);
1643
1644 fput_light(file, fput);
1645 }
1646
1647 return error;
1648}
1649
1650SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1651 int, fd_out, loff_t __user *, off_out,
1652 size_t, len, unsigned int, flags)
1653{
1654 long error;
1655 struct file *in, *out;
1656 int fput_in, fput_out;
1657
1658 if (unlikely(!len))
1659 return 0;
1660
1661 error = -EBADF;
1662 in = fget_light(fd_in, &fput_in);
1663 if (in) {
1664 if (in->f_mode & FMODE_READ) {
1665 out = fget_light(fd_out, &fput_out);
1666 if (out) {
1667 if (out->f_mode & FMODE_WRITE)
1668 error = do_splice(in, off_in,
1669 out, off_out,
1670 len, flags);
1671 fput_light(out, fput_out);
1672 }
1673 }
1674
1675 fput_light(in, fput_in);
1676 }
1677
1678 return error;
1679}
1680
1681
1682
1683
1684
1685static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1686{
1687 int ret;
1688
1689
1690
1691
1692
1693 if (pipe->nrbufs)
1694 return 0;
1695
1696 ret = 0;
1697 pipe_lock(pipe);
1698
1699 while (!pipe->nrbufs) {
1700 if (signal_pending(current)) {
1701 ret = -ERESTARTSYS;
1702 break;
1703 }
1704 if (!pipe->writers)
1705 break;
1706 if (!pipe->waiting_writers) {
1707 if (flags & SPLICE_F_NONBLOCK) {
1708 ret = -EAGAIN;
1709 break;
1710 }
1711 }
1712 pipe_wait(pipe);
1713 }
1714
1715 pipe_unlock(pipe);
1716 return ret;
1717}
1718
1719
1720
1721
1722
1723static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1724{
1725 int ret;
1726
1727
1728
1729
1730
1731 if (pipe->nrbufs < PIPE_BUFFERS)
1732 return 0;
1733
1734 ret = 0;
1735 pipe_lock(pipe);
1736
1737 while (pipe->nrbufs >= PIPE_BUFFERS) {
1738 if (!pipe->readers) {
1739 send_sig(SIGPIPE, current, 0);
1740 ret = -EPIPE;
1741 break;
1742 }
1743 if (flags & SPLICE_F_NONBLOCK) {
1744 ret = -EAGAIN;
1745 break;
1746 }
1747 if (signal_pending(current)) {
1748 ret = -ERESTARTSYS;
1749 break;
1750 }
1751 pipe->waiting_writers++;
1752 pipe_wait(pipe);
1753 pipe->waiting_writers--;
1754 }
1755
1756 pipe_unlock(pipe);
1757 return ret;
1758}
1759
1760
1761
1762
1763static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1764 struct pipe_inode_info *opipe,
1765 size_t len, unsigned int flags)
1766{
1767 struct pipe_buffer *ibuf, *obuf;
1768 int ret = 0, nbuf;
1769 bool input_wakeup = false;
1770
1771
1772retry:
1773 ret = ipipe_prep(ipipe, flags);
1774 if (ret)
1775 return ret;
1776
1777 ret = opipe_prep(opipe, flags);
1778 if (ret)
1779 return ret;
1780
1781
1782
1783
1784
1785
1786 pipe_double_lock(ipipe, opipe);
1787
1788 do {
1789 if (!opipe->readers) {
1790 send_sig(SIGPIPE, current, 0);
1791 if (!ret)
1792 ret = -EPIPE;
1793 break;
1794 }
1795
1796 if (!ipipe->nrbufs && !ipipe->writers)
1797 break;
1798
1799
1800
1801
1802
1803 if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
1804
1805 if (ret)
1806 break;
1807
1808 if (flags & SPLICE_F_NONBLOCK) {
1809 ret = -EAGAIN;
1810 break;
1811 }
1812
1813
1814
1815
1816
1817
1818 pipe_unlock(ipipe);
1819 pipe_unlock(opipe);
1820 goto retry;
1821 }
1822
1823 ibuf = ipipe->bufs + ipipe->curbuf;
1824 nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
1825 obuf = opipe->bufs + nbuf;
1826
1827 if (len >= ibuf->len) {
1828
1829
1830
1831 *obuf = *ibuf;
1832 ibuf->ops = NULL;
1833 opipe->nrbufs++;
1834 ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
1835 ipipe->nrbufs--;
1836 input_wakeup = true;
1837 } else {
1838
1839
1840
1841
1842 ibuf->ops->get(ipipe, ibuf);
1843 *obuf = *ibuf;
1844
1845
1846
1847
1848
1849 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1850
1851 obuf->len = len;
1852 opipe->nrbufs++;
1853 ibuf->offset += obuf->len;
1854 ibuf->len -= obuf->len;
1855 }
1856 ret += obuf->len;
1857 len -= obuf->len;
1858 } while (len);
1859
1860 pipe_unlock(ipipe);
1861 pipe_unlock(opipe);
1862
1863
1864
1865
1866 if (ret > 0) {
1867 smp_mb();
1868 if (waitqueue_active(&opipe->wait))
1869 wake_up_interruptible(&opipe->wait);
1870 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1871 }
1872 if (input_wakeup)
1873 wakeup_pipe_writers(ipipe);
1874
1875 return ret;
1876}
1877
1878
1879
1880
1881static int link_pipe(struct pipe_inode_info *ipipe,
1882 struct pipe_inode_info *opipe,
1883 size_t len, unsigned int flags)
1884{
1885 struct pipe_buffer *ibuf, *obuf;
1886 int ret = 0, i = 0, nbuf;
1887
1888
1889
1890
1891
1892
1893 pipe_double_lock(ipipe, opipe);
1894
1895 do {
1896 if (!opipe->readers) {
1897 send_sig(SIGPIPE, current, 0);
1898 if (!ret)
1899 ret = -EPIPE;
1900 break;
1901 }
1902
1903
1904
1905
1906
1907 if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
1908 break;
1909
1910 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1911 nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1912
1913
1914
1915
1916
1917 ibuf->ops->get(ipipe, ibuf);
1918
1919 obuf = opipe->bufs + nbuf;
1920 *obuf = *ibuf;
1921
1922
1923
1924
1925
1926 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1927
1928 if (obuf->len > len)
1929 obuf->len = len;
1930
1931 opipe->nrbufs++;
1932 ret += obuf->len;
1933 len -= obuf->len;
1934 i++;
1935 } while (len);
1936
1937
1938
1939
1940
1941 if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
1942 ret = -EAGAIN;
1943
1944 pipe_unlock(ipipe);
1945 pipe_unlock(opipe);
1946
1947
1948
1949
1950 if (ret > 0) {
1951 smp_mb();
1952 if (waitqueue_active(&opipe->wait))
1953 wake_up_interruptible(&opipe->wait);
1954 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1955 }
1956
1957 return ret;
1958}
1959
1960
1961
1962
1963
1964
1965
1966static long do_tee(struct file *in, struct file *out, size_t len,
1967 unsigned int flags)
1968{
1969 struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
1970 struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
1971 int ret = -EINVAL;
1972
1973
1974
1975
1976
1977 if (ipipe && opipe && ipipe != opipe) {
1978
1979
1980
1981
1982 ret = ipipe_prep(ipipe, flags);
1983 if (!ret) {
1984 ret = opipe_prep(opipe, flags);
1985 if (!ret)
1986 ret = link_pipe(ipipe, opipe, len, flags);
1987 }
1988 }
1989
1990 return ret;
1991}
1992
1993SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
1994{
1995 struct file *in;
1996 int error, fput_in;
1997
1998 if (unlikely(!len))
1999 return 0;
2000
2001 error = -EBADF;
2002 in = fget_light(fdin, &fput_in);
2003 if (in) {
2004 if (in->f_mode & FMODE_READ) {
2005 int fput_out;
2006 struct file *out = fget_light(fdout, &fput_out);
2007
2008 if (out) {
2009 if (out->f_mode & FMODE_WRITE)
2010 error = do_tee(in, out, len, flags);
2011 fput_light(out, fput_out);
2012 }
2013 }
2014 fput_light(in, fput_in);
2015 }
2016
2017 return error;
2018}
2019