linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/smp_lock.h>
  13#include <linux/fsnotify.h>
  14#include <linux/security.h>
  15#include <linux/module.h>
  16#include <linux/syscalls.h>
  17#include <linux/pagemap.h>
  18#include <linux/splice.h>
  19#include "read_write.h"
  20
  21#include <asm/uaccess.h>
  22#include <asm/unistd.h>
  23
  24const struct file_operations generic_ro_fops = {
  25        .llseek         = generic_file_llseek,
  26        .read           = do_sync_read,
  27        .aio_read       = generic_file_aio_read,
  28        .mmap           = generic_file_readonly_mmap,
  29        .splice_read    = generic_file_splice_read,
  30};
  31
  32EXPORT_SYMBOL(generic_ro_fops);
  33
  34loff_t
  35generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
  36{
  37        loff_t retval;
  38        struct inode *inode = file->f_mapping->host;
  39
  40        switch (origin) {
  41                case SEEK_END:
  42                        offset += inode->i_size;
  43                        break;
  44                case SEEK_CUR:
  45                        offset += file->f_pos;
  46        }
  47        retval = -EINVAL;
  48        if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
  49                /* Special lock needed here? */
  50                if (offset != file->f_pos) {
  51                        file->f_pos = offset;
  52                        file->f_version = 0;
  53                }
  54                retval = offset;
  55        }
  56        return retval;
  57}
  58EXPORT_SYMBOL(generic_file_llseek_unlocked);
  59
  60loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
  61{
  62        loff_t n;
  63        mutex_lock(&file->f_dentry->d_inode->i_mutex);
  64        n = generic_file_llseek_unlocked(file, offset, origin);
  65        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
  66        return n;
  67}
  68EXPORT_SYMBOL(generic_file_llseek);
  69
  70loff_t no_llseek(struct file *file, loff_t offset, int origin)
  71{
  72        return -ESPIPE;
  73}
  74EXPORT_SYMBOL(no_llseek);
  75
  76loff_t default_llseek(struct file *file, loff_t offset, int origin)
  77{
  78        loff_t retval;
  79
  80        lock_kernel();
  81        switch (origin) {
  82                case SEEK_END:
  83                        offset += i_size_read(file->f_path.dentry->d_inode);
  84                        break;
  85                case SEEK_CUR:
  86                        offset += file->f_pos;
  87        }
  88        retval = -EINVAL;
  89        if (offset >= 0) {
  90                if (offset != file->f_pos) {
  91                        file->f_pos = offset;
  92                        file->f_version = 0;
  93                }
  94                retval = offset;
  95        }
  96        unlock_kernel();
  97        return retval;
  98}
  99EXPORT_SYMBOL(default_llseek);
 100
 101loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 102{
 103        loff_t (*fn)(struct file *, loff_t, int);
 104
 105        fn = no_llseek;
 106        if (file->f_mode & FMODE_LSEEK) {
 107                fn = default_llseek;
 108                if (file->f_op && file->f_op->llseek)
 109                        fn = file->f_op->llseek;
 110        }
 111        return fn(file, offset, origin);
 112}
 113EXPORT_SYMBOL(vfs_llseek);
 114
 115SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 116{
 117        off_t retval;
 118        struct file * file;
 119        int fput_needed;
 120
 121        retval = -EBADF;
 122        file = fget_light(fd, &fput_needed);
 123        if (!file)
 124                goto bad;
 125
 126        retval = -EINVAL;
 127        if (origin <= SEEK_MAX) {
 128                loff_t res = vfs_llseek(file, offset, origin);
 129                retval = res;
 130                if (res != (loff_t)retval)
 131                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 132        }
 133        fput_light(file, fput_needed);
 134bad:
 135        return retval;
 136}
 137
 138#ifdef __ARCH_WANT_SYS_LLSEEK
 139SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 140                unsigned long, offset_low, loff_t __user *, result,
 141                unsigned int, origin)
 142{
 143        int retval;
 144        struct file * file;
 145        loff_t offset;
 146        int fput_needed;
 147
 148        retval = -EBADF;
 149        file = fget_light(fd, &fput_needed);
 150        if (!file)
 151                goto bad;
 152
 153        retval = -EINVAL;
 154        if (origin > SEEK_MAX)
 155                goto out_putf;
 156
 157        offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
 158                        origin);
 159
 160        retval = (int)offset;
 161        if (offset >= 0) {
 162                retval = -EFAULT;
 163                if (!copy_to_user(result, &offset, sizeof(offset)))
 164                        retval = 0;
 165        }
 166out_putf:
 167        fput_light(file, fput_needed);
 168bad:
 169        return retval;
 170}
 171#endif
 172
 173/*
 174 * rw_verify_area doesn't like huge counts. We limit
 175 * them to something that fits in "int" so that others
 176 * won't have to do range checks all the time.
 177 */
 178#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
 179
 180int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 181{
 182        struct inode *inode;
 183        loff_t pos;
 184        int retval = -EINVAL;
 185
 186        inode = file->f_path.dentry->d_inode;
 187        if (unlikely((ssize_t) count < 0))
 188                return retval;
 189        pos = *ppos;
 190        if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
 191                return retval;
 192
 193        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 194                retval = locks_mandatory_area(
 195                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 196                        inode, file, pos, count);
 197                if (retval < 0)
 198                        return retval;
 199        }
 200        retval = security_file_permission(file,
 201                                read_write == READ ? MAY_READ : MAY_WRITE);
 202        if (retval)
 203                return retval;
 204        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 205}
 206
 207static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 208{
 209        set_current_state(TASK_UNINTERRUPTIBLE);
 210        if (!kiocbIsKicked(iocb))
 211                schedule();
 212        else
 213                kiocbClearKicked(iocb);
 214        __set_current_state(TASK_RUNNING);
 215}
 216
 217ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 218{
 219        struct iovec iov = { .iov_base = buf, .iov_len = len };
 220        struct kiocb kiocb;
 221        ssize_t ret;
 222
 223        init_sync_kiocb(&kiocb, filp);
 224        kiocb.ki_pos = *ppos;
 225        kiocb.ki_left = len;
 226
 227        for (;;) {
 228                ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 229                if (ret != -EIOCBRETRY)
 230                        break;
 231                wait_on_retry_sync_kiocb(&kiocb);
 232        }
 233
 234        if (-EIOCBQUEUED == ret)
 235                ret = wait_on_sync_kiocb(&kiocb);
 236        *ppos = kiocb.ki_pos;
 237        return ret;
 238}
 239
 240EXPORT_SYMBOL(do_sync_read);
 241
 242ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 243{
 244        ssize_t ret;
 245
 246        if (!(file->f_mode & FMODE_READ))
 247                return -EBADF;
 248        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 249                return -EINVAL;
 250        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 251                return -EFAULT;
 252
 253        ret = rw_verify_area(READ, file, pos, count);
 254        if (ret >= 0) {
 255                count = ret;
 256                if (file->f_op->read)
 257                        ret = file->f_op->read(file, buf, count, pos);
 258                else
 259                        ret = do_sync_read(file, buf, count, pos);
 260                if (ret > 0) {
 261                        fsnotify_access(file->f_path.dentry);
 262                        add_rchar(current, ret);
 263                }
 264                inc_syscr(current);
 265        }
 266
 267        return ret;
 268}
 269
 270EXPORT_SYMBOL(vfs_read);
 271
 272ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 273{
 274        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 275        struct kiocb kiocb;
 276        ssize_t ret;
 277
 278        init_sync_kiocb(&kiocb, filp);
 279        kiocb.ki_pos = *ppos;
 280        kiocb.ki_left = len;
 281
 282        for (;;) {
 283                ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 284                if (ret != -EIOCBRETRY)
 285                        break;
 286                wait_on_retry_sync_kiocb(&kiocb);
 287        }
 288
 289        if (-EIOCBQUEUED == ret)
 290                ret = wait_on_sync_kiocb(&kiocb);
 291        *ppos = kiocb.ki_pos;
 292        return ret;
 293}
 294
 295EXPORT_SYMBOL(do_sync_write);
 296
 297ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 298{
 299        ssize_t ret;
 300
 301        if (!(file->f_mode & FMODE_WRITE))
 302                return -EBADF;
 303        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 304                return -EINVAL;
 305        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 306                return -EFAULT;
 307
 308        ret = rw_verify_area(WRITE, file, pos, count);
 309        if (ret >= 0) {
 310                count = ret;
 311                if (file->f_op->write)
 312                        ret = file->f_op->write(file, buf, count, pos);
 313                else
 314                        ret = do_sync_write(file, buf, count, pos);
 315                if (ret > 0) {
 316                        fsnotify_modify(file->f_path.dentry);
 317                        add_wchar(current, ret);
 318                }
 319                inc_syscw(current);
 320        }
 321
 322        return ret;
 323}
 324
 325EXPORT_SYMBOL(vfs_write);
 326
 327static inline loff_t file_pos_read(struct file *file)
 328{
 329        return file->f_pos;
 330}
 331
 332static inline void file_pos_write(struct file *file, loff_t pos)
 333{
 334        file->f_pos = pos;
 335}
 336
 337SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 338{
 339        struct file *file;
 340        ssize_t ret = -EBADF;
 341        int fput_needed;
 342
 343        file = fget_light(fd, &fput_needed);
 344        if (file) {
 345                loff_t pos = file_pos_read(file);
 346                ret = vfs_read(file, buf, count, &pos);
 347                file_pos_write(file, pos);
 348                fput_light(file, fput_needed);
 349        }
 350
 351        return ret;
 352}
 353
 354SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 355                size_t, count)
 356{
 357        struct file *file;
 358        ssize_t ret = -EBADF;
 359        int fput_needed;
 360
 361        file = fget_light(fd, &fput_needed);
 362        if (file) {
 363                loff_t pos = file_pos_read(file);
 364                ret = vfs_write(file, buf, count, &pos);
 365                file_pos_write(file, pos);
 366                fput_light(file, fput_needed);
 367        }
 368
 369        return ret;
 370}
 371
 372SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 373                        size_t count, loff_t pos)
 374{
 375        struct file *file;
 376        ssize_t ret = -EBADF;
 377        int fput_needed;
 378
 379        if (pos < 0)
 380                return -EINVAL;
 381
 382        file = fget_light(fd, &fput_needed);
 383        if (file) {
 384                ret = -ESPIPE;
 385                if (file->f_mode & FMODE_PREAD)
 386                        ret = vfs_read(file, buf, count, &pos);
 387                fput_light(file, fput_needed);
 388        }
 389
 390        return ret;
 391}
 392#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 393asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
 394{
 395        return SYSC_pread64((unsigned int) fd, (char __user *) buf,
 396                            (size_t) count, pos);
 397}
 398SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 399#endif
 400
 401SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 402                         size_t count, loff_t pos)
 403{
 404        struct file *file;
 405        ssize_t ret = -EBADF;
 406        int fput_needed;
 407
 408        if (pos < 0)
 409                return -EINVAL;
 410
 411        file = fget_light(fd, &fput_needed);
 412        if (file) {
 413                ret = -ESPIPE;
 414                if (file->f_mode & FMODE_PWRITE)  
 415                        ret = vfs_write(file, buf, count, &pos);
 416                fput_light(file, fput_needed);
 417        }
 418
 419        return ret;
 420}
 421#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 422asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
 423{
 424        return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
 425                             (size_t) count, pos);
 426}
 427SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
 428#endif
 429
 430/*
 431 * Reduce an iovec's length in-place.  Return the resulting number of segments
 432 */
 433unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 434{
 435        unsigned long seg = 0;
 436        size_t len = 0;
 437
 438        while (seg < nr_segs) {
 439                seg++;
 440                if (len + iov->iov_len >= to) {
 441                        iov->iov_len = to - len;
 442                        break;
 443                }
 444                len += iov->iov_len;
 445                iov++;
 446        }
 447        return seg;
 448}
 449EXPORT_SYMBOL(iov_shorten);
 450
 451ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 452                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 453{
 454        struct kiocb kiocb;
 455        ssize_t ret;
 456
 457        init_sync_kiocb(&kiocb, filp);
 458        kiocb.ki_pos = *ppos;
 459        kiocb.ki_left = len;
 460        kiocb.ki_nbytes = len;
 461
 462        for (;;) {
 463                ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 464                if (ret != -EIOCBRETRY)
 465                        break;
 466                wait_on_retry_sync_kiocb(&kiocb);
 467        }
 468
 469        if (ret == -EIOCBQUEUED)
 470                ret = wait_on_sync_kiocb(&kiocb);
 471        *ppos = kiocb.ki_pos;
 472        return ret;
 473}
 474
 475/* Do it by hand, with file-ops */
 476ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 477                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 478{
 479        struct iovec *vector = iov;
 480        ssize_t ret = 0;
 481
 482        while (nr_segs > 0) {
 483                void __user *base;
 484                size_t len;
 485                ssize_t nr;
 486
 487                base = vector->iov_base;
 488                len = vector->iov_len;
 489                vector++;
 490                nr_segs--;
 491
 492                nr = fn(filp, base, len, ppos);
 493
 494                if (nr < 0) {
 495                        if (!ret)
 496                                ret = nr;
 497                        break;
 498                }
 499                ret += nr;
 500                if (nr != len)
 501                        break;
 502        }
 503
 504        return ret;
 505}
 506
 507/* A write operation does a read from user space and vice versa */
 508#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 509
 510ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 511                              unsigned long nr_segs, unsigned long fast_segs,
 512                              struct iovec *fast_pointer,
 513                              struct iovec **ret_pointer)
 514  {
 515        unsigned long seg;
 516        ssize_t ret;
 517        struct iovec *iov = fast_pointer;
 518
 519        /*
 520         * SuS says "The readv() function *may* fail if the iovcnt argument
 521         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 522         * traditionally returned zero for zero segments, so...
 523         */
 524        if (nr_segs == 0) {
 525                ret = 0;
 526                goto out;
 527        }
 528
 529        /*
 530         * First get the "struct iovec" from user memory and
 531         * verify all the pointers
 532         */
 533        if (nr_segs > UIO_MAXIOV) {
 534                ret = -EINVAL;
 535                goto out;
 536        }
 537        if (nr_segs > fast_segs) {
 538                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 539                if (iov == NULL) {
 540                        ret = -ENOMEM;
 541                        goto out;
 542                }
 543        }
 544        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 545                ret = -EFAULT;
 546                goto out;
 547        }
 548
 549        /*
 550         * According to the Single Unix Specification we should return EINVAL
 551         * if an element length is < 0 when cast to ssize_t or if the
 552         * total length would overflow the ssize_t return value of the
 553         * system call.
 554         */
 555        ret = 0;
 556        for (seg = 0; seg < nr_segs; seg++) {
 557                void __user *buf = iov[seg].iov_base;
 558                ssize_t len = (ssize_t)iov[seg].iov_len;
 559
 560                /* see if we we're about to use an invalid len or if
 561                 * it's about to overflow ssize_t */
 562                if (len < 0 || (ret + len < ret)) {
 563                        ret = -EINVAL;
 564                        goto out;
 565                }
 566                if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 567                        ret = -EFAULT;
 568                        goto out;
 569                }
 570
 571                ret += len;
 572        }
 573out:
 574        *ret_pointer = iov;
 575        return ret;
 576}
 577
 578static ssize_t do_readv_writev(int type, struct file *file,
 579                               const struct iovec __user * uvector,
 580                               unsigned long nr_segs, loff_t *pos)
 581{
 582        size_t tot_len;
 583        struct iovec iovstack[UIO_FASTIOV];
 584        struct iovec *iov = iovstack;
 585        ssize_t ret;
 586        io_fn_t fn;
 587        iov_fn_t fnv;
 588
 589        if (!file->f_op) {
 590                ret = -EINVAL;
 591                goto out;
 592        }
 593
 594        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 595                        ARRAY_SIZE(iovstack), iovstack, &iov);
 596        if (ret <= 0)
 597                goto out;
 598
 599        tot_len = ret;
 600        ret = rw_verify_area(type, file, pos, tot_len);
 601        if (ret < 0)
 602                goto out;
 603
 604        fnv = NULL;
 605        if (type == READ) {
 606                fn = file->f_op->read;
 607                fnv = file->f_op->aio_read;
 608        } else {
 609                fn = (io_fn_t)file->f_op->write;
 610                fnv = file->f_op->aio_write;
 611        }
 612
 613        if (fnv)
 614                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 615                                                pos, fnv);
 616        else
 617                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 618
 619out:
 620        if (iov != iovstack)
 621                kfree(iov);
 622        if ((ret + (type == READ)) > 0) {
 623                if (type == READ)
 624                        fsnotify_access(file->f_path.dentry);
 625                else
 626                        fsnotify_modify(file->f_path.dentry);
 627        }
 628        return ret;
 629}
 630
 631ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 632                  unsigned long vlen, loff_t *pos)
 633{
 634        if (!(file->f_mode & FMODE_READ))
 635                return -EBADF;
 636        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 637                return -EINVAL;
 638
 639        return do_readv_writev(READ, file, vec, vlen, pos);
 640}
 641
 642EXPORT_SYMBOL(vfs_readv);
 643
 644ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 645                   unsigned long vlen, loff_t *pos)
 646{
 647        if (!(file->f_mode & FMODE_WRITE))
 648                return -EBADF;
 649        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 650                return -EINVAL;
 651
 652        return do_readv_writev(WRITE, file, vec, vlen, pos);
 653}
 654
 655EXPORT_SYMBOL(vfs_writev);
 656
 657SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 658                unsigned long, vlen)
 659{
 660        struct file *file;
 661        ssize_t ret = -EBADF;
 662        int fput_needed;
 663
 664        file = fget_light(fd, &fput_needed);
 665        if (file) {
 666                loff_t pos = file_pos_read(file);
 667                ret = vfs_readv(file, vec, vlen, &pos);
 668                file_pos_write(file, pos);
 669                fput_light(file, fput_needed);
 670        }
 671
 672        if (ret > 0)
 673                add_rchar(current, ret);
 674        inc_syscr(current);
 675        return ret;
 676}
 677
 678SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 679                unsigned long, vlen)
 680{
 681        struct file *file;
 682        ssize_t ret = -EBADF;
 683        int fput_needed;
 684
 685        file = fget_light(fd, &fput_needed);
 686        if (file) {
 687                loff_t pos = file_pos_read(file);
 688                ret = vfs_writev(file, vec, vlen, &pos);
 689                file_pos_write(file, pos);
 690                fput_light(file, fput_needed);
 691        }
 692
 693        if (ret > 0)
 694                add_wchar(current, ret);
 695        inc_syscw(current);
 696        return ret;
 697}
 698
 699static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 700                           size_t count, loff_t max)
 701{
 702        struct file * in_file, * out_file;
 703        struct inode * in_inode, * out_inode;
 704        loff_t pos;
 705        ssize_t retval;
 706        int fput_needed_in, fput_needed_out, fl;
 707
 708        /*
 709         * Get input file, and verify that it is ok..
 710         */
 711        retval = -EBADF;
 712        in_file = fget_light(in_fd, &fput_needed_in);
 713        if (!in_file)
 714                goto out;
 715        if (!(in_file->f_mode & FMODE_READ))
 716                goto fput_in;
 717        retval = -EINVAL;
 718        in_inode = in_file->f_path.dentry->d_inode;
 719        if (!in_inode)
 720                goto fput_in;
 721        if (!in_file->f_op || !in_file->f_op->splice_read)
 722                goto fput_in;
 723        retval = -ESPIPE;
 724        if (!ppos)
 725                ppos = &in_file->f_pos;
 726        else
 727                if (!(in_file->f_mode & FMODE_PREAD))
 728                        goto fput_in;
 729        retval = rw_verify_area(READ, in_file, ppos, count);
 730        if (retval < 0)
 731                goto fput_in;
 732        count = retval;
 733
 734        /*
 735         * Get output file, and verify that it is ok..
 736         */
 737        retval = -EBADF;
 738        out_file = fget_light(out_fd, &fput_needed_out);
 739        if (!out_file)
 740                goto fput_in;
 741        if (!(out_file->f_mode & FMODE_WRITE))
 742                goto fput_out;
 743        retval = -EINVAL;
 744        if (!out_file->f_op || !out_file->f_op->sendpage)
 745                goto fput_out;
 746        out_inode = out_file->f_path.dentry->d_inode;
 747        retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 748        if (retval < 0)
 749                goto fput_out;
 750        count = retval;
 751
 752        if (!max)
 753                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 754
 755        pos = *ppos;
 756        retval = -EINVAL;
 757        if (unlikely(pos < 0))
 758                goto fput_out;
 759        if (unlikely(pos + count > max)) {
 760                retval = -EOVERFLOW;
 761                if (pos >= max)
 762                        goto fput_out;
 763                count = max - pos;
 764        }
 765
 766        fl = 0;
 767#if 0
 768        /*
 769         * We need to debate whether we can enable this or not. The
 770         * man page documents EAGAIN return for the output at least,
 771         * and the application is arguably buggy if it doesn't expect
 772         * EAGAIN on a non-blocking file descriptor.
 773         */
 774        if (in_file->f_flags & O_NONBLOCK)
 775                fl = SPLICE_F_NONBLOCK;
 776#endif
 777        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 778
 779        if (retval > 0) {
 780                add_rchar(current, retval);
 781                add_wchar(current, retval);
 782        }
 783
 784        inc_syscr(current);
 785        inc_syscw(current);
 786        if (*ppos > max)
 787                retval = -EOVERFLOW;
 788
 789fput_out:
 790        fput_light(out_file, fput_needed_out);
 791fput_in:
 792        fput_light(in_file, fput_needed_in);
 793out:
 794        return retval;
 795}
 796
 797SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
 798{
 799        loff_t pos;
 800        off_t off;
 801        ssize_t ret;
 802
 803        if (offset) {
 804                if (unlikely(get_user(off, offset)))
 805                        return -EFAULT;
 806                pos = off;
 807                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
 808                if (unlikely(put_user(pos, offset)))
 809                        return -EFAULT;
 810                return ret;
 811        }
 812
 813        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 814}
 815
 816SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
 817{
 818        loff_t pos;
 819        ssize_t ret;
 820
 821        if (offset) {
 822                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
 823                        return -EFAULT;
 824                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
 825                if (unlikely(put_user(pos, offset)))
 826                        return -EFAULT;
 827                return ret;
 828        }
 829
 830        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 831}
 832
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.