linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/fsnotify.h>
  13#include <linux/security.h>
  14#include <linux/export.h>
  15#include <linux/syscalls.h>
  16#include <linux/pagemap.h>
  17#include <linux/splice.h>
  18#include "read_write.h"
  19
  20#include <asm/uaccess.h>
  21#include <asm/unistd.h>
  22
  23const struct file_operations generic_ro_fops = {
  24        .llseek         = generic_file_llseek,
  25        .read           = do_sync_read,
  26        .aio_read       = generic_file_aio_read,
  27        .mmap           = generic_file_readonly_mmap,
  28        .splice_read    = generic_file_splice_read,
  29};
  30
  31EXPORT_SYMBOL(generic_ro_fops);
  32
  33static inline int unsigned_offsets(struct file *file)
  34{
  35        return file->f_mode & FMODE_UNSIGNED_OFFSET;
  36}
  37
  38static loff_t lseek_execute(struct file *file, struct inode *inode,
  39                loff_t offset, loff_t maxsize)
  40{
  41        if (offset < 0 && !unsigned_offsets(file))
  42                return -EINVAL;
  43        if (offset > maxsize)
  44                return -EINVAL;
  45
  46        if (offset != file->f_pos) {
  47                file->f_pos = offset;
  48                file->f_version = 0;
  49        }
  50        return offset;
  51}
  52
  53/**
  54 * generic_file_llseek_size - generic llseek implementation for regular files
  55 * @file:       file structure to seek on
  56 * @offset:     file offset to seek to
  57 * @origin:     type of seek
  58 * @size:       max size of this file in file system
  59 * @eof:        offset used for SEEK_END position
  60 *
  61 * This is a variant of generic_file_llseek that allows passing in a custom
  62 * maximum file size and a custom EOF position, for e.g. hashed directories
  63 *
  64 * Synchronization:
  65 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
  66 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
  67 * read/writes behave like SEEK_SET against seeks.
  68 */
  69loff_t
  70generic_file_llseek_size(struct file *file, loff_t offset, int origin,
  71                loff_t maxsize, loff_t eof)
  72{
  73        struct inode *inode = file->f_mapping->host;
  74
  75        switch (origin) {
  76        case SEEK_END:
  77                offset += eof;
  78                break;
  79        case SEEK_CUR:
  80                /*
  81                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  82                 * position-querying operation.  Avoid rewriting the "same"
  83                 * f_pos value back to the file because a concurrent read(),
  84                 * write() or lseek() might have altered it
  85                 */
  86                if (offset == 0)
  87                        return file->f_pos;
  88                /*
  89                 * f_lock protects against read/modify/write race with other
  90                 * SEEK_CURs. Note that parallel writes and reads behave
  91                 * like SEEK_SET.
  92                 */
  93                spin_lock(&file->f_lock);
  94                offset = lseek_execute(file, inode, file->f_pos + offset,
  95                                       maxsize);
  96                spin_unlock(&file->f_lock);
  97                return offset;
  98        case SEEK_DATA:
  99                /*
 100                 * In the generic case the entire file is data, so as long as
 101                 * offset isn't at the end of the file then the offset is data.
 102                 */
 103                if (offset >= eof)
 104                        return -ENXIO;
 105                break;
 106        case SEEK_HOLE:
 107                /*
 108                 * There is a virtual hole at the end of the file, so as long as
 109                 * offset isn't i_size or larger, return i_size.
 110                 */
 111                if (offset >= eof)
 112                        return -ENXIO;
 113                offset = eof;
 114                break;
 115        }
 116
 117        return lseek_execute(file, inode, offset, maxsize);
 118}
 119EXPORT_SYMBOL(generic_file_llseek_size);
 120
 121/**
 122 * generic_file_llseek - generic llseek implementation for regular files
 123 * @file:       file structure to seek on
 124 * @offset:     file offset to seek to
 125 * @origin:     type of seek
 126 *
 127 * This is a generic implemenation of ->llseek useable for all normal local
 128 * filesystems.  It just updates the file offset to the value specified by
 129 * @offset and @origin under i_mutex.
 130 */
 131loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 132{
 133        struct inode *inode = file->f_mapping->host;
 134
 135        return generic_file_llseek_size(file, offset, origin,
 136                                        inode->i_sb->s_maxbytes,
 137                                        i_size_read(inode));
 138}
 139EXPORT_SYMBOL(generic_file_llseek);
 140
 141/**
 142 * noop_llseek - No Operation Performed llseek implementation
 143 * @file:       file structure to seek on
 144 * @offset:     file offset to seek to
 145 * @origin:     type of seek
 146 *
 147 * This is an implementation of ->llseek useable for the rare special case when
 148 * userspace expects the seek to succeed but the (device) file is actually not
 149 * able to perform the seek. In this case you use noop_llseek() instead of
 150 * falling back to the default implementation of ->llseek.
 151 */
 152loff_t noop_llseek(struct file *file, loff_t offset, int origin)
 153{
 154        return file->f_pos;
 155}
 156EXPORT_SYMBOL(noop_llseek);
 157
 158loff_t no_llseek(struct file *file, loff_t offset, int origin)
 159{
 160        return -ESPIPE;
 161}
 162EXPORT_SYMBOL(no_llseek);
 163
 164loff_t default_llseek(struct file *file, loff_t offset, int origin)
 165{
 166        struct inode *inode = file->f_path.dentry->d_inode;
 167        loff_t retval;
 168
 169        mutex_lock(&inode->i_mutex);
 170        switch (origin) {
 171                case SEEK_END:
 172                        offset += i_size_read(inode);
 173                        break;
 174                case SEEK_CUR:
 175                        if (offset == 0) {
 176                                retval = file->f_pos;
 177                                goto out;
 178                        }
 179                        offset += file->f_pos;
 180                        break;
 181                case SEEK_DATA:
 182                        /*
 183                         * In the generic case the entire file is data, so as
 184                         * long as offset isn't at the end of the file then the
 185                         * offset is data.
 186                         */
 187                        if (offset >= inode->i_size) {
 188                                retval = -ENXIO;
 189                                goto out;
 190                        }
 191                        break;
 192                case SEEK_HOLE:
 193                        /*
 194                         * There is a virtual hole at the end of the file, so
 195                         * as long as offset isn't i_size or larger, return
 196                         * i_size.
 197                         */
 198                        if (offset >= inode->i_size) {
 199                                retval = -ENXIO;
 200                                goto out;
 201                        }
 202                        offset = inode->i_size;
 203                        break;
 204        }
 205        retval = -EINVAL;
 206        if (offset >= 0 || unsigned_offsets(file)) {
 207                if (offset != file->f_pos) {
 208                        file->f_pos = offset;
 209                        file->f_version = 0;
 210                }
 211                retval = offset;
 212        }
 213out:
 214        mutex_unlock(&inode->i_mutex);
 215        return retval;
 216}
 217EXPORT_SYMBOL(default_llseek);
 218
 219loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 220{
 221        loff_t (*fn)(struct file *, loff_t, int);
 222
 223        fn = no_llseek;
 224        if (file->f_mode & FMODE_LSEEK) {
 225                if (file->f_op && file->f_op->llseek)
 226                        fn = file->f_op->llseek;
 227        }
 228        return fn(file, offset, origin);
 229}
 230EXPORT_SYMBOL(vfs_llseek);
 231
 232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 233{
 234        off_t retval;
 235        struct fd f = fdget(fd);
 236        if (!f.file)
 237                return -EBADF;
 238
 239        retval = -EINVAL;
 240        if (origin <= SEEK_MAX) {
 241                loff_t res = vfs_llseek(f.file, offset, origin);
 242                retval = res;
 243                if (res != (loff_t)retval)
 244                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 245        }
 246        fdput(f);
 247        return retval;
 248}
 249
 250#ifdef __ARCH_WANT_SYS_LLSEEK
 251SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 252                unsigned long, offset_low, loff_t __user *, result,
 253                unsigned int, origin)
 254{
 255        int retval;
 256        struct fd f = fdget(fd);
 257        loff_t offset;
 258
 259        if (!f.file)
 260                return -EBADF;
 261
 262        retval = -EINVAL;
 263        if (origin > SEEK_MAX)
 264                goto out_putf;
 265
 266        offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
 267                        origin);
 268
 269        retval = (int)offset;
 270        if (offset >= 0) {
 271                retval = -EFAULT;
 272                if (!copy_to_user(result, &offset, sizeof(offset)))
 273                        retval = 0;
 274        }
 275out_putf:
 276        fdput(f);
 277        return retval;
 278}
 279#endif
 280
 281
 282/*
 283 * rw_verify_area doesn't like huge counts. We limit
 284 * them to something that fits in "int" so that others
 285 * won't have to do range checks all the time.
 286 */
 287int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 288{
 289        struct inode *inode;
 290        loff_t pos;
 291        int retval = -EINVAL;
 292
 293        inode = file->f_path.dentry->d_inode;
 294        if (unlikely((ssize_t) count < 0))
 295                return retval;
 296        pos = *ppos;
 297        if (unlikely(pos < 0)) {
 298                if (!unsigned_offsets(file))
 299                        return retval;
 300                if (count >= -pos) /* both values are in 0..LLONG_MAX */
 301                        return -EOVERFLOW;
 302        } else if (unlikely((loff_t) (pos + count) < 0)) {
 303                if (!unsigned_offsets(file))
 304                        return retval;
 305        }
 306
 307        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 308                retval = locks_mandatory_area(
 309                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 310                        inode, file, pos, count);
 311                if (retval < 0)
 312                        return retval;
 313        }
 314        retval = security_file_permission(file,
 315                                read_write == READ ? MAY_READ : MAY_WRITE);
 316        if (retval)
 317                return retval;
 318        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 319}
 320
 321static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 322{
 323        set_current_state(TASK_UNINTERRUPTIBLE);
 324        if (!kiocbIsKicked(iocb))
 325                schedule();
 326        else
 327                kiocbClearKicked(iocb);
 328        __set_current_state(TASK_RUNNING);
 329}
 330
 331ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 332{
 333        struct iovec iov = { .iov_base = buf, .iov_len = len };
 334        struct kiocb kiocb;
 335        ssize_t ret;
 336
 337        init_sync_kiocb(&kiocb, filp);
 338        kiocb.ki_pos = *ppos;
 339        kiocb.ki_left = len;
 340        kiocb.ki_nbytes = len;
 341
 342        for (;;) {
 343                ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 344                if (ret != -EIOCBRETRY)
 345                        break;
 346                wait_on_retry_sync_kiocb(&kiocb);
 347        }
 348
 349        if (-EIOCBQUEUED == ret)
 350                ret = wait_on_sync_kiocb(&kiocb);
 351        *ppos = kiocb.ki_pos;
 352        return ret;
 353}
 354
 355EXPORT_SYMBOL(do_sync_read);
 356
 357ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 358{
 359        ssize_t ret;
 360
 361        if (!(file->f_mode & FMODE_READ))
 362                return -EBADF;
 363        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 364                return -EINVAL;
 365        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 366                return -EFAULT;
 367
 368        ret = rw_verify_area(READ, file, pos, count);
 369        if (ret >= 0) {
 370                count = ret;
 371                if (file->f_op->read)
 372                        ret = file->f_op->read(file, buf, count, pos);
 373                else
 374                        ret = do_sync_read(file, buf, count, pos);
 375                if (ret > 0) {
 376                        fsnotify_access(file);
 377                        add_rchar(current, ret);
 378                }
 379                inc_syscr(current);
 380        }
 381
 382        return ret;
 383}
 384
 385EXPORT_SYMBOL(vfs_read);
 386
 387ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 388{
 389        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 390        struct kiocb kiocb;
 391        ssize_t ret;
 392
 393        init_sync_kiocb(&kiocb, filp);
 394        kiocb.ki_pos = *ppos;
 395        kiocb.ki_left = len;
 396        kiocb.ki_nbytes = len;
 397
 398        for (;;) {
 399                ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 400                if (ret != -EIOCBRETRY)
 401                        break;
 402                wait_on_retry_sync_kiocb(&kiocb);
 403        }
 404
 405        if (-EIOCBQUEUED == ret)
 406                ret = wait_on_sync_kiocb(&kiocb);
 407        *ppos = kiocb.ki_pos;
 408        return ret;
 409}
 410
 411EXPORT_SYMBOL(do_sync_write);
 412
 413ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 414{
 415        ssize_t ret;
 416
 417        if (!(file->f_mode & FMODE_WRITE))
 418                return -EBADF;
 419        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 420                return -EINVAL;
 421        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 422                return -EFAULT;
 423
 424        ret = rw_verify_area(WRITE, file, pos, count);
 425        if (ret >= 0) {
 426                count = ret;
 427                if (file->f_op->write)
 428                        ret = file->f_op->write(file, buf, count, pos);
 429                else
 430                        ret = do_sync_write(file, buf, count, pos);
 431                if (ret > 0) {
 432                        fsnotify_modify(file);
 433                        add_wchar(current, ret);
 434                }
 435                inc_syscw(current);
 436        }
 437
 438        return ret;
 439}
 440
 441EXPORT_SYMBOL(vfs_write);
 442
 443static inline loff_t file_pos_read(struct file *file)
 444{
 445        return file->f_pos;
 446}
 447
 448static inline void file_pos_write(struct file *file, loff_t pos)
 449{
 450        file->f_pos = pos;
 451}
 452
 453SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 454{
 455        struct fd f = fdget(fd);
 456        ssize_t ret = -EBADF;
 457
 458        if (f.file) {
 459                loff_t pos = file_pos_read(f.file);
 460                ret = vfs_read(f.file, buf, count, &pos);
 461                file_pos_write(f.file, pos);
 462                fdput(f);
 463        }
 464        return ret;
 465}
 466
 467SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 468                size_t, count)
 469{
 470        struct fd f = fdget(fd);
 471        ssize_t ret = -EBADF;
 472
 473        if (f.file) {
 474                loff_t pos = file_pos_read(f.file);
 475                ret = vfs_write(f.file, buf, count, &pos);
 476                file_pos_write(f.file, pos);
 477                fdput(f);
 478        }
 479
 480        return ret;
 481}
 482
 483SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 484                        size_t count, loff_t pos)
 485{
 486        struct fd f;
 487        ssize_t ret = -EBADF;
 488
 489        if (pos < 0)
 490                return -EINVAL;
 491
 492        f = fdget(fd);
 493        if (f.file) {
 494                ret = -ESPIPE;
 495                if (f.file->f_mode & FMODE_PREAD)
 496                        ret = vfs_read(f.file, buf, count, &pos);
 497                fdput(f);
 498        }
 499
 500        return ret;
 501}
 502#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 503asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
 504{
 505        return SYSC_pread64((unsigned int) fd, (char __user *) buf,
 506                            (size_t) count, pos);
 507}
 508SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 509#endif
 510
 511SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 512                         size_t count, loff_t pos)
 513{
 514        struct fd f;
 515        ssize_t ret = -EBADF;
 516
 517        if (pos < 0)
 518                return -EINVAL;
 519
 520        f = fdget(fd);
 521        if (f.file) {
 522                ret = -ESPIPE;
 523                if (f.file->f_mode & FMODE_PWRITE)  
 524                        ret = vfs_write(f.file, buf, count, &pos);
 525                fdput(f);
 526        }
 527
 528        return ret;
 529}
 530#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 531asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
 532{
 533        return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
 534                             (size_t) count, pos);
 535}
 536SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
 537#endif
 538
 539/*
 540 * Reduce an iovec's length in-place.  Return the resulting number of segments
 541 */
 542unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 543{
 544        unsigned long seg = 0;
 545        size_t len = 0;
 546
 547        while (seg < nr_segs) {
 548                seg++;
 549                if (len + iov->iov_len >= to) {
 550                        iov->iov_len = to - len;
 551                        break;
 552                }
 553                len += iov->iov_len;
 554                iov++;
 555        }
 556        return seg;
 557}
 558EXPORT_SYMBOL(iov_shorten);
 559
 560ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 561                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 562{
 563        struct kiocb kiocb;
 564        ssize_t ret;
 565
 566        init_sync_kiocb(&kiocb, filp);
 567        kiocb.ki_pos = *ppos;
 568        kiocb.ki_left = len;
 569        kiocb.ki_nbytes = len;
 570
 571        for (;;) {
 572                ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 573                if (ret != -EIOCBRETRY)
 574                        break;
 575                wait_on_retry_sync_kiocb(&kiocb);
 576        }
 577
 578        if (ret == -EIOCBQUEUED)
 579                ret = wait_on_sync_kiocb(&kiocb);
 580        *ppos = kiocb.ki_pos;
 581        return ret;
 582}
 583
 584/* Do it by hand, with file-ops */
 585ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 586                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 587{
 588        struct iovec *vector = iov;
 589        ssize_t ret = 0;
 590
 591        while (nr_segs > 0) {
 592                void __user *base;
 593                size_t len;
 594                ssize_t nr;
 595
 596                base = vector->iov_base;
 597                len = vector->iov_len;
 598                vector++;
 599                nr_segs--;
 600
 601                nr = fn(filp, base, len, ppos);
 602
 603                if (nr < 0) {
 604                        if (!ret)
 605                                ret = nr;
 606                        break;
 607                }
 608                ret += nr;
 609                if (nr != len)
 610                        break;
 611        }
 612
 613        return ret;
 614}
 615
 616/* A write operation does a read from user space and vice versa */
 617#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 618
 619ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 620                              unsigned long nr_segs, unsigned long fast_segs,
 621                              struct iovec *fast_pointer,
 622                              struct iovec **ret_pointer)
 623{
 624        unsigned long seg;
 625        ssize_t ret;
 626        struct iovec *iov = fast_pointer;
 627
 628        /*
 629         * SuS says "The readv() function *may* fail if the iovcnt argument
 630         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 631         * traditionally returned zero for zero segments, so...
 632         */
 633        if (nr_segs == 0) {
 634                ret = 0;
 635                goto out;
 636        }
 637
 638        /*
 639         * First get the "struct iovec" from user memory and
 640         * verify all the pointers
 641         */
 642        if (nr_segs > UIO_MAXIOV) {
 643                ret = -EINVAL;
 644                goto out;
 645        }
 646        if (nr_segs > fast_segs) {
 647                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 648                if (iov == NULL) {
 649                        ret = -ENOMEM;
 650                        goto out;
 651                }
 652        }
 653        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 654                ret = -EFAULT;
 655                goto out;
 656        }
 657
 658        /*
 659         * According to the Single Unix Specification we should return EINVAL
 660         * if an element length is < 0 when cast to ssize_t or if the
 661         * total length would overflow the ssize_t return value of the
 662         * system call.
 663         *
 664         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 665         * overflow case.
 666         */
 667        ret = 0;
 668        for (seg = 0; seg < nr_segs; seg++) {
 669                void __user *buf = iov[seg].iov_base;
 670                ssize_t len = (ssize_t)iov[seg].iov_len;
 671
 672                /* see if we we're about to use an invalid len or if
 673                 * it's about to overflow ssize_t */
 674                if (len < 0) {
 675                        ret = -EINVAL;
 676                        goto out;
 677                }
 678                if (type >= 0
 679                    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 680                        ret = -EFAULT;
 681                        goto out;
 682                }
 683                if (len > MAX_RW_COUNT - ret) {
 684                        len = MAX_RW_COUNT - ret;
 685                        iov[seg].iov_len = len;
 686                }
 687                ret += len;
 688        }
 689out:
 690        *ret_pointer = iov;
 691        return ret;
 692}
 693
 694static ssize_t do_readv_writev(int type, struct file *file,
 695                               const struct iovec __user * uvector,
 696                               unsigned long nr_segs, loff_t *pos)
 697{
 698        size_t tot_len;
 699        struct iovec iovstack[UIO_FASTIOV];
 700        struct iovec *iov = iovstack;
 701        ssize_t ret;
 702        io_fn_t fn;
 703        iov_fn_t fnv;
 704
 705        if (!file->f_op) {
 706                ret = -EINVAL;
 707                goto out;
 708        }
 709
 710        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 711                                    ARRAY_SIZE(iovstack), iovstack, &iov);
 712        if (ret <= 0)
 713                goto out;
 714
 715        tot_len = ret;
 716        ret = rw_verify_area(type, file, pos, tot_len);
 717        if (ret < 0)
 718                goto out;
 719
 720        fnv = NULL;
 721        if (type == READ) {
 722                fn = file->f_op->read;
 723                fnv = file->f_op->aio_read;
 724        } else {
 725                fn = (io_fn_t)file->f_op->write;
 726                fnv = file->f_op->aio_write;
 727        }
 728
 729        if (fnv)
 730                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 731                                                pos, fnv);
 732        else
 733                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 734
 735out:
 736        if (iov != iovstack)
 737                kfree(iov);
 738        if ((ret + (type == READ)) > 0) {
 739                if (type == READ)
 740                        fsnotify_access(file);
 741                else
 742                        fsnotify_modify(file);
 743        }
 744        return ret;
 745}
 746
 747ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 748                  unsigned long vlen, loff_t *pos)
 749{
 750        if (!(file->f_mode & FMODE_READ))
 751                return -EBADF;
 752        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 753                return -EINVAL;
 754
 755        return do_readv_writev(READ, file, vec, vlen, pos);
 756}
 757
 758EXPORT_SYMBOL(vfs_readv);
 759
 760ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 761                   unsigned long vlen, loff_t *pos)
 762{
 763        if (!(file->f_mode & FMODE_WRITE))
 764                return -EBADF;
 765        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 766                return -EINVAL;
 767
 768        return do_readv_writev(WRITE, file, vec, vlen, pos);
 769}
 770
 771EXPORT_SYMBOL(vfs_writev);
 772
 773SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 774                unsigned long, vlen)
 775{
 776        struct fd f = fdget(fd);
 777        ssize_t ret = -EBADF;
 778
 779        if (f.file) {
 780                loff_t pos = file_pos_read(f.file);
 781                ret = vfs_readv(f.file, vec, vlen, &pos);
 782                file_pos_write(f.file, pos);
 783                fdput(f);
 784        }
 785
 786        if (ret > 0)
 787                add_rchar(current, ret);
 788        inc_syscr(current);
 789        return ret;
 790}
 791
 792SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 793                unsigned long, vlen)
 794{
 795        struct fd f = fdget(fd);
 796        ssize_t ret = -EBADF;
 797
 798        if (f.file) {
 799                loff_t pos = file_pos_read(f.file);
 800                ret = vfs_writev(f.file, vec, vlen, &pos);
 801                file_pos_write(f.file, pos);
 802                fdput(f);
 803        }
 804
 805        if (ret > 0)
 806                add_wchar(current, ret);
 807        inc_syscw(current);
 808        return ret;
 809}
 810
 811static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 812{
 813#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 814        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 815}
 816
 817SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 818                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 819{
 820        loff_t pos = pos_from_hilo(pos_h, pos_l);
 821        struct fd f;
 822        ssize_t ret = -EBADF;
 823
 824        if (pos < 0)
 825                return -EINVAL;
 826
 827        f = fdget(fd);
 828        if (f.file) {
 829                ret = -ESPIPE;
 830                if (f.file->f_mode & FMODE_PREAD)
 831                        ret = vfs_readv(f.file, vec, vlen, &pos);
 832                fdput(f);
 833        }
 834
 835        if (ret > 0)
 836                add_rchar(current, ret);
 837        inc_syscr(current);
 838        return ret;
 839}
 840
 841SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 842                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 843{
 844        loff_t pos = pos_from_hilo(pos_h, pos_l);
 845        struct fd f;
 846        ssize_t ret = -EBADF;
 847
 848        if (pos < 0)
 849                return -EINVAL;
 850
 851        f = fdget(fd);
 852        if (f.file) {
 853                ret = -ESPIPE;
 854                if (f.file->f_mode & FMODE_PWRITE)
 855                        ret = vfs_writev(f.file, vec, vlen, &pos);
 856                fdput(f);
 857        }
 858
 859        if (ret > 0)
 860                add_wchar(current, ret);
 861        inc_syscw(current);
 862        return ret;
 863}
 864
 865ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
 866                    loff_t max)
 867{
 868        struct fd in, out;
 869        struct inode *in_inode, *out_inode;
 870        loff_t pos;
 871        ssize_t retval;
 872        int fl;
 873
 874        /*
 875         * Get input file, and verify that it is ok..
 876         */
 877        retval = -EBADF;
 878        in = fdget(in_fd);
 879        if (!in.file)
 880                goto out;
 881        if (!(in.file->f_mode & FMODE_READ))
 882                goto fput_in;
 883        retval = -ESPIPE;
 884        if (!ppos)
 885                ppos = &in.file->f_pos;
 886        else
 887                if (!(in.file->f_mode & FMODE_PREAD))
 888                        goto fput_in;
 889        retval = rw_verify_area(READ, in.file, ppos, count);
 890        if (retval < 0)
 891                goto fput_in;
 892        count = retval;
 893
 894        /*
 895         * Get output file, and verify that it is ok..
 896         */
 897        retval = -EBADF;
 898        out = fdget(out_fd);
 899        if (!out.file)
 900                goto fput_in;
 901        if (!(out.file->f_mode & FMODE_WRITE))
 902                goto fput_out;
 903        retval = -EINVAL;
 904        in_inode = in.file->f_path.dentry->d_inode;
 905        out_inode = out.file->f_path.dentry->d_inode;
 906        retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
 907        if (retval < 0)
 908                goto fput_out;
 909        count = retval;
 910
 911        if (!max)
 912                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 913
 914        pos = *ppos;
 915        if (unlikely(pos + count > max)) {
 916                retval = -EOVERFLOW;
 917                if (pos >= max)
 918                        goto fput_out;
 919                count = max - pos;
 920        }
 921
 922        fl = 0;
 923#if 0
 924        /*
 925         * We need to debate whether we can enable this or not. The
 926         * man page documents EAGAIN return for the output at least,
 927         * and the application is arguably buggy if it doesn't expect
 928         * EAGAIN on a non-blocking file descriptor.
 929         */
 930        if (in.file->f_flags & O_NONBLOCK)
 931                fl = SPLICE_F_NONBLOCK;
 932#endif
 933        retval = do_splice_direct(in.file, ppos, out.file, count, fl);
 934
 935        if (retval > 0) {
 936                add_rchar(current, retval);
 937                add_wchar(current, retval);
 938        }
 939
 940        inc_syscr(current);
 941        inc_syscw(current);
 942        if (*ppos > max)
 943                retval = -EOVERFLOW;
 944
 945fput_out:
 946        fdput(out);
 947fput_in:
 948        fdput(in);
 949out:
 950        return retval;
 951}
 952
 953SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
 954{
 955        loff_t pos;
 956        off_t off;
 957        ssize_t ret;
 958
 959        if (offset) {
 960                if (unlikely(get_user(off, offset)))
 961                        return -EFAULT;
 962                pos = off;
 963                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
 964                if (unlikely(put_user(pos, offset)))
 965                        return -EFAULT;
 966                return ret;
 967        }
 968
 969        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 970}
 971
 972SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
 973{
 974        loff_t pos;
 975        ssize_t ret;
 976
 977        if (offset) {
 978                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
 979                        return -EFAULT;
 980                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
 981                if (unlikely(put_user(pos, offset)))
 982                        return -EFAULT;
 983                return ret;
 984        }
 985
 986        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 987}
 988
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.