linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/aio.h>
  13#include <linux/fsnotify.h>
  14#include <linux/security.h>
  15#include <linux/export.h>
  16#include <linux/syscalls.h>
  17#include <linux/pagemap.h>
  18#include <linux/splice.h>
  19#include <linux/compat.h>
  20#include "internal.h"
  21
  22#include <asm/uaccess.h>
  23#include <asm/unistd.h>
  24
  25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
  26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
  27                unsigned long, loff_t);
  28
  29const struct file_operations generic_ro_fops = {
  30        .llseek         = generic_file_llseek,
  31        .read           = do_sync_read,
  32        .aio_read       = generic_file_aio_read,
  33        .mmap           = generic_file_readonly_mmap,
  34        .splice_read    = generic_file_splice_read,
  35};
  36
  37EXPORT_SYMBOL(generic_ro_fops);
  38
  39static inline int unsigned_offsets(struct file *file)
  40{
  41        return file->f_mode & FMODE_UNSIGNED_OFFSET;
  42}
  43
  44static loff_t lseek_execute(struct file *file, struct inode *inode,
  45                loff_t offset, loff_t maxsize)
  46{
  47        if (offset < 0 && !unsigned_offsets(file))
  48                return -EINVAL;
  49        if (offset > maxsize)
  50                return -EINVAL;
  51
  52        if (offset != file->f_pos) {
  53                file->f_pos = offset;
  54                file->f_version = 0;
  55        }
  56        return offset;
  57}
  58
  59/**
  60 * generic_file_llseek_size - generic llseek implementation for regular files
  61 * @file:       file structure to seek on
  62 * @offset:     file offset to seek to
  63 * @whence:     type of seek
  64 * @size:       max size of this file in file system
  65 * @eof:        offset used for SEEK_END position
  66 *
  67 * This is a variant of generic_file_llseek that allows passing in a custom
  68 * maximum file size and a custom EOF position, for e.g. hashed directories
  69 *
  70 * Synchronization:
  71 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
  72 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
  73 * read/writes behave like SEEK_SET against seeks.
  74 */
  75loff_t
  76generic_file_llseek_size(struct file *file, loff_t offset, int whence,
  77                loff_t maxsize, loff_t eof)
  78{
  79        struct inode *inode = file->f_mapping->host;
  80
  81        switch (whence) {
  82        case SEEK_END:
  83                offset += eof;
  84                break;
  85        case SEEK_CUR:
  86                /*
  87                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  88                 * position-querying operation.  Avoid rewriting the "same"
  89                 * f_pos value back to the file because a concurrent read(),
  90                 * write() or lseek() might have altered it
  91                 */
  92                if (offset == 0)
  93                        return file->f_pos;
  94                /*
  95                 * f_lock protects against read/modify/write race with other
  96                 * SEEK_CURs. Note that parallel writes and reads behave
  97                 * like SEEK_SET.
  98                 */
  99                spin_lock(&file->f_lock);
 100                offset = lseek_execute(file, inode, file->f_pos + offset,
 101                                       maxsize);
 102                spin_unlock(&file->f_lock);
 103                return offset;
 104        case SEEK_DATA:
 105                /*
 106                 * In the generic case the entire file is data, so as long as
 107                 * offset isn't at the end of the file then the offset is data.
 108                 */
 109                if (offset >= eof)
 110                        return -ENXIO;
 111                break;
 112        case SEEK_HOLE:
 113                /*
 114                 * There is a virtual hole at the end of the file, so as long as
 115                 * offset isn't i_size or larger, return i_size.
 116                 */
 117                if (offset >= eof)
 118                        return -ENXIO;
 119                offset = eof;
 120                break;
 121        }
 122
 123        return lseek_execute(file, inode, offset, maxsize);
 124}
 125EXPORT_SYMBOL(generic_file_llseek_size);
 126
 127/**
 128 * generic_file_llseek - generic llseek implementation for regular files
 129 * @file:       file structure to seek on
 130 * @offset:     file offset to seek to
 131 * @whence:     type of seek
 132 *
 133 * This is a generic implemenation of ->llseek useable for all normal local
 134 * filesystems.  It just updates the file offset to the value specified by
 135 * @offset and @whence.
 136 */
 137loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
 138{
 139        struct inode *inode = file->f_mapping->host;
 140
 141        return generic_file_llseek_size(file, offset, whence,
 142                                        inode->i_sb->s_maxbytes,
 143                                        i_size_read(inode));
 144}
 145EXPORT_SYMBOL(generic_file_llseek);
 146
 147/**
 148 * noop_llseek - No Operation Performed llseek implementation
 149 * @file:       file structure to seek on
 150 * @offset:     file offset to seek to
 151 * @whence:     type of seek
 152 *
 153 * This is an implementation of ->llseek useable for the rare special case when
 154 * userspace expects the seek to succeed but the (device) file is actually not
 155 * able to perform the seek. In this case you use noop_llseek() instead of
 156 * falling back to the default implementation of ->llseek.
 157 */
 158loff_t noop_llseek(struct file *file, loff_t offset, int whence)
 159{
 160        return file->f_pos;
 161}
 162EXPORT_SYMBOL(noop_llseek);
 163
 164loff_t no_llseek(struct file *file, loff_t offset, int whence)
 165{
 166        return -ESPIPE;
 167}
 168EXPORT_SYMBOL(no_llseek);
 169
 170loff_t default_llseek(struct file *file, loff_t offset, int whence)
 171{
 172        struct inode *inode = file_inode(file);
 173        loff_t retval;
 174
 175        mutex_lock(&inode->i_mutex);
 176        switch (whence) {
 177                case SEEK_END:
 178                        offset += i_size_read(inode);
 179                        break;
 180                case SEEK_CUR:
 181                        if (offset == 0) {
 182                                retval = file->f_pos;
 183                                goto out;
 184                        }
 185                        offset += file->f_pos;
 186                        break;
 187                case SEEK_DATA:
 188                        /*
 189                         * In the generic case the entire file is data, so as
 190                         * long as offset isn't at the end of the file then the
 191                         * offset is data.
 192                         */
 193                        if (offset >= inode->i_size) {
 194                                retval = -ENXIO;
 195                                goto out;
 196                        }
 197                        break;
 198                case SEEK_HOLE:
 199                        /*
 200                         * There is a virtual hole at the end of the file, so
 201                         * as long as offset isn't i_size or larger, return
 202                         * i_size.
 203                         */
 204                        if (offset >= inode->i_size) {
 205                                retval = -ENXIO;
 206                                goto out;
 207                        }
 208                        offset = inode->i_size;
 209                        break;
 210        }
 211        retval = -EINVAL;
 212        if (offset >= 0 || unsigned_offsets(file)) {
 213                if (offset != file->f_pos) {
 214                        file->f_pos = offset;
 215                        file->f_version = 0;
 216                }
 217                retval = offset;
 218        }
 219out:
 220        mutex_unlock(&inode->i_mutex);
 221        return retval;
 222}
 223EXPORT_SYMBOL(default_llseek);
 224
 225loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 226{
 227        loff_t (*fn)(struct file *, loff_t, int);
 228
 229        fn = no_llseek;
 230        if (file->f_mode & FMODE_LSEEK) {
 231                if (file->f_op && file->f_op->llseek)
 232                        fn = file->f_op->llseek;
 233        }
 234        return fn(file, offset, whence);
 235}
 236EXPORT_SYMBOL(vfs_llseek);
 237
 238SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 239{
 240        off_t retval;
 241        struct fd f = fdget(fd);
 242        if (!f.file)
 243                return -EBADF;
 244
 245        retval = -EINVAL;
 246        if (whence <= SEEK_MAX) {
 247                loff_t res = vfs_llseek(f.file, offset, whence);
 248                retval = res;
 249                if (res != (loff_t)retval)
 250                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 251        }
 252        fdput(f);
 253        return retval;
 254}
 255
 256#ifdef CONFIG_COMPAT
 257COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
 258{
 259        return sys_lseek(fd, offset, whence);
 260}
 261#endif
 262
 263#ifdef __ARCH_WANT_SYS_LLSEEK
 264SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 265                unsigned long, offset_low, loff_t __user *, result,
 266                unsigned int, whence)
 267{
 268        int retval;
 269        struct fd f = fdget(fd);
 270        loff_t offset;
 271
 272        if (!f.file)
 273                return -EBADF;
 274
 275        retval = -EINVAL;
 276        if (whence > SEEK_MAX)
 277                goto out_putf;
 278
 279        offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
 280                        whence);
 281
 282        retval = (int)offset;
 283        if (offset >= 0) {
 284                retval = -EFAULT;
 285                if (!copy_to_user(result, &offset, sizeof(offset)))
 286                        retval = 0;
 287        }
 288out_putf:
 289        fdput(f);
 290        return retval;
 291}
 292#endif
 293
 294/*
 295 * rw_verify_area doesn't like huge counts. We limit
 296 * them to something that fits in "int" so that others
 297 * won't have to do range checks all the time.
 298 */
 299int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 300{
 301        struct inode *inode;
 302        loff_t pos;
 303        int retval = -EINVAL;
 304
 305        inode = file_inode(file);
 306        if (unlikely((ssize_t) count < 0))
 307                return retval;
 308        pos = *ppos;
 309        if (unlikely(pos < 0)) {
 310                if (!unsigned_offsets(file))
 311                        return retval;
 312                if (count >= -pos) /* both values are in 0..LLONG_MAX */
 313                        return -EOVERFLOW;
 314        } else if (unlikely((loff_t) (pos + count) < 0)) {
 315                if (!unsigned_offsets(file))
 316                        return retval;
 317        }
 318
 319        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 320                retval = locks_mandatory_area(
 321                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 322                        inode, file, pos, count);
 323                if (retval < 0)
 324                        return retval;
 325        }
 326        retval = security_file_permission(file,
 327                                read_write == READ ? MAY_READ : MAY_WRITE);
 328        if (retval)
 329                return retval;
 330        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 331}
 332
 333ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 334{
 335        struct iovec iov = { .iov_base = buf, .iov_len = len };
 336        struct kiocb kiocb;
 337        ssize_t ret;
 338
 339        init_sync_kiocb(&kiocb, filp);
 340        kiocb.ki_pos = *ppos;
 341        kiocb.ki_left = len;
 342        kiocb.ki_nbytes = len;
 343
 344        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 345        if (-EIOCBQUEUED == ret)
 346                ret = wait_on_sync_kiocb(&kiocb);
 347        *ppos = kiocb.ki_pos;
 348        return ret;
 349}
 350
 351EXPORT_SYMBOL(do_sync_read);
 352
 353ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 354{
 355        ssize_t ret;
 356
 357        if (!(file->f_mode & FMODE_READ))
 358                return -EBADF;
 359        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 360                return -EINVAL;
 361        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 362                return -EFAULT;
 363
 364        ret = rw_verify_area(READ, file, pos, count);
 365        if (ret >= 0) {
 366                count = ret;
 367                if (file->f_op->read)
 368                        ret = file->f_op->read(file, buf, count, pos);
 369                else
 370                        ret = do_sync_read(file, buf, count, pos);
 371                if (ret > 0) {
 372                        fsnotify_access(file);
 373                        add_rchar(current, ret);
 374                }
 375                inc_syscr(current);
 376        }
 377
 378        return ret;
 379}
 380
 381EXPORT_SYMBOL(vfs_read);
 382
 383ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 384{
 385        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 386        struct kiocb kiocb;
 387        ssize_t ret;
 388
 389        init_sync_kiocb(&kiocb, filp);
 390        kiocb.ki_pos = *ppos;
 391        kiocb.ki_left = len;
 392        kiocb.ki_nbytes = len;
 393
 394        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 395        if (-EIOCBQUEUED == ret)
 396                ret = wait_on_sync_kiocb(&kiocb);
 397        *ppos = kiocb.ki_pos;
 398        return ret;
 399}
 400
 401EXPORT_SYMBOL(do_sync_write);
 402
 403ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 404{
 405        mm_segment_t old_fs;
 406        const char __user *p;
 407        ssize_t ret;
 408
 409        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 410                return -EINVAL;
 411
 412        old_fs = get_fs();
 413        set_fs(get_ds());
 414        p = (__force const char __user *)buf;
 415        if (count > MAX_RW_COUNT)
 416                count =  MAX_RW_COUNT;
 417        if (file->f_op->write)
 418                ret = file->f_op->write(file, p, count, pos);
 419        else
 420                ret = do_sync_write(file, p, count, pos);
 421        set_fs(old_fs);
 422        if (ret > 0) {
 423                fsnotify_modify(file);
 424                add_wchar(current, ret);
 425        }
 426        inc_syscw(current);
 427        return ret;
 428}
 429
 430ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 431{
 432        ssize_t ret;
 433
 434        if (!(file->f_mode & FMODE_WRITE))
 435                return -EBADF;
 436        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 437                return -EINVAL;
 438        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 439                return -EFAULT;
 440
 441        ret = rw_verify_area(WRITE, file, pos, count);
 442        if (ret >= 0) {
 443                count = ret;
 444                file_start_write(file);
 445                if (file->f_op->write)
 446                        ret = file->f_op->write(file, buf, count, pos);
 447                else
 448                        ret = do_sync_write(file, buf, count, pos);
 449                if (ret > 0) {
 450                        fsnotify_modify(file);
 451                        add_wchar(current, ret);
 452                }
 453                inc_syscw(current);
 454                file_end_write(file);
 455        }
 456
 457        return ret;
 458}
 459
 460EXPORT_SYMBOL(vfs_write);
 461
 462static inline loff_t file_pos_read(struct file *file)
 463{
 464        return file->f_pos;
 465}
 466
 467static inline void file_pos_write(struct file *file, loff_t pos)
 468{
 469        file->f_pos = pos;
 470}
 471
 472SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 473{
 474        struct fd f = fdget(fd);
 475        ssize_t ret = -EBADF;
 476
 477        if (f.file) {
 478                loff_t pos = file_pos_read(f.file);
 479                ret = vfs_read(f.file, buf, count, &pos);
 480                file_pos_write(f.file, pos);
 481                fdput(f);
 482        }
 483        return ret;
 484}
 485
 486SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 487                size_t, count)
 488{
 489        struct fd f = fdget(fd);
 490        ssize_t ret = -EBADF;
 491
 492        if (f.file) {
 493                loff_t pos = file_pos_read(f.file);
 494                ret = vfs_write(f.file, buf, count, &pos);
 495                file_pos_write(f.file, pos);
 496                fdput(f);
 497        }
 498
 499        return ret;
 500}
 501
 502SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
 503                        size_t, count, loff_t, pos)
 504{
 505        struct fd f;
 506        ssize_t ret = -EBADF;
 507
 508        if (pos < 0)
 509                return -EINVAL;
 510
 511        f = fdget(fd);
 512        if (f.file) {
 513                ret = -ESPIPE;
 514                if (f.file->f_mode & FMODE_PREAD)
 515                        ret = vfs_read(f.file, buf, count, &pos);
 516                fdput(f);
 517        }
 518
 519        return ret;
 520}
 521
 522SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
 523                         size_t, count, loff_t, pos)
 524{
 525        struct fd f;
 526        ssize_t ret = -EBADF;
 527
 528        if (pos < 0)
 529                return -EINVAL;
 530
 531        f = fdget(fd);
 532        if (f.file) {
 533                ret = -ESPIPE;
 534                if (f.file->f_mode & FMODE_PWRITE)  
 535                        ret = vfs_write(f.file, buf, count, &pos);
 536                fdput(f);
 537        }
 538
 539        return ret;
 540}
 541
 542/*
 543 * Reduce an iovec's length in-place.  Return the resulting number of segments
 544 */
 545unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 546{
 547        unsigned long seg = 0;
 548        size_t len = 0;
 549
 550        while (seg < nr_segs) {
 551                seg++;
 552                if (len + iov->iov_len >= to) {
 553                        iov->iov_len = to - len;
 554                        break;
 555                }
 556                len += iov->iov_len;
 557                iov++;
 558        }
 559        return seg;
 560}
 561EXPORT_SYMBOL(iov_shorten);
 562
 563static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 564                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 565{
 566        struct kiocb kiocb;
 567        ssize_t ret;
 568
 569        init_sync_kiocb(&kiocb, filp);
 570        kiocb.ki_pos = *ppos;
 571        kiocb.ki_left = len;
 572        kiocb.ki_nbytes = len;
 573
 574        ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 575        if (ret == -EIOCBQUEUED)
 576                ret = wait_on_sync_kiocb(&kiocb);
 577        *ppos = kiocb.ki_pos;
 578        return ret;
 579}
 580
 581/* Do it by hand, with file-ops */
 582static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 583                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 584{
 585        struct iovec *vector = iov;
 586        ssize_t ret = 0;
 587
 588        while (nr_segs > 0) {
 589                void __user *base;
 590                size_t len;
 591                ssize_t nr;
 592
 593                base = vector->iov_base;
 594                len = vector->iov_len;
 595                vector++;
 596                nr_segs--;
 597
 598                nr = fn(filp, base, len, ppos);
 599
 600                if (nr < 0) {
 601                        if (!ret)
 602                                ret = nr;
 603                        break;
 604                }
 605                ret += nr;
 606                if (nr != len)
 607                        break;
 608        }
 609
 610        return ret;
 611}
 612
 613/* A write operation does a read from user space and vice versa */
 614#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 615
 616ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 617                              unsigned long nr_segs, unsigned long fast_segs,
 618                              struct iovec *fast_pointer,
 619                              struct iovec **ret_pointer)
 620{
 621        unsigned long seg;
 622        ssize_t ret;
 623        struct iovec *iov = fast_pointer;
 624
 625        /*
 626         * SuS says "The readv() function *may* fail if the iovcnt argument
 627         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 628         * traditionally returned zero for zero segments, so...
 629         */
 630        if (nr_segs == 0) {
 631                ret = 0;
 632                goto out;
 633        }
 634
 635        /*
 636         * First get the "struct iovec" from user memory and
 637         * verify all the pointers
 638         */
 639        if (nr_segs > UIO_MAXIOV) {
 640                ret = -EINVAL;
 641                goto out;
 642        }
 643        if (nr_segs > fast_segs) {
 644                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 645                if (iov == NULL) {
 646                        ret = -ENOMEM;
 647                        goto out;
 648                }
 649        }
 650        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 651                ret = -EFAULT;
 652                goto out;
 653        }
 654
 655        /*
 656         * According to the Single Unix Specification we should return EINVAL
 657         * if an element length is < 0 when cast to ssize_t or if the
 658         * total length would overflow the ssize_t return value of the
 659         * system call.
 660         *
 661         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 662         * overflow case.
 663         */
 664        ret = 0;
 665        for (seg = 0; seg < nr_segs; seg++) {
 666                void __user *buf = iov[seg].iov_base;
 667                ssize_t len = (ssize_t)iov[seg].iov_len;
 668
 669                /* see if we we're about to use an invalid len or if
 670                 * it's about to overflow ssize_t */
 671                if (len < 0) {
 672                        ret = -EINVAL;
 673                        goto out;
 674                }
 675                if (type >= 0
 676                    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 677                        ret = -EFAULT;
 678                        goto out;
 679                }
 680                if (len > MAX_RW_COUNT - ret) {
 681                        len = MAX_RW_COUNT - ret;
 682                        iov[seg].iov_len = len;
 683                }
 684                ret += len;
 685        }
 686out:
 687        *ret_pointer = iov;
 688        return ret;
 689}
 690
 691static ssize_t do_readv_writev(int type, struct file *file,
 692                               const struct iovec __user * uvector,
 693                               unsigned long nr_segs, loff_t *pos)
 694{
 695        size_t tot_len;
 696        struct iovec iovstack[UIO_FASTIOV];
 697        struct iovec *iov = iovstack;
 698        ssize_t ret;
 699        io_fn_t fn;
 700        iov_fn_t fnv;
 701
 702        if (!file->f_op) {
 703                ret = -EINVAL;
 704                goto out;
 705        }
 706
 707        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 708                                    ARRAY_SIZE(iovstack), iovstack, &iov);
 709        if (ret <= 0)
 710                goto out;
 711
 712        tot_len = ret;
 713        ret = rw_verify_area(type, file, pos, tot_len);
 714        if (ret < 0)
 715                goto out;
 716
 717        fnv = NULL;
 718        if (type == READ) {
 719                fn = file->f_op->read;
 720                fnv = file->f_op->aio_read;
 721        } else {
 722                fn = (io_fn_t)file->f_op->write;
 723                fnv = file->f_op->aio_write;
 724                file_start_write(file);
 725        }
 726
 727        if (fnv)
 728                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 729                                                pos, fnv);
 730        else
 731                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 732
 733        if (type != READ)
 734                file_end_write(file);
 735
 736out:
 737        if (iov != iovstack)
 738                kfree(iov);
 739        if ((ret + (type == READ)) > 0) {
 740                if (type == READ)
 741                        fsnotify_access(file);
 742                else
 743                        fsnotify_modify(file);
 744        }
 745        return ret;
 746}
 747
 748ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 749                  unsigned long vlen, loff_t *pos)
 750{
 751        if (!(file->f_mode & FMODE_READ))
 752                return -EBADF;
 753        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 754                return -EINVAL;
 755
 756        return do_readv_writev(READ, file, vec, vlen, pos);
 757}
 758
 759EXPORT_SYMBOL(vfs_readv);
 760
 761ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 762                   unsigned long vlen, loff_t *pos)
 763{
 764        if (!(file->f_mode & FMODE_WRITE))
 765                return -EBADF;
 766        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 767                return -EINVAL;
 768
 769        return do_readv_writev(WRITE, file, vec, vlen, pos);
 770}
 771
 772EXPORT_SYMBOL(vfs_writev);
 773
 774SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 775                unsigned long, vlen)
 776{
 777        struct fd f = fdget(fd);
 778        ssize_t ret = -EBADF;
 779
 780        if (f.file) {
 781                loff_t pos = file_pos_read(f.file);
 782                ret = vfs_readv(f.file, vec, vlen, &pos);
 783                file_pos_write(f.file, pos);
 784                fdput(f);
 785        }
 786
 787        if (ret > 0)
 788                add_rchar(current, ret);
 789        inc_syscr(current);
 790        return ret;
 791}
 792
 793SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 794                unsigned long, vlen)
 795{
 796        struct fd f = fdget(fd);
 797        ssize_t ret = -EBADF;
 798
 799        if (f.file) {
 800                loff_t pos = file_pos_read(f.file);
 801                ret = vfs_writev(f.file, vec, vlen, &pos);
 802                file_pos_write(f.file, pos);
 803                fdput(f);
 804        }
 805
 806        if (ret > 0)
 807                add_wchar(current, ret);
 808        inc_syscw(current);
 809        return ret;
 810}
 811
 812static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 813{
 814#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 815        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 816}
 817
 818SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 819                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 820{
 821        loff_t pos = pos_from_hilo(pos_h, pos_l);
 822        struct fd f;
 823        ssize_t ret = -EBADF;
 824
 825        if (pos < 0)
 826                return -EINVAL;
 827
 828        f = fdget(fd);
 829        if (f.file) {
 830                ret = -ESPIPE;
 831                if (f.file->f_mode & FMODE_PREAD)
 832                        ret = vfs_readv(f.file, vec, vlen, &pos);
 833                fdput(f);
 834        }
 835
 836        if (ret > 0)
 837                add_rchar(current, ret);
 838        inc_syscr(current);
 839        return ret;
 840}
 841
 842SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 843                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 844{
 845        loff_t pos = pos_from_hilo(pos_h, pos_l);
 846        struct fd f;
 847        ssize_t ret = -EBADF;
 848
 849        if (pos < 0)
 850                return -EINVAL;
 851
 852        f = fdget(fd);
 853        if (f.file) {
 854                ret = -ESPIPE;
 855                if (f.file->f_mode & FMODE_PWRITE)
 856                        ret = vfs_writev(f.file, vec, vlen, &pos);
 857                fdput(f);
 858        }
 859
 860        if (ret > 0)
 861                add_wchar(current, ret);
 862        inc_syscw(current);
 863        return ret;
 864}
 865
 866#ifdef CONFIG_COMPAT
 867
 868static ssize_t compat_do_readv_writev(int type, struct file *file,
 869                               const struct compat_iovec __user *uvector,
 870                               unsigned long nr_segs, loff_t *pos)
 871{
 872        compat_ssize_t tot_len;
 873        struct iovec iovstack[UIO_FASTIOV];
 874        struct iovec *iov = iovstack;
 875        ssize_t ret;
 876        io_fn_t fn;
 877        iov_fn_t fnv;
 878
 879        ret = -EINVAL;
 880        if (!file->f_op)
 881                goto out;
 882
 883        ret = -EFAULT;
 884        if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
 885                goto out;
 886
 887        ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 888                                               UIO_FASTIOV, iovstack, &iov);
 889        if (ret <= 0)
 890                goto out;
 891
 892        tot_len = ret;
 893        ret = rw_verify_area(type, file, pos, tot_len);
 894        if (ret < 0)
 895                goto out;
 896
 897        fnv = NULL;
 898        if (type == READ) {
 899                fn = file->f_op->read;
 900                fnv = file->f_op->aio_read;
 901        } else {
 902                fn = (io_fn_t)file->f_op->write;
 903                fnv = file->f_op->aio_write;
 904                file_start_write(file);
 905        }
 906
 907        if (fnv)
 908                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 909                                                pos, fnv);
 910        else
 911                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 912
 913        if (type != READ)
 914                file_end_write(file);
 915
 916out:
 917        if (iov != iovstack)
 918                kfree(iov);
 919        if ((ret + (type == READ)) > 0) {
 920                if (type == READ)
 921                        fsnotify_access(file);
 922                else
 923                        fsnotify_modify(file);
 924        }
 925        return ret;
 926}
 927
 928static size_t compat_readv(struct file *file,
 929                           const struct compat_iovec __user *vec,
 930                           unsigned long vlen, loff_t *pos)
 931{
 932        ssize_t ret = -EBADF;
 933
 934        if (!(file->f_mode & FMODE_READ))
 935                goto out;
 936
 937        ret = -EINVAL;
 938        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 939                goto out;
 940
 941        ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
 942
 943out:
 944        if (ret > 0)
 945                add_rchar(current, ret);
 946        inc_syscr(current);
 947        return ret;
 948}
 949
 950COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
 951                const struct compat_iovec __user *,vec,
 952                unsigned long, vlen)
 953{
 954        struct fd f = fdget(fd);
 955        ssize_t ret;
 956        loff_t pos;
 957
 958        if (!f.file)
 959                return -EBADF;
 960        pos = f.file->f_pos;
 961        ret = compat_readv(f.file, vec, vlen, &pos);
 962        f.file->f_pos = pos;
 963        fdput(f);
 964        return ret;
 965}
 966
 967COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
 968                const struct compat_iovec __user *,vec,
 969                unsigned long, vlen, loff_t, pos)
 970{
 971        struct fd f;
 972        ssize_t ret;
 973
 974        if (pos < 0)
 975                return -EINVAL;
 976        f = fdget(fd);
 977        if (!f.file)
 978                return -EBADF;
 979        ret = -ESPIPE;
 980        if (f.file->f_mode & FMODE_PREAD)
 981                ret = compat_readv(f.file, vec, vlen, &pos);
 982        fdput(f);
 983        return ret;
 984}
 985
 986COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
 987                const struct compat_iovec __user *,vec,
 988                unsigned long, vlen, u32, pos_low, u32, pos_high)
 989{
 990        loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 991        return compat_sys_preadv64(fd, vec, vlen, pos);
 992}
 993
 994static size_t compat_writev(struct file *file,
 995                            const struct compat_iovec __user *vec,
 996                            unsigned long vlen, loff_t *pos)
 997{
 998        ssize_t ret = -EBADF;
 999
1000        if (!(file->f_mode & FMODE_WRITE))
1001                goto out;
1002
1003        ret = -EINVAL;
1004        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
1005                goto out;
1006
1007        ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
1008
1009out:
1010        if (ret > 0)
1011                add_wchar(current, ret);
1012        inc_syscw(current);
1013        return ret;
1014}
1015
1016COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
1017                const struct compat_iovec __user *, vec,
1018                unsigned long, vlen)
1019{
1020        struct fd f = fdget(fd);
1021        ssize_t ret;
1022        loff_t pos;
1023
1024        if (!f.file)
1025                return -EBADF;
1026        pos = f.file->f_pos;
1027        ret = compat_writev(f.file, vec, vlen, &pos);
1028        f.file->f_pos = pos;
1029        fdput(f);
1030        return ret;
1031}
1032
1033COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1034                const struct compat_iovec __user *,vec,
1035                unsigned long, vlen, loff_t, pos)
1036{
1037        struct fd f;
1038        ssize_t ret;
1039
1040        if (pos < 0)
1041                return -EINVAL;
1042        f = fdget(fd);
1043        if (!f.file)
1044                return -EBADF;
1045        ret = -ESPIPE;
1046        if (f.file->f_mode & FMODE_PWRITE)
1047                ret = compat_writev(f.file, vec, vlen, &pos);
1048        fdput(f);
1049        return ret;
1050}
1051
1052COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
1053                const struct compat_iovec __user *,vec,
1054                unsigned long, vlen, u32, pos_low, u32, pos_high)
1055{
1056        loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1057        return compat_sys_pwritev64(fd, vec, vlen, pos);
1058}
1059#endif
1060
1061static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1062                           size_t count, loff_t max)
1063{
1064        struct fd in, out;
1065        struct inode *in_inode, *out_inode;
1066        loff_t pos;
1067        loff_t out_pos;
1068        ssize_t retval;
1069        int fl;
1070
1071        /*
1072         * Get input file, and verify that it is ok..
1073         */
1074        retval = -EBADF;
1075        in = fdget(in_fd);
1076        if (!in.file)
1077                goto out;
1078        if (!(in.file->f_mode & FMODE_READ))
1079                goto fput_in;
1080        retval = -ESPIPE;
1081        if (!ppos) {
1082                pos = in.file->f_pos;
1083        } else {
1084                pos = *ppos;
1085                if (!(in.file->f_mode & FMODE_PREAD))
1086                        goto fput_in;
1087        }
1088        retval = rw_verify_area(READ, in.file, &pos, count);
1089        if (retval < 0)
1090                goto fput_in;
1091        count = retval;
1092
1093        /*
1094         * Get output file, and verify that it is ok..
1095         */
1096        retval = -EBADF;
1097        out = fdget(out_fd);
1098        if (!out.file)
1099                goto fput_in;
1100        if (!(out.file->f_mode & FMODE_WRITE))
1101                goto fput_out;
1102        retval = -EINVAL;
1103        in_inode = file_inode(in.file);
1104        out_inode = file_inode(out.file);
1105        out_pos = out.file->f_pos;
1106        retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1107        if (retval < 0)
1108                goto fput_out;
1109        count = retval;
1110
1111        if (!max)
1112                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1113
1114        if (unlikely(pos + count > max)) {
1115                retval = -EOVERFLOW;
1116                if (pos >= max)
1117                        goto fput_out;
1118                count = max - pos;
1119        }
1120
1121        fl = 0;
1122#if 0
1123        /*
1124         * We need to debate whether we can enable this or not. The
1125         * man page documents EAGAIN return for the output at least,
1126         * and the application is arguably buggy if it doesn't expect
1127         * EAGAIN on a non-blocking file descriptor.
1128         */
1129        if (in.file->f_flags & O_NONBLOCK)
1130                fl = SPLICE_F_NONBLOCK;
1131#endif
1132        retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1133
1134        if (retval > 0) {
1135                add_rchar(current, retval);
1136                add_wchar(current, retval);
1137                fsnotify_access(in.file);
1138                fsnotify_modify(out.file);
1139                out.file->f_pos = out_pos;
1140                if (ppos)
1141                        *ppos = pos;
1142                else
1143                        in.file->f_pos = pos;
1144        }
1145
1146        inc_syscr(current);
1147        inc_syscw(current);
1148        if (pos > max)
1149                retval = -EOVERFLOW;
1150
1151fput_out:
1152        fdput(out);
1153fput_in:
1154        fdput(in);
1155out:
1156        return retval;
1157}
1158
1159SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1160{
1161        loff_t pos;
1162        off_t off;
1163        ssize_t ret;
1164
1165        if (offset) {
1166                if (unlikely(get_user(off, offset)))
1167                        return -EFAULT;
1168                pos = off;
1169                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1170                if (unlikely(put_user(pos, offset)))
1171                        return -EFAULT;
1172                return ret;
1173        }
1174
1175        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1176}
1177
1178SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1179{
1180        loff_t pos;
1181        ssize_t ret;
1182
1183        if (offset) {
1184                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1185                        return -EFAULT;
1186                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1187                if (unlikely(put_user(pos, offset)))
1188                        return -EFAULT;
1189                return ret;
1190        }
1191
1192        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1193}
1194
1195#ifdef CONFIG_COMPAT
1196COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1197                compat_off_t __user *, offset, compat_size_t, count)
1198{
1199        loff_t pos;
1200        off_t off;
1201        ssize_t ret;
1202
1203        if (offset) {
1204                if (unlikely(get_user(off, offset)))
1205                        return -EFAULT;
1206                pos = off;
1207                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1208                if (unlikely(put_user(pos, offset)))
1209                        return -EFAULT;
1210                return ret;
1211        }
1212
1213        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1214}
1215
1216COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1217                compat_loff_t __user *, offset, compat_size_t, count)
1218{
1219        loff_t pos;
1220        ssize_t ret;
1221
1222        if (offset) {
1223                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1224                        return -EFAULT;
1225                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1226                if (unlikely(put_user(pos, offset)))
1227                        return -EFAULT;
1228                return ret;
1229        }
1230
1231        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1232}
1233#endif
1234
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.