linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/fsnotify.h>
  13#include <linux/security.h>
  14#include <linux/export.h>
  15#include <linux/syscalls.h>
  16#include <linux/pagemap.h>
  17#include <linux/splice.h>
  18#include "read_write.h"
  19
  20#include <asm/uaccess.h>
  21#include <asm/unistd.h>
  22
  23const struct file_operations generic_ro_fops = {
  24        .llseek         = generic_file_llseek,
  25        .read           = do_sync_read,
  26        .aio_read       = generic_file_aio_read,
  27        .mmap           = generic_file_readonly_mmap,
  28        .splice_read    = generic_file_splice_read,
  29};
  30
  31EXPORT_SYMBOL(generic_ro_fops);
  32
  33static inline int unsigned_offsets(struct file *file)
  34{
  35        return file->f_mode & FMODE_UNSIGNED_OFFSET;
  36}
  37
  38static loff_t lseek_execute(struct file *file, struct inode *inode,
  39                loff_t offset, loff_t maxsize)
  40{
  41        if (offset < 0 && !unsigned_offsets(file))
  42                return -EINVAL;
  43        if (offset > maxsize)
  44                return -EINVAL;
  45
  46        if (offset != file->f_pos) {
  47                file->f_pos = offset;
  48                file->f_version = 0;
  49        }
  50        return offset;
  51}
  52
  53/**
  54 * generic_file_llseek_size - generic llseek implementation for regular files
  55 * @file:       file structure to seek on
  56 * @offset:     file offset to seek to
  57 * @origin:     type of seek
  58 * @size:       max size of file system
  59 *
  60 * This is a variant of generic_file_llseek that allows passing in a custom
  61 * file size.
  62 *
  63 * Synchronization:
  64 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
  65 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
  66 * read/writes behave like SEEK_SET against seeks.
  67 */
  68loff_t
  69generic_file_llseek_size(struct file *file, loff_t offset, int origin,
  70                loff_t maxsize)
  71{
  72        struct inode *inode = file->f_mapping->host;
  73
  74        switch (origin) {
  75        case SEEK_END:
  76                offset += i_size_read(inode);
  77                break;
  78        case SEEK_CUR:
  79                /*
  80                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  81                 * position-querying operation.  Avoid rewriting the "same"
  82                 * f_pos value back to the file because a concurrent read(),
  83                 * write() or lseek() might have altered it
  84                 */
  85                if (offset == 0)
  86                        return file->f_pos;
  87                /*
  88                 * f_lock protects against read/modify/write race with other
  89                 * SEEK_CURs. Note that parallel writes and reads behave
  90                 * like SEEK_SET.
  91                 */
  92                spin_lock(&file->f_lock);
  93                offset = lseek_execute(file, inode, file->f_pos + offset,
  94                                       maxsize);
  95                spin_unlock(&file->f_lock);
  96                return offset;
  97        case SEEK_DATA:
  98                /*
  99                 * In the generic case the entire file is data, so as long as
 100                 * offset isn't at the end of the file then the offset is data.
 101                 */
 102                if (offset >= i_size_read(inode))
 103                        return -ENXIO;
 104                break;
 105        case SEEK_HOLE:
 106                /*
 107                 * There is a virtual hole at the end of the file, so as long as
 108                 * offset isn't i_size or larger, return i_size.
 109                 */
 110                if (offset >= i_size_read(inode))
 111                        return -ENXIO;
 112                offset = i_size_read(inode);
 113                break;
 114        }
 115
 116        return lseek_execute(file, inode, offset, maxsize);
 117}
 118EXPORT_SYMBOL(generic_file_llseek_size);
 119
 120/**
 121 * generic_file_llseek - generic llseek implementation for regular files
 122 * @file:       file structure to seek on
 123 * @offset:     file offset to seek to
 124 * @origin:     type of seek
 125 *
 126 * This is a generic implemenation of ->llseek useable for all normal local
 127 * filesystems.  It just updates the file offset to the value specified by
 128 * @offset and @origin under i_mutex.
 129 */
 130loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 131{
 132        struct inode *inode = file->f_mapping->host;
 133
 134        return generic_file_llseek_size(file, offset, origin,
 135                                        inode->i_sb->s_maxbytes);
 136}
 137EXPORT_SYMBOL(generic_file_llseek);
 138
 139/**
 140 * noop_llseek - No Operation Performed llseek implementation
 141 * @file:       file structure to seek on
 142 * @offset:     file offset to seek to
 143 * @origin:     type of seek
 144 *
 145 * This is an implementation of ->llseek useable for the rare special case when
 146 * userspace expects the seek to succeed but the (device) file is actually not
 147 * able to perform the seek. In this case you use noop_llseek() instead of
 148 * falling back to the default implementation of ->llseek.
 149 */
 150loff_t noop_llseek(struct file *file, loff_t offset, int origin)
 151{
 152        return file->f_pos;
 153}
 154EXPORT_SYMBOL(noop_llseek);
 155
 156loff_t no_llseek(struct file *file, loff_t offset, int origin)
 157{
 158        return -ESPIPE;
 159}
 160EXPORT_SYMBOL(no_llseek);
 161
 162loff_t default_llseek(struct file *file, loff_t offset, int origin)
 163{
 164        struct inode *inode = file->f_path.dentry->d_inode;
 165        loff_t retval;
 166
 167        mutex_lock(&inode->i_mutex);
 168        switch (origin) {
 169                case SEEK_END:
 170                        offset += i_size_read(inode);
 171                        break;
 172                case SEEK_CUR:
 173                        if (offset == 0) {
 174                                retval = file->f_pos;
 175                                goto out;
 176                        }
 177                        offset += file->f_pos;
 178                        break;
 179                case SEEK_DATA:
 180                        /*
 181                         * In the generic case the entire file is data, so as
 182                         * long as offset isn't at the end of the file then the
 183                         * offset is data.
 184                         */
 185                        if (offset >= inode->i_size) {
 186                                retval = -ENXIO;
 187                                goto out;
 188                        }
 189                        break;
 190                case SEEK_HOLE:
 191                        /*
 192                         * There is a virtual hole at the end of the file, so
 193                         * as long as offset isn't i_size or larger, return
 194                         * i_size.
 195                         */
 196                        if (offset >= inode->i_size) {
 197                                retval = -ENXIO;
 198                                goto out;
 199                        }
 200                        offset = inode->i_size;
 201                        break;
 202        }
 203        retval = -EINVAL;
 204        if (offset >= 0 || unsigned_offsets(file)) {
 205                if (offset != file->f_pos) {
 206                        file->f_pos = offset;
 207                        file->f_version = 0;
 208                }
 209                retval = offset;
 210        }
 211out:
 212        mutex_unlock(&inode->i_mutex);
 213        return retval;
 214}
 215EXPORT_SYMBOL(default_llseek);
 216
 217loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 218{
 219        loff_t (*fn)(struct file *, loff_t, int);
 220
 221        fn = no_llseek;
 222        if (file->f_mode & FMODE_LSEEK) {
 223                if (file->f_op && file->f_op->llseek)
 224                        fn = file->f_op->llseek;
 225        }
 226        return fn(file, offset, origin);
 227}
 228EXPORT_SYMBOL(vfs_llseek);
 229
 230SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 231{
 232        off_t retval;
 233        struct file * file;
 234        int fput_needed;
 235
 236        retval = -EBADF;
 237        file = fget_light(fd, &fput_needed);
 238        if (!file)
 239                goto bad;
 240
 241        retval = -EINVAL;
 242        if (origin <= SEEK_MAX) {
 243                loff_t res = vfs_llseek(file, offset, origin);
 244                retval = res;
 245                if (res != (loff_t)retval)
 246                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 247        }
 248        fput_light(file, fput_needed);
 249bad:
 250        return retval;
 251}
 252
 253#ifdef __ARCH_WANT_SYS_LLSEEK
 254SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 255                unsigned long, offset_low, loff_t __user *, result,
 256                unsigned int, origin)
 257{
 258        int retval;
 259        struct file * file;
 260        loff_t offset;
 261        int fput_needed;
 262
 263        retval = -EBADF;
 264        file = fget_light(fd, &fput_needed);
 265        if (!file)
 266                goto bad;
 267
 268        retval = -EINVAL;
 269        if (origin > SEEK_MAX)
 270                goto out_putf;
 271
 272        offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
 273                        origin);
 274
 275        retval = (int)offset;
 276        if (offset >= 0) {
 277                retval = -EFAULT;
 278                if (!copy_to_user(result, &offset, sizeof(offset)))
 279                        retval = 0;
 280        }
 281out_putf:
 282        fput_light(file, fput_needed);
 283bad:
 284        return retval;
 285}
 286#endif
 287
 288
 289/*
 290 * rw_verify_area doesn't like huge counts. We limit
 291 * them to something that fits in "int" so that others
 292 * won't have to do range checks all the time.
 293 */
 294int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 295{
 296        struct inode *inode;
 297        loff_t pos;
 298        int retval = -EINVAL;
 299
 300        inode = file->f_path.dentry->d_inode;
 301        if (unlikely((ssize_t) count < 0))
 302                return retval;
 303        pos = *ppos;
 304        if (unlikely(pos < 0)) {
 305                if (!unsigned_offsets(file))
 306                        return retval;
 307                if (count >= -pos) /* both values are in 0..LLONG_MAX */
 308                        return -EOVERFLOW;
 309        } else if (unlikely((loff_t) (pos + count) < 0)) {
 310                if (!unsigned_offsets(file))
 311                        return retval;
 312        }
 313
 314        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 315                retval = locks_mandatory_area(
 316                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 317                        inode, file, pos, count);
 318                if (retval < 0)
 319                        return retval;
 320        }
 321        retval = security_file_permission(file,
 322                                read_write == READ ? MAY_READ : MAY_WRITE);
 323        if (retval)
 324                return retval;
 325        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 326}
 327
 328static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 329{
 330        set_current_state(TASK_UNINTERRUPTIBLE);
 331        if (!kiocbIsKicked(iocb))
 332                schedule();
 333        else
 334                kiocbClearKicked(iocb);
 335        __set_current_state(TASK_RUNNING);
 336}
 337
 338ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 339{
 340        struct iovec iov = { .iov_base = buf, .iov_len = len };
 341        struct kiocb kiocb;
 342        ssize_t ret;
 343
 344        init_sync_kiocb(&kiocb, filp);
 345        kiocb.ki_pos = *ppos;
 346        kiocb.ki_left = len;
 347        kiocb.ki_nbytes = len;
 348
 349        for (;;) {
 350                ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 351                if (ret != -EIOCBRETRY)
 352                        break;
 353                wait_on_retry_sync_kiocb(&kiocb);
 354        }
 355
 356        if (-EIOCBQUEUED == ret)
 357                ret = wait_on_sync_kiocb(&kiocb);
 358        *ppos = kiocb.ki_pos;
 359        return ret;
 360}
 361
 362EXPORT_SYMBOL(do_sync_read);
 363
 364ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 365{
 366        ssize_t ret;
 367
 368        if (!(file->f_mode & FMODE_READ))
 369                return -EBADF;
 370        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 371                return -EINVAL;
 372        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 373                return -EFAULT;
 374
 375        ret = rw_verify_area(READ, file, pos, count);
 376        if (ret >= 0) {
 377                count = ret;
 378                if (file->f_op->read)
 379                        ret = file->f_op->read(file, buf, count, pos);
 380                else
 381                        ret = do_sync_read(file, buf, count, pos);
 382                if (ret > 0) {
 383                        fsnotify_access(file);
 384                        add_rchar(current, ret);
 385                }
 386                inc_syscr(current);
 387        }
 388
 389        return ret;
 390}
 391
 392EXPORT_SYMBOL(vfs_read);
 393
 394ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 395{
 396        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 397        struct kiocb kiocb;
 398        ssize_t ret;
 399
 400        init_sync_kiocb(&kiocb, filp);
 401        kiocb.ki_pos = *ppos;
 402        kiocb.ki_left = len;
 403        kiocb.ki_nbytes = len;
 404
 405        for (;;) {
 406                ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 407                if (ret != -EIOCBRETRY)
 408                        break;
 409                wait_on_retry_sync_kiocb(&kiocb);
 410        }
 411
 412        if (-EIOCBQUEUED == ret)
 413                ret = wait_on_sync_kiocb(&kiocb);
 414        *ppos = kiocb.ki_pos;
 415        return ret;
 416}
 417
 418EXPORT_SYMBOL(do_sync_write);
 419
 420ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 421{
 422        ssize_t ret;
 423
 424        if (!(file->f_mode & FMODE_WRITE))
 425                return -EBADF;
 426        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 427                return -EINVAL;
 428        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 429                return -EFAULT;
 430
 431        ret = rw_verify_area(WRITE, file, pos, count);
 432        if (ret >= 0) {
 433                count = ret;
 434                if (file->f_op->write)
 435                        ret = file->f_op->write(file, buf, count, pos);
 436                else
 437                        ret = do_sync_write(file, buf, count, pos);
 438                if (ret > 0) {
 439                        fsnotify_modify(file);
 440                        add_wchar(current, ret);
 441                }
 442                inc_syscw(current);
 443        }
 444
 445        return ret;
 446}
 447
 448EXPORT_SYMBOL(vfs_write);
 449
 450static inline loff_t file_pos_read(struct file *file)
 451{
 452        return file->f_pos;
 453}
 454
 455static inline void file_pos_write(struct file *file, loff_t pos)
 456{
 457        file->f_pos = pos;
 458}
 459
 460SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 461{
 462        struct file *file;
 463        ssize_t ret = -EBADF;
 464        int fput_needed;
 465
 466        file = fget_light(fd, &fput_needed);
 467        if (file) {
 468                loff_t pos = file_pos_read(file);
 469                ret = vfs_read(file, buf, count, &pos);
 470                file_pos_write(file, pos);
 471                fput_light(file, fput_needed);
 472        }
 473
 474        return ret;
 475}
 476
 477SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 478                size_t, count)
 479{
 480        struct file *file;
 481        ssize_t ret = -EBADF;
 482        int fput_needed;
 483
 484        file = fget_light(fd, &fput_needed);
 485        if (file) {
 486                loff_t pos = file_pos_read(file);
 487                ret = vfs_write(file, buf, count, &pos);
 488                file_pos_write(file, pos);
 489                fput_light(file, fput_needed);
 490        }
 491
 492        return ret;
 493}
 494
 495SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 496                        size_t count, loff_t pos)
 497{
 498        struct file *file;
 499        ssize_t ret = -EBADF;
 500        int fput_needed;
 501
 502        if (pos < 0)
 503                return -EINVAL;
 504
 505        file = fget_light(fd, &fput_needed);
 506        if (file) {
 507                ret = -ESPIPE;
 508                if (file->f_mode & FMODE_PREAD)
 509                        ret = vfs_read(file, buf, count, &pos);
 510                fput_light(file, fput_needed);
 511        }
 512
 513        return ret;
 514}
 515#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 516asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
 517{
 518        return SYSC_pread64((unsigned int) fd, (char __user *) buf,
 519                            (size_t) count, pos);
 520}
 521SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 522#endif
 523
 524SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 525                         size_t count, loff_t pos)
 526{
 527        struct file *file;
 528        ssize_t ret = -EBADF;
 529        int fput_needed;
 530
 531        if (pos < 0)
 532                return -EINVAL;
 533
 534        file = fget_light(fd, &fput_needed);
 535        if (file) {
 536                ret = -ESPIPE;
 537                if (file->f_mode & FMODE_PWRITE)  
 538                        ret = vfs_write(file, buf, count, &pos);
 539                fput_light(file, fput_needed);
 540        }
 541
 542        return ret;
 543}
 544#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 545asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
 546{
 547        return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
 548                             (size_t) count, pos);
 549}
 550SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
 551#endif
 552
 553/*
 554 * Reduce an iovec's length in-place.  Return the resulting number of segments
 555 */
 556unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 557{
 558        unsigned long seg = 0;
 559        size_t len = 0;
 560
 561        while (seg < nr_segs) {
 562                seg++;
 563                if (len + iov->iov_len >= to) {
 564                        iov->iov_len = to - len;
 565                        break;
 566                }
 567                len += iov->iov_len;
 568                iov++;
 569        }
 570        return seg;
 571}
 572EXPORT_SYMBOL(iov_shorten);
 573
 574ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 575                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 576{
 577        struct kiocb kiocb;
 578        ssize_t ret;
 579
 580        init_sync_kiocb(&kiocb, filp);
 581        kiocb.ki_pos = *ppos;
 582        kiocb.ki_left = len;
 583        kiocb.ki_nbytes = len;
 584
 585        for (;;) {
 586                ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 587                if (ret != -EIOCBRETRY)
 588                        break;
 589                wait_on_retry_sync_kiocb(&kiocb);
 590        }
 591
 592        if (ret == -EIOCBQUEUED)
 593                ret = wait_on_sync_kiocb(&kiocb);
 594        *ppos = kiocb.ki_pos;
 595        return ret;
 596}
 597
 598/* Do it by hand, with file-ops */
 599ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 600                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 601{
 602        struct iovec *vector = iov;
 603        ssize_t ret = 0;
 604
 605        while (nr_segs > 0) {
 606                void __user *base;
 607                size_t len;
 608                ssize_t nr;
 609
 610                base = vector->iov_base;
 611                len = vector->iov_len;
 612                vector++;
 613                nr_segs--;
 614
 615                nr = fn(filp, base, len, ppos);
 616
 617                if (nr < 0) {
 618                        if (!ret)
 619                                ret = nr;
 620                        break;
 621                }
 622                ret += nr;
 623                if (nr != len)
 624                        break;
 625        }
 626
 627        return ret;
 628}
 629
 630/* A write operation does a read from user space and vice versa */
 631#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 632
 633ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 634                              unsigned long nr_segs, unsigned long fast_segs,
 635                              struct iovec *fast_pointer,
 636                              struct iovec **ret_pointer)
 637{
 638        unsigned long seg;
 639        ssize_t ret;
 640        struct iovec *iov = fast_pointer;
 641
 642        /*
 643         * SuS says "The readv() function *may* fail if the iovcnt argument
 644         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 645         * traditionally returned zero for zero segments, so...
 646         */
 647        if (nr_segs == 0) {
 648                ret = 0;
 649                goto out;
 650        }
 651
 652        /*
 653         * First get the "struct iovec" from user memory and
 654         * verify all the pointers
 655         */
 656        if (nr_segs > UIO_MAXIOV) {
 657                ret = -EINVAL;
 658                goto out;
 659        }
 660        if (nr_segs > fast_segs) {
 661                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 662                if (iov == NULL) {
 663                        ret = -ENOMEM;
 664                        goto out;
 665                }
 666        }
 667        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 668                ret = -EFAULT;
 669                goto out;
 670        }
 671
 672        /*
 673         * According to the Single Unix Specification we should return EINVAL
 674         * if an element length is < 0 when cast to ssize_t or if the
 675         * total length would overflow the ssize_t return value of the
 676         * system call.
 677         *
 678         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 679         * overflow case.
 680         */
 681        ret = 0;
 682        for (seg = 0; seg < nr_segs; seg++) {
 683                void __user *buf = iov[seg].iov_base;
 684                ssize_t len = (ssize_t)iov[seg].iov_len;
 685
 686                /* see if we we're about to use an invalid len or if
 687                 * it's about to overflow ssize_t */
 688                if (len < 0) {
 689                        ret = -EINVAL;
 690                        goto out;
 691                }
 692                if (type >= 0
 693                    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 694                        ret = -EFAULT;
 695                        goto out;
 696                }
 697                if (len > MAX_RW_COUNT - ret) {
 698                        len = MAX_RW_COUNT - ret;
 699                        iov[seg].iov_len = len;
 700                }
 701                ret += len;
 702        }
 703out:
 704        *ret_pointer = iov;
 705        return ret;
 706}
 707
 708static ssize_t do_readv_writev(int type, struct file *file,
 709                               const struct iovec __user * uvector,
 710                               unsigned long nr_segs, loff_t *pos)
 711{
 712        size_t tot_len;
 713        struct iovec iovstack[UIO_FASTIOV];
 714        struct iovec *iov = iovstack;
 715        ssize_t ret;
 716        io_fn_t fn;
 717        iov_fn_t fnv;
 718
 719        if (!file->f_op) {
 720                ret = -EINVAL;
 721                goto out;
 722        }
 723
 724        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 725                                    ARRAY_SIZE(iovstack), iovstack, &iov);
 726        if (ret <= 0)
 727                goto out;
 728
 729        tot_len = ret;
 730        ret = rw_verify_area(type, file, pos, tot_len);
 731        if (ret < 0)
 732                goto out;
 733
 734        fnv = NULL;
 735        if (type == READ) {
 736                fn = file->f_op->read;
 737                fnv = file->f_op->aio_read;
 738        } else {
 739                fn = (io_fn_t)file->f_op->write;
 740                fnv = file->f_op->aio_write;
 741        }
 742
 743        if (fnv)
 744                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 745                                                pos, fnv);
 746        else
 747                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 748
 749out:
 750        if (iov != iovstack)
 751                kfree(iov);
 752        if ((ret + (type == READ)) > 0) {
 753                if (type == READ)
 754                        fsnotify_access(file);
 755                else
 756                        fsnotify_modify(file);
 757        }
 758        return ret;
 759}
 760
 761ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 762                  unsigned long vlen, loff_t *pos)
 763{
 764        if (!(file->f_mode & FMODE_READ))
 765                return -EBADF;
 766        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 767                return -EINVAL;
 768
 769        return do_readv_writev(READ, file, vec, vlen, pos);
 770}
 771
 772EXPORT_SYMBOL(vfs_readv);
 773
 774ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 775                   unsigned long vlen, loff_t *pos)
 776{
 777        if (!(file->f_mode & FMODE_WRITE))
 778                return -EBADF;
 779        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 780                return -EINVAL;
 781
 782        return do_readv_writev(WRITE, file, vec, vlen, pos);
 783}
 784
 785EXPORT_SYMBOL(vfs_writev);
 786
 787SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 788                unsigned long, vlen)
 789{
 790        struct file *file;
 791        ssize_t ret = -EBADF;
 792        int fput_needed;
 793
 794        file = fget_light(fd, &fput_needed);
 795        if (file) {
 796                loff_t pos = file_pos_read(file);
 797                ret = vfs_readv(file, vec, vlen, &pos);
 798                file_pos_write(file, pos);
 799                fput_light(file, fput_needed);
 800        }
 801
 802        if (ret > 0)
 803                add_rchar(current, ret);
 804        inc_syscr(current);
 805        return ret;
 806}
 807
 808SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 809                unsigned long, vlen)
 810{
 811        struct file *file;
 812        ssize_t ret = -EBADF;
 813        int fput_needed;
 814
 815        file = fget_light(fd, &fput_needed);
 816        if (file) {
 817                loff_t pos = file_pos_read(file);
 818                ret = vfs_writev(file, vec, vlen, &pos);
 819                file_pos_write(file, pos);
 820                fput_light(file, fput_needed);
 821        }
 822
 823        if (ret > 0)
 824                add_wchar(current, ret);
 825        inc_syscw(current);
 826        return ret;
 827}
 828
 829static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 830{
 831#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 832        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 833}
 834
 835SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 836                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 837{
 838        loff_t pos = pos_from_hilo(pos_h, pos_l);
 839        struct file *file;
 840        ssize_t ret = -EBADF;
 841        int fput_needed;
 842
 843        if (pos < 0)
 844                return -EINVAL;
 845
 846        file = fget_light(fd, &fput_needed);
 847        if (file) {
 848                ret = -ESPIPE;
 849                if (file->f_mode & FMODE_PREAD)
 850                        ret = vfs_readv(file, vec, vlen, &pos);
 851                fput_light(file, fput_needed);
 852        }
 853
 854        if (ret > 0)
 855                add_rchar(current, ret);
 856        inc_syscr(current);
 857        return ret;
 858}
 859
 860SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 861                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 862{
 863        loff_t pos = pos_from_hilo(pos_h, pos_l);
 864        struct file *file;
 865        ssize_t ret = -EBADF;
 866        int fput_needed;
 867
 868        if (pos < 0)
 869                return -EINVAL;
 870
 871        file = fget_light(fd, &fput_needed);
 872        if (file) {
 873                ret = -ESPIPE;
 874                if (file->f_mode & FMODE_PWRITE)
 875                        ret = vfs_writev(file, vec, vlen, &pos);
 876                fput_light(file, fput_needed);
 877        }
 878
 879        if (ret > 0)
 880                add_wchar(current, ret);
 881        inc_syscw(current);
 882        return ret;
 883}
 884
 885static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 886                           size_t count, loff_t max)
 887{
 888        struct file * in_file, * out_file;
 889        struct inode * in_inode, * out_inode;
 890        loff_t pos;
 891        ssize_t retval;
 892        int fput_needed_in, fput_needed_out, fl;
 893
 894        /*
 895         * Get input file, and verify that it is ok..
 896         */
 897        retval = -EBADF;
 898        in_file = fget_light(in_fd, &fput_needed_in);
 899        if (!in_file)
 900                goto out;
 901        if (!(in_file->f_mode & FMODE_READ))
 902                goto fput_in;
 903        retval = -ESPIPE;
 904        if (!ppos)
 905                ppos = &in_file->f_pos;
 906        else
 907                if (!(in_file->f_mode & FMODE_PREAD))
 908                        goto fput_in;
 909        retval = rw_verify_area(READ, in_file, ppos, count);
 910        if (retval < 0)
 911                goto fput_in;
 912        count = retval;
 913
 914        /*
 915         * Get output file, and verify that it is ok..
 916         */
 917        retval = -EBADF;
 918        out_file = fget_light(out_fd, &fput_needed_out);
 919        if (!out_file)
 920                goto fput_in;
 921        if (!(out_file->f_mode & FMODE_WRITE))
 922                goto fput_out;
 923        retval = -EINVAL;
 924        in_inode = in_file->f_path.dentry->d_inode;
 925        out_inode = out_file->f_path.dentry->d_inode;
 926        retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 927        if (retval < 0)
 928                goto fput_out;
 929        count = retval;
 930
 931        if (!max)
 932                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 933
 934        pos = *ppos;
 935        if (unlikely(pos + count > max)) {
 936                retval = -EOVERFLOW;
 937                if (pos >= max)
 938                        goto fput_out;
 939                count = max - pos;
 940        }
 941
 942        fl = 0;
 943#if 0
 944        /*
 945         * We need to debate whether we can enable this or not. The
 946         * man page documents EAGAIN return for the output at least,
 947         * and the application is arguably buggy if it doesn't expect
 948         * EAGAIN on a non-blocking file descriptor.
 949         */
 950        if (in_file->f_flags & O_NONBLOCK)
 951                fl = SPLICE_F_NONBLOCK;
 952#endif
 953        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 954
 955        if (retval > 0) {
 956                add_rchar(current, retval);
 957                add_wchar(current, retval);
 958        }
 959
 960        inc_syscr(current);
 961        inc_syscw(current);
 962        if (*ppos > max)
 963                retval = -EOVERFLOW;
 964
 965fput_out:
 966        fput_light(out_file, fput_needed_out);
 967fput_in:
 968        fput_light(in_file, fput_needed_in);
 969out:
 970        return retval;
 971}
 972
 973SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
 974{
 975        loff_t pos;
 976        off_t off;
 977        ssize_t ret;
 978
 979        if (offset) {
 980                if (unlikely(get_user(off, offset)))
 981                        return -EFAULT;
 982                pos = off;
 983                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
 984                if (unlikely(put_user(pos, offset)))
 985                        return -EFAULT;
 986                return ret;
 987        }
 988
 989        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 990}
 991
 992SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
 993{
 994        loff_t pos;
 995        ssize_t ret;
 996
 997        if (offset) {
 998                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
 999                        return -EFAULT;
1000                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1001                if (unlikely(put_user(pos, offset)))
1002                        return -EFAULT;
1003                return ret;
1004        }
1005
1006        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1007}
1008
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.