linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/fsnotify.h>
  13#include <linux/security.h>
  14#include <linux/module.h>
  15#include <linux/syscalls.h>
  16#include <linux/pagemap.h>
  17#include <linux/splice.h>
  18#include "read_write.h"
  19
  20#include <asm/uaccess.h>
  21#include <asm/unistd.h>
  22
  23const struct file_operations generic_ro_fops = {
  24        .llseek         = generic_file_llseek,
  25        .read           = do_sync_read,
  26        .aio_read       = generic_file_aio_read,
  27        .mmap           = generic_file_readonly_mmap,
  28        .splice_read    = generic_file_splice_read,
  29};
  30
  31EXPORT_SYMBOL(generic_ro_fops);
  32
  33static inline int unsigned_offsets(struct file *file)
  34{
  35        return file->f_mode & FMODE_UNSIGNED_OFFSET;
  36}
  37
  38/**
  39 * generic_file_llseek_unlocked - lockless generic llseek implementation
  40 * @file:       file structure to seek on
  41 * @offset:     file offset to seek to
  42 * @origin:     type of seek
  43 *
  44 * Updates the file offset to the value specified by @offset and @origin.
  45 * Locking must be provided by the caller.
  46 */
  47loff_t
  48generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
  49{
  50        struct inode *inode = file->f_mapping->host;
  51
  52        switch (origin) {
  53        case SEEK_END:
  54                offset += inode->i_size;
  55                break;
  56        case SEEK_CUR:
  57                /*
  58                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  59                 * position-querying operation.  Avoid rewriting the "same"
  60                 * f_pos value back to the file because a concurrent read(),
  61                 * write() or lseek() might have altered it
  62                 */
  63                if (offset == 0)
  64                        return file->f_pos;
  65                offset += file->f_pos;
  66                break;
  67        case SEEK_DATA:
  68                /*
  69                 * In the generic case the entire file is data, so as long as
  70                 * offset isn't at the end of the file then the offset is data.
  71                 */
  72                if (offset >= inode->i_size)
  73                        return -ENXIO;
  74                break;
  75        case SEEK_HOLE:
  76                /*
  77                 * There is a virtual hole at the end of the file, so as long as
  78                 * offset isn't i_size or larger, return i_size.
  79                 */
  80                if (offset >= inode->i_size)
  81                        return -ENXIO;
  82                offset = inode->i_size;
  83                break;
  84        }
  85
  86        if (offset < 0 && !unsigned_offsets(file))
  87                return -EINVAL;
  88        if (offset > inode->i_sb->s_maxbytes)
  89                return -EINVAL;
  90
  91        /* Special lock needed here? */
  92        if (offset != file->f_pos) {
  93                file->f_pos = offset;
  94                file->f_version = 0;
  95        }
  96
  97        return offset;
  98}
  99EXPORT_SYMBOL(generic_file_llseek_unlocked);
 100
 101/**
 102 * generic_file_llseek - generic llseek implementation for regular files
 103 * @file:       file structure to seek on
 104 * @offset:     file offset to seek to
 105 * @origin:     type of seek
 106 *
 107 * This is a generic implemenation of ->llseek useable for all normal local
 108 * filesystems.  It just updates the file offset to the value specified by
 109 * @offset and @origin under i_mutex.
 110 */
 111loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 112{
 113        loff_t rval;
 114
 115        mutex_lock(&file->f_dentry->d_inode->i_mutex);
 116        rval = generic_file_llseek_unlocked(file, offset, origin);
 117        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 118
 119        return rval;
 120}
 121EXPORT_SYMBOL(generic_file_llseek);
 122
 123/**
 124 * noop_llseek - No Operation Performed llseek implementation
 125 * @file:       file structure to seek on
 126 * @offset:     file offset to seek to
 127 * @origin:     type of seek
 128 *
 129 * This is an implementation of ->llseek useable for the rare special case when
 130 * userspace expects the seek to succeed but the (device) file is actually not
 131 * able to perform the seek. In this case you use noop_llseek() instead of
 132 * falling back to the default implementation of ->llseek.
 133 */
 134loff_t noop_llseek(struct file *file, loff_t offset, int origin)
 135{
 136        return file->f_pos;
 137}
 138EXPORT_SYMBOL(noop_llseek);
 139
 140loff_t no_llseek(struct file *file, loff_t offset, int origin)
 141{
 142        return -ESPIPE;
 143}
 144EXPORT_SYMBOL(no_llseek);
 145
 146loff_t default_llseek(struct file *file, loff_t offset, int origin)
 147{
 148        struct inode *inode = file->f_path.dentry->d_inode;
 149        loff_t retval;
 150
 151        mutex_lock(&inode->i_mutex);
 152        switch (origin) {
 153                case SEEK_END:
 154                        offset += i_size_read(inode);
 155                        break;
 156                case SEEK_CUR:
 157                        if (offset == 0) {
 158                                retval = file->f_pos;
 159                                goto out;
 160                        }
 161                        offset += file->f_pos;
 162                        break;
 163                case SEEK_DATA:
 164                        /*
 165                         * In the generic case the entire file is data, so as
 166                         * long as offset isn't at the end of the file then the
 167                         * offset is data.
 168                         */
 169                        if (offset >= inode->i_size) {
 170                                retval = -ENXIO;
 171                                goto out;
 172                        }
 173                        break;
 174                case SEEK_HOLE:
 175                        /*
 176                         * There is a virtual hole at the end of the file, so
 177                         * as long as offset isn't i_size or larger, return
 178                         * i_size.
 179                         */
 180                        if (offset >= inode->i_size) {
 181                                retval = -ENXIO;
 182                                goto out;
 183                        }
 184                        offset = inode->i_size;
 185                        break;
 186        }
 187        retval = -EINVAL;
 188        if (offset >= 0 || unsigned_offsets(file)) {
 189                if (offset != file->f_pos) {
 190                        file->f_pos = offset;
 191                        file->f_version = 0;
 192                }
 193                retval = offset;
 194        }
 195out:
 196        mutex_unlock(&inode->i_mutex);
 197        return retval;
 198}
 199EXPORT_SYMBOL(default_llseek);
 200
 201loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 202{
 203        loff_t (*fn)(struct file *, loff_t, int);
 204
 205        fn = no_llseek;
 206        if (file->f_mode & FMODE_LSEEK) {
 207                if (file->f_op && file->f_op->llseek)
 208                        fn = file->f_op->llseek;
 209        }
 210        return fn(file, offset, origin);
 211}
 212EXPORT_SYMBOL(vfs_llseek);
 213
 214SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 215{
 216        off_t retval;
 217        struct file * file;
 218        int fput_needed;
 219
 220        retval = -EBADF;
 221        file = fget_light(fd, &fput_needed);
 222        if (!file)
 223                goto bad;
 224
 225        retval = -EINVAL;
 226        if (origin <= SEEK_MAX) {
 227                loff_t res = vfs_llseek(file, offset, origin);
 228                retval = res;
 229                if (res != (loff_t)retval)
 230                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 231        }
 232        fput_light(file, fput_needed);
 233bad:
 234        return retval;
 235}
 236
 237#ifdef __ARCH_WANT_SYS_LLSEEK
 238SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 239                unsigned long, offset_low, loff_t __user *, result,
 240                unsigned int, origin)
 241{
 242        int retval;
 243        struct file * file;
 244        loff_t offset;
 245        int fput_needed;
 246
 247        retval = -EBADF;
 248        file = fget_light(fd, &fput_needed);
 249        if (!file)
 250                goto bad;
 251
 252        retval = -EINVAL;
 253        if (origin > SEEK_MAX)
 254                goto out_putf;
 255
 256        offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
 257                        origin);
 258
 259        retval = (int)offset;
 260        if (offset >= 0) {
 261                retval = -EFAULT;
 262                if (!copy_to_user(result, &offset, sizeof(offset)))
 263                        retval = 0;
 264        }
 265out_putf:
 266        fput_light(file, fput_needed);
 267bad:
 268        return retval;
 269}
 270#endif
 271
 272
 273/*
 274 * rw_verify_area doesn't like huge counts. We limit
 275 * them to something that fits in "int" so that others
 276 * won't have to do range checks all the time.
 277 */
 278int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 279{
 280        struct inode *inode;
 281        loff_t pos;
 282        int retval = -EINVAL;
 283
 284        inode = file->f_path.dentry->d_inode;
 285        if (unlikely((ssize_t) count < 0))
 286                return retval;
 287        pos = *ppos;
 288        if (unlikely(pos < 0)) {
 289                if (!unsigned_offsets(file))
 290                        return retval;
 291                if (count >= -pos) /* both values are in 0..LLONG_MAX */
 292                        return -EOVERFLOW;
 293        } else if (unlikely((loff_t) (pos + count) < 0)) {
 294                if (!unsigned_offsets(file))
 295                        return retval;
 296        }
 297
 298        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 299                retval = locks_mandatory_area(
 300                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 301                        inode, file, pos, count);
 302                if (retval < 0)
 303                        return retval;
 304        }
 305        retval = security_file_permission(file,
 306                                read_write == READ ? MAY_READ : MAY_WRITE);
 307        if (retval)
 308                return retval;
 309        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 310}
 311
 312static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 313{
 314        set_current_state(TASK_UNINTERRUPTIBLE);
 315        if (!kiocbIsKicked(iocb))
 316                schedule();
 317        else
 318                kiocbClearKicked(iocb);
 319        __set_current_state(TASK_RUNNING);
 320}
 321
 322ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 323{
 324        struct iovec iov = { .iov_base = buf, .iov_len = len };
 325        struct kiocb kiocb;
 326        ssize_t ret;
 327
 328        init_sync_kiocb(&kiocb, filp);
 329        kiocb.ki_pos = *ppos;
 330        kiocb.ki_left = len;
 331        kiocb.ki_nbytes = len;
 332
 333        for (;;) {
 334                ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 335                if (ret != -EIOCBRETRY)
 336                        break;
 337                wait_on_retry_sync_kiocb(&kiocb);
 338        }
 339
 340        if (-EIOCBQUEUED == ret)
 341                ret = wait_on_sync_kiocb(&kiocb);
 342        *ppos = kiocb.ki_pos;
 343        return ret;
 344}
 345
 346EXPORT_SYMBOL(do_sync_read);
 347
 348ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 349{
 350        ssize_t ret;
 351
 352        if (!(file->f_mode & FMODE_READ))
 353                return -EBADF;
 354        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 355                return -EINVAL;
 356        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 357                return -EFAULT;
 358
 359        ret = rw_verify_area(READ, file, pos, count);
 360        if (ret >= 0) {
 361                count = ret;
 362                if (file->f_op->read)
 363                        ret = file->f_op->read(file, buf, count, pos);
 364                else
 365                        ret = do_sync_read(file, buf, count, pos);
 366                if (ret > 0) {
 367                        fsnotify_access(file);
 368                        add_rchar(current, ret);
 369                }
 370                inc_syscr(current);
 371        }
 372
 373        return ret;
 374}
 375
 376EXPORT_SYMBOL(vfs_read);
 377
 378ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 379{
 380        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 381        struct kiocb kiocb;
 382        ssize_t ret;
 383
 384        init_sync_kiocb(&kiocb, filp);
 385        kiocb.ki_pos = *ppos;
 386        kiocb.ki_left = len;
 387        kiocb.ki_nbytes = len;
 388
 389        for (;;) {
 390                ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 391                if (ret != -EIOCBRETRY)
 392                        break;
 393                wait_on_retry_sync_kiocb(&kiocb);
 394        }
 395
 396        if (-EIOCBQUEUED == ret)
 397                ret = wait_on_sync_kiocb(&kiocb);
 398        *ppos = kiocb.ki_pos;
 399        return ret;
 400}
 401
 402EXPORT_SYMBOL(do_sync_write);
 403
 404ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 405{
 406        ssize_t ret;
 407
 408        if (!(file->f_mode & FMODE_WRITE))
 409                return -EBADF;
 410        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 411                return -EINVAL;
 412        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 413                return -EFAULT;
 414
 415        ret = rw_verify_area(WRITE, file, pos, count);
 416        if (ret >= 0) {
 417                count = ret;
 418                if (file->f_op->write)
 419                        ret = file->f_op->write(file, buf, count, pos);
 420                else
 421                        ret = do_sync_write(file, buf, count, pos);
 422                if (ret > 0) {
 423                        fsnotify_modify(file);
 424                        add_wchar(current, ret);
 425                }
 426                inc_syscw(current);
 427        }
 428
 429        return ret;
 430}
 431
 432EXPORT_SYMBOL(vfs_write);
 433
 434static inline loff_t file_pos_read(struct file *file)
 435{
 436        return file->f_pos;
 437}
 438
 439static inline void file_pos_write(struct file *file, loff_t pos)
 440{
 441        file->f_pos = pos;
 442}
 443
 444SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 445{
 446        struct file *file;
 447        ssize_t ret = -EBADF;
 448        int fput_needed;
 449
 450        file = fget_light(fd, &fput_needed);
 451        if (file) {
 452                loff_t pos = file_pos_read(file);
 453                ret = vfs_read(file, buf, count, &pos);
 454                file_pos_write(file, pos);
 455                fput_light(file, fput_needed);
 456        }
 457
 458        return ret;
 459}
 460
 461SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 462                size_t, count)
 463{
 464        struct file *file;
 465        ssize_t ret = -EBADF;
 466        int fput_needed;
 467
 468        file = fget_light(fd, &fput_needed);
 469        if (file) {
 470                loff_t pos = file_pos_read(file);
 471                ret = vfs_write(file, buf, count, &pos);
 472                file_pos_write(file, pos);
 473                fput_light(file, fput_needed);
 474        }
 475
 476        return ret;
 477}
 478
 479SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 480                        size_t count, loff_t pos)
 481{
 482        struct file *file;
 483        ssize_t ret = -EBADF;
 484        int fput_needed;
 485
 486        if (pos < 0)
 487                return -EINVAL;
 488
 489        file = fget_light(fd, &fput_needed);
 490        if (file) {
 491                ret = -ESPIPE;
 492                if (file->f_mode & FMODE_PREAD)
 493                        ret = vfs_read(file, buf, count, &pos);
 494                fput_light(file, fput_needed);
 495        }
 496
 497        return ret;
 498}
 499#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 500asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
 501{
 502        return SYSC_pread64((unsigned int) fd, (char __user *) buf,
 503                            (size_t) count, pos);
 504}
 505SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 506#endif
 507
 508SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 509                         size_t count, loff_t pos)
 510{
 511        struct file *file;
 512        ssize_t ret = -EBADF;
 513        int fput_needed;
 514
 515        if (pos < 0)
 516                return -EINVAL;
 517
 518        file = fget_light(fd, &fput_needed);
 519        if (file) {
 520                ret = -ESPIPE;
 521                if (file->f_mode & FMODE_PWRITE)  
 522                        ret = vfs_write(file, buf, count, &pos);
 523                fput_light(file, fput_needed);
 524        }
 525
 526        return ret;
 527}
 528#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 529asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
 530{
 531        return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
 532                             (size_t) count, pos);
 533}
 534SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
 535#endif
 536
 537/*
 538 * Reduce an iovec's length in-place.  Return the resulting number of segments
 539 */
 540unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 541{
 542        unsigned long seg = 0;
 543        size_t len = 0;
 544
 545        while (seg < nr_segs) {
 546                seg++;
 547                if (len + iov->iov_len >= to) {
 548                        iov->iov_len = to - len;
 549                        break;
 550                }
 551                len += iov->iov_len;
 552                iov++;
 553        }
 554        return seg;
 555}
 556EXPORT_SYMBOL(iov_shorten);
 557
 558ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 559                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 560{
 561        struct kiocb kiocb;
 562        ssize_t ret;
 563
 564        init_sync_kiocb(&kiocb, filp);
 565        kiocb.ki_pos = *ppos;
 566        kiocb.ki_left = len;
 567        kiocb.ki_nbytes = len;
 568
 569        for (;;) {
 570                ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 571                if (ret != -EIOCBRETRY)
 572                        break;
 573                wait_on_retry_sync_kiocb(&kiocb);
 574        }
 575
 576        if (ret == -EIOCBQUEUED)
 577                ret = wait_on_sync_kiocb(&kiocb);
 578        *ppos = kiocb.ki_pos;
 579        return ret;
 580}
 581
 582/* Do it by hand, with file-ops */
 583ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 584                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 585{
 586        struct iovec *vector = iov;
 587        ssize_t ret = 0;
 588
 589        while (nr_segs > 0) {
 590                void __user *base;
 591                size_t len;
 592                ssize_t nr;
 593
 594                base = vector->iov_base;
 595                len = vector->iov_len;
 596                vector++;
 597                nr_segs--;
 598
 599                nr = fn(filp, base, len, ppos);
 600
 601                if (nr < 0) {
 602                        if (!ret)
 603                                ret = nr;
 604                        break;
 605                }
 606                ret += nr;
 607                if (nr != len)
 608                        break;
 609        }
 610
 611        return ret;
 612}
 613
 614/* A write operation does a read from user space and vice versa */
 615#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 616
 617ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 618                              unsigned long nr_segs, unsigned long fast_segs,
 619                              struct iovec *fast_pointer,
 620                              struct iovec **ret_pointer)
 621{
 622        unsigned long seg;
 623        ssize_t ret;
 624        struct iovec *iov = fast_pointer;
 625
 626        /*
 627         * SuS says "The readv() function *may* fail if the iovcnt argument
 628         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 629         * traditionally returned zero for zero segments, so...
 630         */
 631        if (nr_segs == 0) {
 632                ret = 0;
 633                goto out;
 634        }
 635
 636        /*
 637         * First get the "struct iovec" from user memory and
 638         * verify all the pointers
 639         */
 640        if (nr_segs > UIO_MAXIOV) {
 641                ret = -EINVAL;
 642                goto out;
 643        }
 644        if (nr_segs > fast_segs) {
 645                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 646                if (iov == NULL) {
 647                        ret = -ENOMEM;
 648                        goto out;
 649                }
 650        }
 651        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 652                ret = -EFAULT;
 653                goto out;
 654        }
 655
 656        /*
 657         * According to the Single Unix Specification we should return EINVAL
 658         * if an element length is < 0 when cast to ssize_t or if the
 659         * total length would overflow the ssize_t return value of the
 660         * system call.
 661         *
 662         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 663         * overflow case.
 664         */
 665        ret = 0;
 666        for (seg = 0; seg < nr_segs; seg++) {
 667                void __user *buf = iov[seg].iov_base;
 668                ssize_t len = (ssize_t)iov[seg].iov_len;
 669
 670                /* see if we we're about to use an invalid len or if
 671                 * it's about to overflow ssize_t */
 672                if (len < 0) {
 673                        ret = -EINVAL;
 674                        goto out;
 675                }
 676                if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 677                        ret = -EFAULT;
 678                        goto out;
 679                }
 680                if (len > MAX_RW_COUNT - ret) {
 681                        len = MAX_RW_COUNT - ret;
 682                        iov[seg].iov_len = len;
 683                }
 684                ret += len;
 685        }
 686out:
 687        *ret_pointer = iov;
 688        return ret;
 689}
 690
 691static ssize_t do_readv_writev(int type, struct file *file,
 692                               const struct iovec __user * uvector,
 693                               unsigned long nr_segs, loff_t *pos)
 694{
 695        size_t tot_len;
 696        struct iovec iovstack[UIO_FASTIOV];
 697        struct iovec *iov = iovstack;
 698        ssize_t ret;
 699        io_fn_t fn;
 700        iov_fn_t fnv;
 701
 702        if (!file->f_op) {
 703                ret = -EINVAL;
 704                goto out;
 705        }
 706
 707        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 708                        ARRAY_SIZE(iovstack), iovstack, &iov);
 709        if (ret <= 0)
 710                goto out;
 711
 712        tot_len = ret;
 713        ret = rw_verify_area(type, file, pos, tot_len);
 714        if (ret < 0)
 715                goto out;
 716
 717        fnv = NULL;
 718        if (type == READ) {
 719                fn = file->f_op->read;
 720                fnv = file->f_op->aio_read;
 721        } else {
 722                fn = (io_fn_t)file->f_op->write;
 723                fnv = file->f_op->aio_write;
 724        }
 725
 726        if (fnv)
 727                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 728                                                pos, fnv);
 729        else
 730                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 731
 732out:
 733        if (iov != iovstack)
 734                kfree(iov);
 735        if ((ret + (type == READ)) > 0) {
 736                if (type == READ)
 737                        fsnotify_access(file);
 738                else
 739                        fsnotify_modify(file);
 740        }
 741        return ret;
 742}
 743
 744ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 745                  unsigned long vlen, loff_t *pos)
 746{
 747        if (!(file->f_mode & FMODE_READ))
 748                return -EBADF;
 749        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 750                return -EINVAL;
 751
 752        return do_readv_writev(READ, file, vec, vlen, pos);
 753}
 754
 755EXPORT_SYMBOL(vfs_readv);
 756
 757ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 758                   unsigned long vlen, loff_t *pos)
 759{
 760        if (!(file->f_mode & FMODE_WRITE))
 761                return -EBADF;
 762        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 763                return -EINVAL;
 764
 765        return do_readv_writev(WRITE, file, vec, vlen, pos);
 766}
 767
 768EXPORT_SYMBOL(vfs_writev);
 769
 770SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 771                unsigned long, vlen)
 772{
 773        struct file *file;
 774        ssize_t ret = -EBADF;
 775        int fput_needed;
 776
 777        file = fget_light(fd, &fput_needed);
 778        if (file) {
 779                loff_t pos = file_pos_read(file);
 780                ret = vfs_readv(file, vec, vlen, &pos);
 781                file_pos_write(file, pos);
 782                fput_light(file, fput_needed);
 783        }
 784
 785        if (ret > 0)
 786                add_rchar(current, ret);
 787        inc_syscr(current);
 788        return ret;
 789}
 790
 791SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 792                unsigned long, vlen)
 793{
 794        struct file *file;
 795        ssize_t ret = -EBADF;
 796        int fput_needed;
 797
 798        file = fget_light(fd, &fput_needed);
 799        if (file) {
 800                loff_t pos = file_pos_read(file);
 801                ret = vfs_writev(file, vec, vlen, &pos);
 802                file_pos_write(file, pos);
 803                fput_light(file, fput_needed);
 804        }
 805
 806        if (ret > 0)
 807                add_wchar(current, ret);
 808        inc_syscw(current);
 809        return ret;
 810}
 811
 812static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 813{
 814#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 815        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 816}
 817
 818SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 819                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 820{
 821        loff_t pos = pos_from_hilo(pos_h, pos_l);
 822        struct file *file;
 823        ssize_t ret = -EBADF;
 824        int fput_needed;
 825
 826        if (pos < 0)
 827                return -EINVAL;
 828
 829        file = fget_light(fd, &fput_needed);
 830        if (file) {
 831                ret = -ESPIPE;
 832                if (file->f_mode & FMODE_PREAD)
 833                        ret = vfs_readv(file, vec, vlen, &pos);
 834                fput_light(file, fput_needed);
 835        }
 836
 837        if (ret > 0)
 838                add_rchar(current, ret);
 839        inc_syscr(current);
 840        return ret;
 841}
 842
 843SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 844                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 845{
 846        loff_t pos = pos_from_hilo(pos_h, pos_l);
 847        struct file *file;
 848        ssize_t ret = -EBADF;
 849        int fput_needed;
 850
 851        if (pos < 0)
 852                return -EINVAL;
 853
 854        file = fget_light(fd, &fput_needed);
 855        if (file) {
 856                ret = -ESPIPE;
 857                if (file->f_mode & FMODE_PWRITE)
 858                        ret = vfs_writev(file, vec, vlen, &pos);
 859                fput_light(file, fput_needed);
 860        }
 861
 862        if (ret > 0)
 863                add_wchar(current, ret);
 864        inc_syscw(current);
 865        return ret;
 866}
 867
 868static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 869                           size_t count, loff_t max)
 870{
 871        struct file * in_file, * out_file;
 872        struct inode * in_inode, * out_inode;
 873        loff_t pos;
 874        ssize_t retval;
 875        int fput_needed_in, fput_needed_out, fl;
 876
 877        /*
 878         * Get input file, and verify that it is ok..
 879         */
 880        retval = -EBADF;
 881        in_file = fget_light(in_fd, &fput_needed_in);
 882        if (!in_file)
 883                goto out;
 884        if (!(in_file->f_mode & FMODE_READ))
 885                goto fput_in;
 886        retval = -ESPIPE;
 887        if (!ppos)
 888                ppos = &in_file->f_pos;
 889        else
 890                if (!(in_file->f_mode & FMODE_PREAD))
 891                        goto fput_in;
 892        retval = rw_verify_area(READ, in_file, ppos, count);
 893        if (retval < 0)
 894                goto fput_in;
 895        count = retval;
 896
 897        /*
 898         * Get output file, and verify that it is ok..
 899         */
 900        retval = -EBADF;
 901        out_file = fget_light(out_fd, &fput_needed_out);
 902        if (!out_file)
 903                goto fput_in;
 904        if (!(out_file->f_mode & FMODE_WRITE))
 905                goto fput_out;
 906        retval = -EINVAL;
 907        in_inode = in_file->f_path.dentry->d_inode;
 908        out_inode = out_file->f_path.dentry->d_inode;
 909        retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 910        if (retval < 0)
 911                goto fput_out;
 912        count = retval;
 913
 914        if (!max)
 915                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 916
 917        pos = *ppos;
 918        if (unlikely(pos + count > max)) {
 919                retval = -EOVERFLOW;
 920                if (pos >= max)
 921                        goto fput_out;
 922                count = max - pos;
 923        }
 924
 925        fl = 0;
 926#if 0
 927        /*
 928         * We need to debate whether we can enable this or not. The
 929         * man page documents EAGAIN return for the output at least,
 930         * and the application is arguably buggy if it doesn't expect
 931         * EAGAIN on a non-blocking file descriptor.
 932         */
 933        if (in_file->f_flags & O_NONBLOCK)
 934                fl = SPLICE_F_NONBLOCK;
 935#endif
 936        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 937
 938        if (retval > 0) {
 939                add_rchar(current, retval);
 940                add_wchar(current, retval);
 941        }
 942
 943        inc_syscr(current);
 944        inc_syscw(current);
 945        if (*ppos > max)
 946                retval = -EOVERFLOW;
 947
 948fput_out:
 949        fput_light(out_file, fput_needed_out);
 950fput_in:
 951        fput_light(in_file, fput_needed_in);
 952out:
 953        return retval;
 954}
 955
 956SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
 957{
 958        loff_t pos;
 959        off_t off;
 960        ssize_t ret;
 961
 962        if (offset) {
 963                if (unlikely(get_user(off, offset)))
 964                        return -EFAULT;
 965                pos = off;
 966                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
 967                if (unlikely(put_user(pos, offset)))
 968                        return -EFAULT;
 969                return ret;
 970        }
 971
 972        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 973}
 974
 975SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
 976{
 977        loff_t pos;
 978        ssize_t ret;
 979
 980        if (offset) {
 981                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
 982                        return -EFAULT;
 983                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
 984                if (unlikely(put_user(pos, offset)))
 985                        return -EFAULT;
 986                return ret;
 987        }
 988
 989        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 990}
 991
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.