linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/aio.h>
  13#include <linux/fsnotify.h>
  14#include <linux/security.h>
  15#include <linux/export.h>
  16#include <linux/syscalls.h>
  17#include <linux/pagemap.h>
  18#include <linux/splice.h>
  19#include <linux/compat.h>
  20#include "internal.h"
  21
  22#include <asm/uaccess.h>
  23#include <asm/unistd.h>
  24
  25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
  26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
  27                unsigned long, loff_t);
  28
  29const struct file_operations generic_ro_fops = {
  30        .llseek         = generic_file_llseek,
  31        .read           = do_sync_read,
  32        .aio_read       = generic_file_aio_read,
  33        .mmap           = generic_file_readonly_mmap,
  34        .splice_read    = generic_file_splice_read,
  35};
  36
  37EXPORT_SYMBOL(generic_ro_fops);
  38
  39static inline int unsigned_offsets(struct file *file)
  40{
  41        return file->f_mode & FMODE_UNSIGNED_OFFSET;
  42}
  43
  44/**
  45 * vfs_setpos - update the file offset for lseek
  46 * @file:       file structure in question
  47 * @offset:     file offset to seek to
  48 * @maxsize:    maximum file size
  49 *
  50 * This is a low-level filesystem helper for updating the file offset to
  51 * the value specified by @offset if the given offset is valid and it is
  52 * not equal to the current file offset.
  53 *
  54 * Return the specified offset on success and -EINVAL on invalid offset.
  55 */
  56loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
  57{
  58        if (offset < 0 && !unsigned_offsets(file))
  59                return -EINVAL;
  60        if (offset > maxsize)
  61                return -EINVAL;
  62
  63        if (offset != file->f_pos) {
  64                file->f_pos = offset;
  65                file->f_version = 0;
  66        }
  67        return offset;
  68}
  69EXPORT_SYMBOL(vfs_setpos);
  70
  71/**
  72 * generic_file_llseek_size - generic llseek implementation for regular files
  73 * @file:       file structure to seek on
  74 * @offset:     file offset to seek to
  75 * @whence:     type of seek
  76 * @size:       max size of this file in file system
  77 * @eof:        offset used for SEEK_END position
  78 *
  79 * This is a variant of generic_file_llseek that allows passing in a custom
  80 * maximum file size and a custom EOF position, for e.g. hashed directories
  81 *
  82 * Synchronization:
  83 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
  84 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
  85 * read/writes behave like SEEK_SET against seeks.
  86 */
  87loff_t
  88generic_file_llseek_size(struct file *file, loff_t offset, int whence,
  89                loff_t maxsize, loff_t eof)
  90{
  91        switch (whence) {
  92        case SEEK_END:
  93                offset += eof;
  94                break;
  95        case SEEK_CUR:
  96                /*
  97                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  98                 * position-querying operation.  Avoid rewriting the "same"
  99                 * f_pos value back to the file because a concurrent read(),
 100                 * write() or lseek() might have altered it
 101                 */
 102                if (offset == 0)
 103                        return file->f_pos;
 104                /*
 105                 * f_lock protects against read/modify/write race with other
 106                 * SEEK_CURs. Note that parallel writes and reads behave
 107                 * like SEEK_SET.
 108                 */
 109                spin_lock(&file->f_lock);
 110                offset = vfs_setpos(file, file->f_pos + offset, maxsize);
 111                spin_unlock(&file->f_lock);
 112                return offset;
 113        case SEEK_DATA:
 114                /*
 115                 * In the generic case the entire file is data, so as long as
 116                 * offset isn't at the end of the file then the offset is data.
 117                 */
 118                if (offset >= eof)
 119                        return -ENXIO;
 120                break;
 121        case SEEK_HOLE:
 122                /*
 123                 * There is a virtual hole at the end of the file, so as long as
 124                 * offset isn't i_size or larger, return i_size.
 125                 */
 126                if (offset >= eof)
 127                        return -ENXIO;
 128                offset = eof;
 129                break;
 130        }
 131
 132        return vfs_setpos(file, offset, maxsize);
 133}
 134EXPORT_SYMBOL(generic_file_llseek_size);
 135
 136/**
 137 * generic_file_llseek - generic llseek implementation for regular files
 138 * @file:       file structure to seek on
 139 * @offset:     file offset to seek to
 140 * @whence:     type of seek
 141 *
 142 * This is a generic implemenation of ->llseek useable for all normal local
 143 * filesystems.  It just updates the file offset to the value specified by
 144 * @offset and @whence.
 145 */
 146loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
 147{
 148        struct inode *inode = file->f_mapping->host;
 149
 150        return generic_file_llseek_size(file, offset, whence,
 151                                        inode->i_sb->s_maxbytes,
 152                                        i_size_read(inode));
 153}
 154EXPORT_SYMBOL(generic_file_llseek);
 155
 156/**
 157 * fixed_size_llseek - llseek implementation for fixed-sized devices
 158 * @file:       file structure to seek on
 159 * @offset:     file offset to seek to
 160 * @whence:     type of seek
 161 * @size:       size of the file
 162 *
 163 */
 164loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
 165{
 166        switch (whence) {
 167        case SEEK_SET: case SEEK_CUR: case SEEK_END:
 168                return generic_file_llseek_size(file, offset, whence,
 169                                                size, size);
 170        default:
 171                return -EINVAL;
 172        }
 173}
 174EXPORT_SYMBOL(fixed_size_llseek);
 175
 176/**
 177 * noop_llseek - No Operation Performed llseek implementation
 178 * @file:       file structure to seek on
 179 * @offset:     file offset to seek to
 180 * @whence:     type of seek
 181 *
 182 * This is an implementation of ->llseek useable for the rare special case when
 183 * userspace expects the seek to succeed but the (device) file is actually not
 184 * able to perform the seek. In this case you use noop_llseek() instead of
 185 * falling back to the default implementation of ->llseek.
 186 */
 187loff_t noop_llseek(struct file *file, loff_t offset, int whence)
 188{
 189        return file->f_pos;
 190}
 191EXPORT_SYMBOL(noop_llseek);
 192
 193loff_t no_llseek(struct file *file, loff_t offset, int whence)
 194{
 195        return -ESPIPE;
 196}
 197EXPORT_SYMBOL(no_llseek);
 198
 199loff_t default_llseek(struct file *file, loff_t offset, int whence)
 200{
 201        struct inode *inode = file_inode(file);
 202        loff_t retval;
 203
 204        mutex_lock(&inode->i_mutex);
 205        switch (whence) {
 206                case SEEK_END:
 207                        offset += i_size_read(inode);
 208                        break;
 209                case SEEK_CUR:
 210                        if (offset == 0) {
 211                                retval = file->f_pos;
 212                                goto out;
 213                        }
 214                        offset += file->f_pos;
 215                        break;
 216                case SEEK_DATA:
 217                        /*
 218                         * In the generic case the entire file is data, so as
 219                         * long as offset isn't at the end of the file then the
 220                         * offset is data.
 221                         */
 222                        if (offset >= inode->i_size) {
 223                                retval = -ENXIO;
 224                                goto out;
 225                        }
 226                        break;
 227                case SEEK_HOLE:
 228                        /*
 229                         * There is a virtual hole at the end of the file, so
 230                         * as long as offset isn't i_size or larger, return
 231                         * i_size.
 232                         */
 233                        if (offset >= inode->i_size) {
 234                                retval = -ENXIO;
 235                                goto out;
 236                        }
 237                        offset = inode->i_size;
 238                        break;
 239        }
 240        retval = -EINVAL;
 241        if (offset >= 0 || unsigned_offsets(file)) {
 242                if (offset != file->f_pos) {
 243                        file->f_pos = offset;
 244                        file->f_version = 0;
 245                }
 246                retval = offset;
 247        }
 248out:
 249        mutex_unlock(&inode->i_mutex);
 250        return retval;
 251}
 252EXPORT_SYMBOL(default_llseek);
 253
 254loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 255{
 256        loff_t (*fn)(struct file *, loff_t, int);
 257
 258        fn = no_llseek;
 259        if (file->f_mode & FMODE_LSEEK) {
 260                if (file->f_op->llseek)
 261                        fn = file->f_op->llseek;
 262        }
 263        return fn(file, offset, whence);
 264}
 265EXPORT_SYMBOL(vfs_llseek);
 266
 267static inline struct fd fdget_pos(int fd)
 268{
 269        return __to_fd(__fdget_pos(fd));
 270}
 271
 272static inline void fdput_pos(struct fd f)
 273{
 274        if (f.flags & FDPUT_POS_UNLOCK)
 275                mutex_unlock(&f.file->f_pos_lock);
 276        fdput(f);
 277}
 278
 279SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 280{
 281        off_t retval;
 282        struct fd f = fdget_pos(fd);
 283        if (!f.file)
 284                return -EBADF;
 285
 286        retval = -EINVAL;
 287        if (whence <= SEEK_MAX) {
 288                loff_t res = vfs_llseek(f.file, offset, whence);
 289                retval = res;
 290                if (res != (loff_t)retval)
 291                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 292        }
 293        fdput_pos(f);
 294        return retval;
 295}
 296
 297#ifdef CONFIG_COMPAT
 298COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
 299{
 300        return sys_lseek(fd, offset, whence);
 301}
 302#endif
 303
 304#ifdef __ARCH_WANT_SYS_LLSEEK
 305SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 306                unsigned long, offset_low, loff_t __user *, result,
 307                unsigned int, whence)
 308{
 309        int retval;
 310        struct fd f = fdget_pos(fd);
 311        loff_t offset;
 312
 313        if (!f.file)
 314                return -EBADF;
 315
 316        retval = -EINVAL;
 317        if (whence > SEEK_MAX)
 318                goto out_putf;
 319
 320        offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
 321                        whence);
 322
 323        retval = (int)offset;
 324        if (offset >= 0) {
 325                retval = -EFAULT;
 326                if (!copy_to_user(result, &offset, sizeof(offset)))
 327                        retval = 0;
 328        }
 329out_putf:
 330        fdput_pos(f);
 331        return retval;
 332}
 333#endif
 334
 335/*
 336 * rw_verify_area doesn't like huge counts. We limit
 337 * them to something that fits in "int" so that others
 338 * won't have to do range checks all the time.
 339 */
 340int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
 341{
 342        struct inode *inode;
 343        loff_t pos;
 344        int retval = -EINVAL;
 345
 346        inode = file_inode(file);
 347        if (unlikely((ssize_t) count < 0))
 348                return retval;
 349        pos = *ppos;
 350        if (unlikely(pos < 0)) {
 351                if (!unsigned_offsets(file))
 352                        return retval;
 353                if (count >= -pos) /* both values are in 0..LLONG_MAX */
 354                        return -EOVERFLOW;
 355        } else if (unlikely((loff_t) (pos + count) < 0)) {
 356                if (!unsigned_offsets(file))
 357                        return retval;
 358        }
 359
 360        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 361                retval = locks_mandatory_area(
 362                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 363                        inode, file, pos, count);
 364                if (retval < 0)
 365                        return retval;
 366        }
 367        retval = security_file_permission(file,
 368                                read_write == READ ? MAY_READ : MAY_WRITE);
 369        if (retval)
 370                return retval;
 371        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 372}
 373
 374ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 375{
 376        struct iovec iov = { .iov_base = buf, .iov_len = len };
 377        struct kiocb kiocb;
 378        ssize_t ret;
 379
 380        init_sync_kiocb(&kiocb, filp);
 381        kiocb.ki_pos = *ppos;
 382        kiocb.ki_nbytes = len;
 383
 384        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 385        if (-EIOCBQUEUED == ret)
 386                ret = wait_on_sync_kiocb(&kiocb);
 387        *ppos = kiocb.ki_pos;
 388        return ret;
 389}
 390
 391EXPORT_SYMBOL(do_sync_read);
 392
 393ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 394{
 395        ssize_t ret;
 396
 397        if (!(file->f_mode & FMODE_READ))
 398                return -EBADF;
 399        if (!file->f_op->read && !file->f_op->aio_read)
 400                return -EINVAL;
 401        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 402                return -EFAULT;
 403
 404        ret = rw_verify_area(READ, file, pos, count);
 405        if (ret >= 0) {
 406                count = ret;
 407                if (file->f_op->read)
 408                        ret = file->f_op->read(file, buf, count, pos);
 409                else
 410                        ret = do_sync_read(file, buf, count, pos);
 411                if (ret > 0) {
 412                        fsnotify_access(file);
 413                        add_rchar(current, ret);
 414                }
 415                inc_syscr(current);
 416        }
 417
 418        return ret;
 419}
 420
 421EXPORT_SYMBOL(vfs_read);
 422
 423ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 424{
 425        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 426        struct kiocb kiocb;
 427        ssize_t ret;
 428
 429        init_sync_kiocb(&kiocb, filp);
 430        kiocb.ki_pos = *ppos;
 431        kiocb.ki_nbytes = len;
 432
 433        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 434        if (-EIOCBQUEUED == ret)
 435                ret = wait_on_sync_kiocb(&kiocb);
 436        *ppos = kiocb.ki_pos;
 437        return ret;
 438}
 439
 440EXPORT_SYMBOL(do_sync_write);
 441
 442ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 443{
 444        mm_segment_t old_fs;
 445        const char __user *p;
 446        ssize_t ret;
 447
 448        if (!file->f_op->write && !file->f_op->aio_write)
 449                return -EINVAL;
 450
 451        old_fs = get_fs();
 452        set_fs(get_ds());
 453        p = (__force const char __user *)buf;
 454        if (count > MAX_RW_COUNT)
 455                count =  MAX_RW_COUNT;
 456        if (file->f_op->write)
 457                ret = file->f_op->write(file, p, count, pos);
 458        else
 459                ret = do_sync_write(file, p, count, pos);
 460        set_fs(old_fs);
 461        if (ret > 0) {
 462                fsnotify_modify(file);
 463                add_wchar(current, ret);
 464        }
 465        inc_syscw(current);
 466        return ret;
 467}
 468
 469ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 470{
 471        ssize_t ret;
 472
 473        if (!(file->f_mode & FMODE_WRITE))
 474                return -EBADF;
 475        if (!file->f_op->write && !file->f_op->aio_write)
 476                return -EINVAL;
 477        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 478                return -EFAULT;
 479
 480        ret = rw_verify_area(WRITE, file, pos, count);
 481        if (ret >= 0) {
 482                count = ret;
 483                file_start_write(file);
 484                if (file->f_op->write)
 485                        ret = file->f_op->write(file, buf, count, pos);
 486                else
 487                        ret = do_sync_write(file, buf, count, pos);
 488                if (ret > 0) {
 489                        fsnotify_modify(file);
 490                        add_wchar(current, ret);
 491                }
 492                inc_syscw(current);
 493                file_end_write(file);
 494        }
 495
 496        return ret;
 497}
 498
 499EXPORT_SYMBOL(vfs_write);
 500
 501static inline loff_t file_pos_read(struct file *file)
 502{
 503        return file->f_pos;
 504}
 505
 506static inline void file_pos_write(struct file *file, loff_t pos)
 507{
 508        file->f_pos = pos;
 509}
 510
 511SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 512{
 513        struct fd f = fdget_pos(fd);
 514        ssize_t ret = -EBADF;
 515
 516        if (f.file) {
 517                loff_t pos = file_pos_read(f.file);
 518                ret = vfs_read(f.file, buf, count, &pos);
 519                if (ret >= 0)
 520                        file_pos_write(f.file, pos);
 521                fdput_pos(f);
 522        }
 523        return ret;
 524}
 525
 526SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 527                size_t, count)
 528{
 529        struct fd f = fdget_pos(fd);
 530        ssize_t ret = -EBADF;
 531
 532        if (f.file) {
 533                loff_t pos = file_pos_read(f.file);
 534                ret = vfs_write(f.file, buf, count, &pos);
 535                if (ret >= 0)
 536                        file_pos_write(f.file, pos);
 537                fdput_pos(f);
 538        }
 539
 540        return ret;
 541}
 542
 543SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
 544                        size_t, count, loff_t, pos)
 545{
 546        struct fd f;
 547        ssize_t ret = -EBADF;
 548
 549        if (pos < 0)
 550                return -EINVAL;
 551
 552        f = fdget(fd);
 553        if (f.file) {
 554                ret = -ESPIPE;
 555                if (f.file->f_mode & FMODE_PREAD)
 556                        ret = vfs_read(f.file, buf, count, &pos);
 557                fdput(f);
 558        }
 559
 560        return ret;
 561}
 562
 563SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
 564                         size_t, count, loff_t, pos)
 565{
 566        struct fd f;
 567        ssize_t ret = -EBADF;
 568
 569        if (pos < 0)
 570                return -EINVAL;
 571
 572        f = fdget(fd);
 573        if (f.file) {
 574                ret = -ESPIPE;
 575                if (f.file->f_mode & FMODE_PWRITE)  
 576                        ret = vfs_write(f.file, buf, count, &pos);
 577                fdput(f);
 578        }
 579
 580        return ret;
 581}
 582
 583/*
 584 * Reduce an iovec's length in-place.  Return the resulting number of segments
 585 */
 586unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 587{
 588        unsigned long seg = 0;
 589        size_t len = 0;
 590
 591        while (seg < nr_segs) {
 592                seg++;
 593                if (len + iov->iov_len >= to) {
 594                        iov->iov_len = to - len;
 595                        break;
 596                }
 597                len += iov->iov_len;
 598                iov++;
 599        }
 600        return seg;
 601}
 602EXPORT_SYMBOL(iov_shorten);
 603
 604static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 605                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 606{
 607        struct kiocb kiocb;
 608        ssize_t ret;
 609
 610        init_sync_kiocb(&kiocb, filp);
 611        kiocb.ki_pos = *ppos;
 612        kiocb.ki_nbytes = len;
 613
 614        ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 615        if (ret == -EIOCBQUEUED)
 616                ret = wait_on_sync_kiocb(&kiocb);
 617        *ppos = kiocb.ki_pos;
 618        return ret;
 619}
 620
 621/* Do it by hand, with file-ops */
 622static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 623                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 624{
 625        struct iovec *vector = iov;
 626        ssize_t ret = 0;
 627
 628        while (nr_segs > 0) {
 629                void __user *base;
 630                size_t len;
 631                ssize_t nr;
 632
 633                base = vector->iov_base;
 634                len = vector->iov_len;
 635                vector++;
 636                nr_segs--;
 637
 638                nr = fn(filp, base, len, ppos);
 639
 640                if (nr < 0) {
 641                        if (!ret)
 642                                ret = nr;
 643                        break;
 644                }
 645                ret += nr;
 646                if (nr != len)
 647                        break;
 648        }
 649
 650        return ret;
 651}
 652
 653/* A write operation does a read from user space and vice versa */
 654#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 655
 656ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 657                              unsigned long nr_segs, unsigned long fast_segs,
 658                              struct iovec *fast_pointer,
 659                              struct iovec **ret_pointer)
 660{
 661        unsigned long seg;
 662        ssize_t ret;
 663        struct iovec *iov = fast_pointer;
 664
 665        /*
 666         * SuS says "The readv() function *may* fail if the iovcnt argument
 667         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 668         * traditionally returned zero for zero segments, so...
 669         */
 670        if (nr_segs == 0) {
 671                ret = 0;
 672                goto out;
 673        }
 674
 675        /*
 676         * First get the "struct iovec" from user memory and
 677         * verify all the pointers
 678         */
 679        if (nr_segs > UIO_MAXIOV) {
 680                ret = -EINVAL;
 681                goto out;
 682        }
 683        if (nr_segs > fast_segs) {
 684                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 685                if (iov == NULL) {
 686                        ret = -ENOMEM;
 687                        goto out;
 688                }
 689        }
 690        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 691                ret = -EFAULT;
 692                goto out;
 693        }
 694
 695        /*
 696         * According to the Single Unix Specification we should return EINVAL
 697         * if an element length is < 0 when cast to ssize_t or if the
 698         * total length would overflow the ssize_t return value of the
 699         * system call.
 700         *
 701         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 702         * overflow case.
 703         */
 704        ret = 0;
 705        for (seg = 0; seg < nr_segs; seg++) {
 706                void __user *buf = iov[seg].iov_base;
 707                ssize_t len = (ssize_t)iov[seg].iov_len;
 708
 709                /* see if we we're about to use an invalid len or if
 710                 * it's about to overflow ssize_t */
 711                if (len < 0) {
 712                        ret = -EINVAL;
 713                        goto out;
 714                }
 715                if (type >= 0
 716                    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 717                        ret = -EFAULT;
 718                        goto out;
 719                }
 720                if (len > MAX_RW_COUNT - ret) {
 721                        len = MAX_RW_COUNT - ret;
 722                        iov[seg].iov_len = len;
 723                }
 724                ret += len;
 725        }
 726out:
 727        *ret_pointer = iov;
 728        return ret;
 729}
 730
 731static ssize_t do_readv_writev(int type, struct file *file,
 732                               const struct iovec __user * uvector,
 733                               unsigned long nr_segs, loff_t *pos)
 734{
 735        size_t tot_len;
 736        struct iovec iovstack[UIO_FASTIOV];
 737        struct iovec *iov = iovstack;
 738        ssize_t ret;
 739        io_fn_t fn;
 740        iov_fn_t fnv;
 741
 742        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 743                                    ARRAY_SIZE(iovstack), iovstack, &iov);
 744        if (ret <= 0)
 745                goto out;
 746
 747        tot_len = ret;
 748        ret = rw_verify_area(type, file, pos, tot_len);
 749        if (ret < 0)
 750                goto out;
 751
 752        fnv = NULL;
 753        if (type == READ) {
 754                fn = file->f_op->read;
 755                fnv = file->f_op->aio_read;
 756        } else {
 757                fn = (io_fn_t)file->f_op->write;
 758                fnv = file->f_op->aio_write;
 759                file_start_write(file);
 760        }
 761
 762        if (fnv)
 763                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 764                                                pos, fnv);
 765        else
 766                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 767
 768        if (type != READ)
 769                file_end_write(file);
 770
 771out:
 772        if (iov != iovstack)
 773                kfree(iov);
 774        if ((ret + (type == READ)) > 0) {
 775                if (type == READ)
 776                        fsnotify_access(file);
 777                else
 778                        fsnotify_modify(file);
 779        }
 780        return ret;
 781}
 782
 783ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 784                  unsigned long vlen, loff_t *pos)
 785{
 786        if (!(file->f_mode & FMODE_READ))
 787                return -EBADF;
 788        if (!file->f_op->aio_read && !file->f_op->read)
 789                return -EINVAL;
 790
 791        return do_readv_writev(READ, file, vec, vlen, pos);
 792}
 793
 794EXPORT_SYMBOL(vfs_readv);
 795
 796ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 797                   unsigned long vlen, loff_t *pos)
 798{
 799        if (!(file->f_mode & FMODE_WRITE))
 800                return -EBADF;
 801        if (!file->f_op->aio_write && !file->f_op->write)
 802                return -EINVAL;
 803
 804        return do_readv_writev(WRITE, file, vec, vlen, pos);
 805}
 806
 807EXPORT_SYMBOL(vfs_writev);
 808
 809SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 810                unsigned long, vlen)
 811{
 812        struct fd f = fdget_pos(fd);
 813        ssize_t ret = -EBADF;
 814
 815        if (f.file) {
 816                loff_t pos = file_pos_read(f.file);
 817                ret = vfs_readv(f.file, vec, vlen, &pos);
 818                if (ret >= 0)
 819                        file_pos_write(f.file, pos);
 820                fdput_pos(f);
 821        }
 822
 823        if (ret > 0)
 824                add_rchar(current, ret);
 825        inc_syscr(current);
 826        return ret;
 827}
 828
 829SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 830                unsigned long, vlen)
 831{
 832        struct fd f = fdget_pos(fd);
 833        ssize_t ret = -EBADF;
 834
 835        if (f.file) {
 836                loff_t pos = file_pos_read(f.file);
 837                ret = vfs_writev(f.file, vec, vlen, &pos);
 838                if (ret >= 0)
 839                        file_pos_write(f.file, pos);
 840                fdput_pos(f);
 841        }
 842
 843        if (ret > 0)
 844                add_wchar(current, ret);
 845        inc_syscw(current);
 846        return ret;
 847}
 848
 849static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 850{
 851#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 852        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 853}
 854
 855SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 856                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 857{
 858        loff_t pos = pos_from_hilo(pos_h, pos_l);
 859        struct fd f;
 860        ssize_t ret = -EBADF;
 861
 862        if (pos < 0)
 863                return -EINVAL;
 864
 865        f = fdget(fd);
 866        if (f.file) {
 867                ret = -ESPIPE;
 868                if (f.file->f_mode & FMODE_PREAD)
 869                        ret = vfs_readv(f.file, vec, vlen, &pos);
 870                fdput(f);
 871        }
 872
 873        if (ret > 0)
 874                add_rchar(current, ret);
 875        inc_syscr(current);
 876        return ret;
 877}
 878
 879SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 880                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 881{
 882        loff_t pos = pos_from_hilo(pos_h, pos_l);
 883        struct fd f;
 884        ssize_t ret = -EBADF;
 885
 886        if (pos < 0)
 887                return -EINVAL;
 888
 889        f = fdget(fd);
 890        if (f.file) {
 891                ret = -ESPIPE;
 892                if (f.file->f_mode & FMODE_PWRITE)
 893                        ret = vfs_writev(f.file, vec, vlen, &pos);
 894                fdput(f);
 895        }
 896
 897        if (ret > 0)
 898                add_wchar(current, ret);
 899        inc_syscw(current);
 900        return ret;
 901}
 902
 903#ifdef CONFIG_COMPAT
 904
 905static ssize_t compat_do_readv_writev(int type, struct file *file,
 906                               const struct compat_iovec __user *uvector,
 907                               unsigned long nr_segs, loff_t *pos)
 908{
 909        compat_ssize_t tot_len;
 910        struct iovec iovstack[UIO_FASTIOV];
 911        struct iovec *iov = iovstack;
 912        ssize_t ret;
 913        io_fn_t fn;
 914        iov_fn_t fnv;
 915
 916        ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 917                                               UIO_FASTIOV, iovstack, &iov);
 918        if (ret <= 0)
 919                goto out;
 920
 921        tot_len = ret;
 922        ret = rw_verify_area(type, file, pos, tot_len);
 923        if (ret < 0)
 924                goto out;
 925
 926        fnv = NULL;
 927        if (type == READ) {
 928                fn = file->f_op->read;
 929                fnv = file->f_op->aio_read;
 930        } else {
 931                fn = (io_fn_t)file->f_op->write;
 932                fnv = file->f_op->aio_write;
 933                file_start_write(file);
 934        }
 935
 936        if (fnv)
 937                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 938                                                pos, fnv);
 939        else
 940                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 941
 942        if (type != READ)
 943                file_end_write(file);
 944
 945out:
 946        if (iov != iovstack)
 947                kfree(iov);
 948        if ((ret + (type == READ)) > 0) {
 949                if (type == READ)
 950                        fsnotify_access(file);
 951                else
 952                        fsnotify_modify(file);
 953        }
 954        return ret;
 955}
 956
 957static size_t compat_readv(struct file *file,
 958                           const struct compat_iovec __user *vec,
 959                           unsigned long vlen, loff_t *pos)
 960{
 961        ssize_t ret = -EBADF;
 962
 963        if (!(file->f_mode & FMODE_READ))
 964                goto out;
 965
 966        ret = -EINVAL;
 967        if (!file->f_op->aio_read && !file->f_op->read)
 968                goto out;
 969
 970        ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
 971
 972out:
 973        if (ret > 0)
 974                add_rchar(current, ret);
 975        inc_syscr(current);
 976        return ret;
 977}
 978
 979COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
 980                const struct compat_iovec __user *,vec,
 981                compat_ulong_t, vlen)
 982{
 983        struct fd f = fdget_pos(fd);
 984        ssize_t ret;
 985        loff_t pos;
 986
 987        if (!f.file)
 988                return -EBADF;
 989        pos = f.file->f_pos;
 990        ret = compat_readv(f.file, vec, vlen, &pos);
 991        if (ret >= 0)
 992                f.file->f_pos = pos;
 993        fdput_pos(f);
 994        return ret;
 995}
 996
 997COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
 998                const struct compat_iovec __user *,vec,
 999                unsigned long, vlen, loff_t, pos)
1000{
1001        struct fd f;
1002        ssize_t ret;
1003
1004        if (pos < 0)
1005                return -EINVAL;
1006        f = fdget(fd);
1007        if (!f.file)
1008                return -EBADF;
1009        ret = -ESPIPE;
1010        if (f.file->f_mode & FMODE_PREAD)
1011                ret = compat_readv(f.file, vec, vlen, &pos);
1012        fdput(f);
1013        return ret;
1014}
1015
1016COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1017                const struct compat_iovec __user *,vec,
1018                compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1019{
1020        loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1021        return compat_sys_preadv64(fd, vec, vlen, pos);
1022}
1023
1024static size_t compat_writev(struct file *file,
1025                            const struct compat_iovec __user *vec,
1026                            unsigned long vlen, loff_t *pos)
1027{
1028        ssize_t ret = -EBADF;
1029
1030        if (!(file->f_mode & FMODE_WRITE))
1031                goto out;
1032
1033        ret = -EINVAL;
1034        if (!file->f_op->aio_write && !file->f_op->write)
1035                goto out;
1036
1037        ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
1038
1039out:
1040        if (ret > 0)
1041                add_wchar(current, ret);
1042        inc_syscw(current);
1043        return ret;
1044}
1045
1046COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1047                const struct compat_iovec __user *, vec,
1048                compat_ulong_t, vlen)
1049{
1050        struct fd f = fdget_pos(fd);
1051        ssize_t ret;
1052        loff_t pos;
1053
1054        if (!f.file)
1055                return -EBADF;
1056        pos = f.file->f_pos;
1057        ret = compat_writev(f.file, vec, vlen, &pos);
1058        if (ret >= 0)
1059                f.file->f_pos = pos;
1060        fdput_pos(f);
1061        return ret;
1062}
1063
1064COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1065                const struct compat_iovec __user *,vec,
1066                unsigned long, vlen, loff_t, pos)
1067{
1068        struct fd f;
1069        ssize_t ret;
1070
1071        if (pos < 0)
1072                return -EINVAL;
1073        f = fdget(fd);
1074        if (!f.file)
1075                return -EBADF;
1076        ret = -ESPIPE;
1077        if (f.file->f_mode & FMODE_PWRITE)
1078                ret = compat_writev(f.file, vec, vlen, &pos);
1079        fdput(f);
1080        return ret;
1081}
1082
1083COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1084                const struct compat_iovec __user *,vec,
1085                compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1086{
1087        loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1088        return compat_sys_pwritev64(fd, vec, vlen, pos);
1089}
1090#endif
1091
1092static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1093                           size_t count, loff_t max)
1094{
1095        struct fd in, out;
1096        struct inode *in_inode, *out_inode;
1097        loff_t pos;
1098        loff_t out_pos;
1099        ssize_t retval;
1100        int fl;
1101
1102        /*
1103         * Get input file, and verify that it is ok..
1104         */
1105        retval = -EBADF;
1106        in = fdget(in_fd);
1107        if (!in.file)
1108                goto out;
1109        if (!(in.file->f_mode & FMODE_READ))
1110                goto fput_in;
1111        retval = -ESPIPE;
1112        if (!ppos) {
1113                pos = in.file->f_pos;
1114        } else {
1115                pos = *ppos;
1116                if (!(in.file->f_mode & FMODE_PREAD))
1117                        goto fput_in;
1118        }
1119        retval = rw_verify_area(READ, in.file, &pos, count);
1120        if (retval < 0)
1121                goto fput_in;
1122        count = retval;
1123
1124        /*
1125         * Get output file, and verify that it is ok..
1126         */
1127        retval = -EBADF;
1128        out = fdget(out_fd);
1129        if (!out.file)
1130                goto fput_in;
1131        if (!(out.file->f_mode & FMODE_WRITE))
1132                goto fput_out;
1133        retval = -EINVAL;
1134        in_inode = file_inode(in.file);
1135        out_inode = file_inode(out.file);
1136        out_pos = out.file->f_pos;
1137        retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1138        if (retval < 0)
1139                goto fput_out;
1140        count = retval;
1141
1142        if (!max)
1143                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1144
1145        if (unlikely(pos + count > max)) {
1146                retval = -EOVERFLOW;
1147                if (pos >= max)
1148                        goto fput_out;
1149                count = max - pos;
1150        }
1151
1152        fl = 0;
1153#if 0
1154        /*
1155         * We need to debate whether we can enable this or not. The
1156         * man page documents EAGAIN return for the output at least,
1157         * and the application is arguably buggy if it doesn't expect
1158         * EAGAIN on a non-blocking file descriptor.
1159         */
1160        if (in.file->f_flags & O_NONBLOCK)
1161                fl = SPLICE_F_NONBLOCK;
1162#endif
1163        file_start_write(out.file);
1164        retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1165        file_end_write(out.file);
1166
1167        if (retval > 0) {
1168                add_rchar(current, retval);
1169                add_wchar(current, retval);
1170                fsnotify_access(in.file);
1171                fsnotify_modify(out.file);
1172                out.file->f_pos = out_pos;
1173                if (ppos)
1174                        *ppos = pos;
1175                else
1176                        in.file->f_pos = pos;
1177        }
1178
1179        inc_syscr(current);
1180        inc_syscw(current);
1181        if (pos > max)
1182                retval = -EOVERFLOW;
1183
1184fput_out:
1185        fdput(out);
1186fput_in:
1187        fdput(in);
1188out:
1189        return retval;
1190}
1191
1192SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1193{
1194        loff_t pos;
1195        off_t off;
1196        ssize_t ret;
1197
1198        if (offset) {
1199                if (unlikely(get_user(off, offset)))
1200                        return -EFAULT;
1201                pos = off;
1202                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1203                if (unlikely(put_user(pos, offset)))
1204                        return -EFAULT;
1205                return ret;
1206        }
1207
1208        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1209}
1210
1211SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1212{
1213        loff_t pos;
1214        ssize_t ret;
1215
1216        if (offset) {
1217                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1218                        return -EFAULT;
1219                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1220                if (unlikely(put_user(pos, offset)))
1221                        return -EFAULT;
1222                return ret;
1223        }
1224
1225        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1226}
1227
1228#ifdef CONFIG_COMPAT
1229COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1230                compat_off_t __user *, offset, compat_size_t, count)
1231{
1232        loff_t pos;
1233        off_t off;
1234        ssize_t ret;
1235
1236        if (offset) {
1237                if (unlikely(get_user(off, offset)))
1238                        return -EFAULT;
1239                pos = off;
1240                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1241                if (unlikely(put_user(pos, offset)))
1242                        return -EFAULT;
1243                return ret;
1244        }
1245
1246        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1247}
1248
1249COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1250                compat_loff_t __user *, offset, compat_size_t, count)
1251{
1252        loff_t pos;
1253        ssize_t ret;
1254
1255        if (offset) {
1256                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1257                        return -EFAULT;
1258                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1259                if (unlikely(put_user(pos, offset)))
1260                        return -EFAULT;
1261                return ret;
1262        }
1263
1264        return do_sendfile(out_fd, in_fd, NULL, count, 0);
1265}
1266#endif
1267
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.