linux/fs/cifs/file.c
<<
>>
Prefs
   1// SPDX-License-Identifier: LGPL-2.1
   2/*
   3 *   fs/cifs/file.c
   4 *
   5 *   vfs operations that deal with files
   6 *
   7 *   Copyright (C) International Business Machines  Corp., 2002,2010
   8 *   Author(s): Steve French (sfrench@us.ibm.com)
   9 *              Jeremy Allison (jra@samba.org)
  10 *
  11 */
  12#include <linux/fs.h>
  13#include <linux/backing-dev.h>
  14#include <linux/stat.h>
  15#include <linux/fcntl.h>
  16#include <linux/pagemap.h>
  17#include <linux/pagevec.h>
  18#include <linux/writeback.h>
  19#include <linux/task_io_accounting_ops.h>
  20#include <linux/delay.h>
  21#include <linux/mount.h>
  22#include <linux/slab.h>
  23#include <linux/swap.h>
  24#include <linux/mm.h>
  25#include <asm/div64.h>
  26#include "cifsfs.h"
  27#include "cifspdu.h"
  28#include "cifsglob.h"
  29#include "cifsproto.h"
  30#include "cifs_unicode.h"
  31#include "cifs_debug.h"
  32#include "cifs_fs_sb.h"
  33#include "fscache.h"
  34#include "smbdirect.h"
  35#include "fs_context.h"
  36#include "cifs_ioctl.h"
  37
  38static inline int cifs_convert_flags(unsigned int flags)
  39{
  40        if ((flags & O_ACCMODE) == O_RDONLY)
  41                return GENERIC_READ;
  42        else if ((flags & O_ACCMODE) == O_WRONLY)
  43                return GENERIC_WRITE;
  44        else if ((flags & O_ACCMODE) == O_RDWR) {
  45                /* GENERIC_ALL is too much permission to request
  46                   can cause unnecessary access denied on create */
  47                /* return GENERIC_ALL; */
  48                return (GENERIC_READ | GENERIC_WRITE);
  49        }
  50
  51        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  52                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  53                FILE_READ_DATA);
  54}
  55
  56static u32 cifs_posix_convert_flags(unsigned int flags)
  57{
  58        u32 posix_flags = 0;
  59
  60        if ((flags & O_ACCMODE) == O_RDONLY)
  61                posix_flags = SMB_O_RDONLY;
  62        else if ((flags & O_ACCMODE) == O_WRONLY)
  63                posix_flags = SMB_O_WRONLY;
  64        else if ((flags & O_ACCMODE) == O_RDWR)
  65                posix_flags = SMB_O_RDWR;
  66
  67        if (flags & O_CREAT) {
  68                posix_flags |= SMB_O_CREAT;
  69                if (flags & O_EXCL)
  70                        posix_flags |= SMB_O_EXCL;
  71        } else if (flags & O_EXCL)
  72                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
  73                         current->comm, current->tgid);
  74
  75        if (flags & O_TRUNC)
  76                posix_flags |= SMB_O_TRUNC;
  77        /* be safe and imply O_SYNC for O_DSYNC */
  78        if (flags & O_DSYNC)
  79                posix_flags |= SMB_O_SYNC;
  80        if (flags & O_DIRECTORY)
  81                posix_flags |= SMB_O_DIRECTORY;
  82        if (flags & O_NOFOLLOW)
  83                posix_flags |= SMB_O_NOFOLLOW;
  84        if (flags & O_DIRECT)
  85                posix_flags |= SMB_O_DIRECT;
  86
  87        return posix_flags;
  88}
  89
  90static inline int cifs_get_disposition(unsigned int flags)
  91{
  92        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
  93                return FILE_CREATE;
  94        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
  95                return FILE_OVERWRITE_IF;
  96        else if ((flags & O_CREAT) == O_CREAT)
  97                return FILE_OPEN_IF;
  98        else if ((flags & O_TRUNC) == O_TRUNC)
  99                return FILE_OVERWRITE;
 100        else
 101                return FILE_OPEN;
 102}
 103
 104int cifs_posix_open(const char *full_path, struct inode **pinode,
 105                        struct super_block *sb, int mode, unsigned int f_flags,
 106                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
 107{
 108        int rc;
 109        FILE_UNIX_BASIC_INFO *presp_data;
 110        __u32 posix_flags = 0;
 111        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 112        struct cifs_fattr fattr;
 113        struct tcon_link *tlink;
 114        struct cifs_tcon *tcon;
 115
 116        cifs_dbg(FYI, "posix open %s\n", full_path);
 117
 118        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 119        if (presp_data == NULL)
 120                return -ENOMEM;
 121
 122        tlink = cifs_sb_tlink(cifs_sb);
 123        if (IS_ERR(tlink)) {
 124                rc = PTR_ERR(tlink);
 125                goto posix_open_ret;
 126        }
 127
 128        tcon = tlink_tcon(tlink);
 129        mode &= ~current_umask();
 130
 131        posix_flags = cifs_posix_convert_flags(f_flags);
 132        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
 133                             poplock, full_path, cifs_sb->local_nls,
 134                             cifs_remap(cifs_sb));
 135        cifs_put_tlink(tlink);
 136
 137        if (rc)
 138                goto posix_open_ret;
 139
 140        if (presp_data->Type == cpu_to_le32(-1))
 141                goto posix_open_ret; /* open ok, caller does qpathinfo */
 142
 143        if (!pinode)
 144                goto posix_open_ret; /* caller does not need info */
 145
 146        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
 147
 148        /* get new inode and set it up */
 149        if (*pinode == NULL) {
 150                cifs_fill_uniqueid(sb, &fattr);
 151                *pinode = cifs_iget(sb, &fattr);
 152                if (!*pinode) {
 153                        rc = -ENOMEM;
 154                        goto posix_open_ret;
 155                }
 156        } else {
 157                cifs_revalidate_mapping(*pinode);
 158                rc = cifs_fattr_to_inode(*pinode, &fattr);
 159        }
 160
 161posix_open_ret:
 162        kfree(presp_data);
 163        return rc;
 164}
 165
 166static int
 167cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 168             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
 169             struct cifs_fid *fid, unsigned int xid)
 170{
 171        int rc;
 172        int desired_access;
 173        int disposition;
 174        int create_options = CREATE_NOT_DIR;
 175        FILE_ALL_INFO *buf;
 176        struct TCP_Server_Info *server = tcon->ses->server;
 177        struct cifs_open_parms oparms;
 178
 179        if (!server->ops->open)
 180                return -ENOSYS;
 181
 182        desired_access = cifs_convert_flags(f_flags);
 183
 184/*********************************************************************
 185 *  open flag mapping table:
 186 *
 187 *      POSIX Flag            CIFS Disposition
 188 *      ----------            ----------------
 189 *      O_CREAT               FILE_OPEN_IF
 190 *      O_CREAT | O_EXCL      FILE_CREATE
 191 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 192 *      O_TRUNC               FILE_OVERWRITE
 193 *      none of the above     FILE_OPEN
 194 *
 195 *      Note that there is not a direct match between disposition
 196 *      FILE_SUPERSEDE (ie create whether or not file exists although
 197 *      O_CREAT | O_TRUNC is similar but truncates the existing
 198 *      file rather than creating a new file as FILE_SUPERSEDE does
 199 *      (which uses the attributes / metadata passed in on open call)
 200 *?
 201 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 202 *?  and the read write flags match reasonably.  O_LARGEFILE
 203 *?  is irrelevant because largefile support is always used
 204 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 205 *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 206 *********************************************************************/
 207
 208        disposition = cifs_get_disposition(f_flags);
 209
 210        /* BB pass O_SYNC flag through on file attributes .. BB */
 211
 212        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 213        if (!buf)
 214                return -ENOMEM;
 215
 216        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
 217        if (f_flags & O_SYNC)
 218                create_options |= CREATE_WRITE_THROUGH;
 219
 220        if (f_flags & O_DIRECT)
 221                create_options |= CREATE_NO_BUFFER;
 222
 223        oparms.tcon = tcon;
 224        oparms.cifs_sb = cifs_sb;
 225        oparms.desired_access = desired_access;
 226        oparms.create_options = cifs_create_options(cifs_sb, create_options);
 227        oparms.disposition = disposition;
 228        oparms.path = full_path;
 229        oparms.fid = fid;
 230        oparms.reconnect = false;
 231
 232        rc = server->ops->open(xid, &oparms, oplock, buf);
 233
 234        if (rc)
 235                goto out;
 236
 237        /* TODO: Add support for calling posix query info but with passing in fid */
 238        if (tcon->unix_ext)
 239                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
 240                                              xid);
 241        else
 242                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 243                                         xid, fid);
 244
 245        if (rc) {
 246                server->ops->close(xid, tcon, fid);
 247                if (rc == -ESTALE)
 248                        rc = -EOPENSTALE;
 249        }
 250
 251out:
 252        kfree(buf);
 253        return rc;
 254}
 255
 256static bool
 257cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 258{
 259        struct cifs_fid_locks *cur;
 260        bool has_locks = false;
 261
 262        down_read(&cinode->lock_sem);
 263        list_for_each_entry(cur, &cinode->llist, llist) {
 264                if (!list_empty(&cur->locks)) {
 265                        has_locks = true;
 266                        break;
 267                }
 268        }
 269        up_read(&cinode->lock_sem);
 270        return has_locks;
 271}
 272
 273void
 274cifs_down_write(struct rw_semaphore *sem)
 275{
 276        while (!down_write_trylock(sem))
 277                msleep(10);
 278}
 279
 280static void cifsFileInfo_put_work(struct work_struct *work);
 281
 282struct cifsFileInfo *
 283cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 284                  struct tcon_link *tlink, __u32 oplock)
 285{
 286        struct dentry *dentry = file_dentry(file);
 287        struct inode *inode = d_inode(dentry);
 288        struct cifsInodeInfo *cinode = CIFS_I(inode);
 289        struct cifsFileInfo *cfile;
 290        struct cifs_fid_locks *fdlocks;
 291        struct cifs_tcon *tcon = tlink_tcon(tlink);
 292        struct TCP_Server_Info *server = tcon->ses->server;
 293
 294        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 295        if (cfile == NULL)
 296                return cfile;
 297
 298        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
 299        if (!fdlocks) {
 300                kfree(cfile);
 301                return NULL;
 302        }
 303
 304        INIT_LIST_HEAD(&fdlocks->locks);
 305        fdlocks->cfile = cfile;
 306        cfile->llist = fdlocks;
 307
 308        cfile->count = 1;
 309        cfile->pid = current->tgid;
 310        cfile->uid = current_fsuid();
 311        cfile->dentry = dget(dentry);
 312        cfile->f_flags = file->f_flags;
 313        cfile->invalidHandle = false;
 314        cfile->deferred_close_scheduled = false;
 315        cfile->tlink = cifs_get_tlink(tlink);
 316        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 317        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
 318        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
 319        mutex_init(&cfile->fh_mutex);
 320        spin_lock_init(&cfile->file_info_lock);
 321
 322        cifs_sb_active(inode->i_sb);
 323
 324        /*
 325         * If the server returned a read oplock and we have mandatory brlocks,
 326         * set oplock level to None.
 327         */
 328        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 329                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 330                oplock = 0;
 331        }
 332
 333        cifs_down_write(&cinode->lock_sem);
 334        list_add(&fdlocks->llist, &cinode->llist);
 335        up_write(&cinode->lock_sem);
 336
 337        spin_lock(&tcon->open_file_lock);
 338        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
 339                oplock = fid->pending_open->oplock;
 340        list_del(&fid->pending_open->olist);
 341
 342        fid->purge_cache = false;
 343        server->ops->set_fid(cfile, fid, oplock);
 344
 345        list_add(&cfile->tlist, &tcon->openFileList);
 346        atomic_inc(&tcon->num_local_opens);
 347
 348        /* if readable file instance put first in list*/
 349        spin_lock(&cinode->open_file_lock);
 350        if (file->f_mode & FMODE_READ)
 351                list_add(&cfile->flist, &cinode->openFileList);
 352        else
 353                list_add_tail(&cfile->flist, &cinode->openFileList);
 354        spin_unlock(&cinode->open_file_lock);
 355        spin_unlock(&tcon->open_file_lock);
 356
 357        if (fid->purge_cache)
 358                cifs_zap_mapping(inode);
 359
 360        file->private_data = cfile;
 361        return cfile;
 362}
 363
 364struct cifsFileInfo *
 365cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 366{
 367        spin_lock(&cifs_file->file_info_lock);
 368        cifsFileInfo_get_locked(cifs_file);
 369        spin_unlock(&cifs_file->file_info_lock);
 370        return cifs_file;
 371}
 372
 373static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
 374{
 375        struct inode *inode = d_inode(cifs_file->dentry);
 376        struct cifsInodeInfo *cifsi = CIFS_I(inode);
 377        struct cifsLockInfo *li, *tmp;
 378        struct super_block *sb = inode->i_sb;
 379
 380        /*
 381         * Delete any outstanding lock records. We'll lose them when the file
 382         * is closed anyway.
 383         */
 384        cifs_down_write(&cifsi->lock_sem);
 385        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 386                list_del(&li->llist);
 387                cifs_del_lock_waiters(li);
 388                kfree(li);
 389        }
 390        list_del(&cifs_file->llist->llist);
 391        kfree(cifs_file->llist);
 392        up_write(&cifsi->lock_sem);
 393
 394        cifs_put_tlink(cifs_file->tlink);
 395        dput(cifs_file->dentry);
 396        cifs_sb_deactive(sb);
 397        kfree(cifs_file);
 398}
 399
 400static void cifsFileInfo_put_work(struct work_struct *work)
 401{
 402        struct cifsFileInfo *cifs_file = container_of(work,
 403                        struct cifsFileInfo, put);
 404
 405        cifsFileInfo_put_final(cifs_file);
 406}
 407
 408/**
 409 * cifsFileInfo_put - release a reference of file priv data
 410 *
 411 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 412 *
 413 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 414 */
 415void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 416{
 417        _cifsFileInfo_put(cifs_file, true, true);
 418}
 419
 420/**
 421 * _cifsFileInfo_put - release a reference of file priv data
 422 *
 423 * This may involve closing the filehandle @cifs_file out on the
 424 * server. Must be called without holding tcon->open_file_lock,
 425 * cinode->open_file_lock and cifs_file->file_info_lock.
 426 *
 427 * If @wait_for_oplock_handler is true and we are releasing the last
 428 * reference, wait for any running oplock break handler of the file
 429 * and cancel any pending one.
 430 *
 431 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 432 * @wait_oplock_handler: must be false if called from oplock_break_handler
 433 * @offload:    not offloaded on close and oplock breaks
 434 *
 435 */
 436void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 437                       bool wait_oplock_handler, bool offload)
 438{
 439        struct inode *inode = d_inode(cifs_file->dentry);
 440        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 441        struct TCP_Server_Info *server = tcon->ses->server;
 442        struct cifsInodeInfo *cifsi = CIFS_I(inode);
 443        struct super_block *sb = inode->i_sb;
 444        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 445        struct cifs_fid fid;
 446        struct cifs_pending_open open;
 447        bool oplock_break_cancelled;
 448
 449        spin_lock(&tcon->open_file_lock);
 450        spin_lock(&cifsi->open_file_lock);
 451        spin_lock(&cifs_file->file_info_lock);
 452        if (--cifs_file->count > 0) {
 453                spin_unlock(&cifs_file->file_info_lock);
 454                spin_unlock(&cifsi->open_file_lock);
 455                spin_unlock(&tcon->open_file_lock);
 456                return;
 457        }
 458        spin_unlock(&cifs_file->file_info_lock);
 459
 460        if (server->ops->get_lease_key)
 461                server->ops->get_lease_key(inode, &fid);
 462
 463        /* store open in pending opens to make sure we don't miss lease break */
 464        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 465
 466        /* remove it from the lists */
 467        list_del(&cifs_file->flist);
 468        list_del(&cifs_file->tlist);
 469        atomic_dec(&tcon->num_local_opens);
 470
 471        if (list_empty(&cifsi->openFileList)) {
 472                cifs_dbg(FYI, "closing last open instance for inode %p\n",
 473                         d_inode(cifs_file->dentry));
 474                /*
 475                 * In strict cache mode we need invalidate mapping on the last
 476                 * close  because it may cause a error when we open this file
 477                 * again and get at least level II oplock.
 478                 */
 479                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 480                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 481                cifs_set_oplock_level(cifsi, 0);
 482        }
 483
 484        spin_unlock(&cifsi->open_file_lock);
 485        spin_unlock(&tcon->open_file_lock);
 486
 487        oplock_break_cancelled = wait_oplock_handler ?
 488                cancel_work_sync(&cifs_file->oplock_break) : false;
 489
 490        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 491                struct TCP_Server_Info *server = tcon->ses->server;
 492                unsigned int xid;
 493
 494                xid = get_xid();
 495                if (server->ops->close_getattr)
 496                        server->ops->close_getattr(xid, tcon, cifs_file);
 497                else if (server->ops->close)
 498                        server->ops->close(xid, tcon, &cifs_file->fid);
 499                _free_xid(xid);
 500        }
 501
 502        if (oplock_break_cancelled)
 503                cifs_done_oplock_break(cifsi);
 504
 505        cifs_del_pending_open(&open);
 506
 507        if (offload)
 508                queue_work(fileinfo_put_wq, &cifs_file->put);
 509        else
 510                cifsFileInfo_put_final(cifs_file);
 511}
 512
 513int cifs_open(struct inode *inode, struct file *file)
 514
 515{
 516        int rc = -EACCES;
 517        unsigned int xid;
 518        __u32 oplock;
 519        struct cifs_sb_info *cifs_sb;
 520        struct TCP_Server_Info *server;
 521        struct cifs_tcon *tcon;
 522        struct tcon_link *tlink;
 523        struct cifsFileInfo *cfile = NULL;
 524        void *page;
 525        const char *full_path;
 526        bool posix_open_ok = false;
 527        struct cifs_fid fid;
 528        struct cifs_pending_open open;
 529
 530        xid = get_xid();
 531
 532        cifs_sb = CIFS_SB(inode->i_sb);
 533        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
 534                free_xid(xid);
 535                return -EIO;
 536        }
 537
 538        tlink = cifs_sb_tlink(cifs_sb);
 539        if (IS_ERR(tlink)) {
 540                free_xid(xid);
 541                return PTR_ERR(tlink);
 542        }
 543        tcon = tlink_tcon(tlink);
 544        server = tcon->ses->server;
 545
 546        page = alloc_dentry_path();
 547        full_path = build_path_from_dentry(file_dentry(file), page);
 548        if (IS_ERR(full_path)) {
 549                rc = PTR_ERR(full_path);
 550                goto out;
 551        }
 552
 553        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
 554                 inode, file->f_flags, full_path);
 555
 556        if (file->f_flags & O_DIRECT &&
 557            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
 558                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
 559                        file->f_op = &cifs_file_direct_nobrl_ops;
 560                else
 561                        file->f_op = &cifs_file_direct_ops;
 562        }
 563
 564        /* Get the cached handle as SMB2 close is deferred */
 565        rc = cifs_get_readable_path(tcon, full_path, &cfile);
 566        if (rc == 0) {
 567                if (file->f_flags == cfile->f_flags) {
 568                        file->private_data = cfile;
 569                        spin_lock(&CIFS_I(inode)->deferred_lock);
 570                        cifs_del_deferred_close(cfile);
 571                        spin_unlock(&CIFS_I(inode)->deferred_lock);
 572                        goto out;
 573                } else {
 574                        _cifsFileInfo_put(cfile, true, false);
 575                }
 576        }
 577
 578        if (server->oplocks)
 579                oplock = REQ_OPLOCK;
 580        else
 581                oplock = 0;
 582
 583        if (!tcon->broken_posix_open && tcon->unix_ext &&
 584            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 585                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 586                /* can not refresh inode info since size could be stale */
 587                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 588                                cifs_sb->ctx->file_mode /* ignored */,
 589                                file->f_flags, &oplock, &fid.netfid, xid);
 590                if (rc == 0) {
 591                        cifs_dbg(FYI, "posix open succeeded\n");
 592                        posix_open_ok = true;
 593                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 594                        if (tcon->ses->serverNOS)
 595                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
 596                                         tcon->ses->ip_addr,
 597                                         tcon->ses->serverNOS);
 598                        tcon->broken_posix_open = true;
 599                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
 600                         (rc != -EOPNOTSUPP)) /* path not found or net err */
 601                        goto out;
 602                /*
 603                 * Else fallthrough to retry open the old way on network i/o
 604                 * or DFS errors.
 605                 */
 606        }
 607
 608        if (server->ops->get_lease_key)
 609                server->ops->get_lease_key(inode, &fid);
 610
 611        cifs_add_pending_open(&fid, tlink, &open);
 612
 613        if (!posix_open_ok) {
 614                if (server->ops->get_lease_key)
 615                        server->ops->get_lease_key(inode, &fid);
 616
 617                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 618                                  file->f_flags, &oplock, &fid, xid);
 619                if (rc) {
 620                        cifs_del_pending_open(&open);
 621                        goto out;
 622                }
 623        }
 624
 625        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 626        if (cfile == NULL) {
 627                if (server->ops->close)
 628                        server->ops->close(xid, tcon, &fid);
 629                cifs_del_pending_open(&open);
 630                rc = -ENOMEM;
 631                goto out;
 632        }
 633
 634        cifs_fscache_set_inode_cookie(inode, file);
 635
 636        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
 637                /*
 638                 * Time to set mode which we can not set earlier due to
 639                 * problems creating new read-only files.
 640                 */
 641                struct cifs_unix_set_info_args args = {
 642                        .mode   = inode->i_mode,
 643                        .uid    = INVALID_UID, /* no change */
 644                        .gid    = INVALID_GID, /* no change */
 645                        .ctime  = NO_CHANGE_64,
 646                        .atime  = NO_CHANGE_64,
 647                        .mtime  = NO_CHANGE_64,
 648                        .device = 0,
 649                };
 650                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
 651                                       cfile->pid);
 652        }
 653
 654out:
 655        free_dentry_path(page);
 656        free_xid(xid);
 657        cifs_put_tlink(tlink);
 658        return rc;
 659}
 660
 661static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 662
 663/*
 664 * Try to reacquire byte range locks that were released when session
 665 * to server was lost.
 666 */
 667static int
 668cifs_relock_file(struct cifsFileInfo *cfile)
 669{
 670        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
 671        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 672        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 673        int rc = 0;
 674
 675        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
 676        if (cinode->can_cache_brlcks) {
 677                /* can cache locks - no need to relock */
 678                up_read(&cinode->lock_sem);
 679                return rc;
 680        }
 681
 682        if (cap_unix(tcon->ses) &&
 683            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 684            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 685                rc = cifs_push_posix_locks(cfile);
 686        else
 687                rc = tcon->ses->server->ops->push_mand_locks(cfile);
 688
 689        up_read(&cinode->lock_sem);
 690        return rc;
 691}
 692
 693static int
 694cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 695{
 696        int rc = -EACCES;
 697        unsigned int xid;
 698        __u32 oplock;
 699        struct cifs_sb_info *cifs_sb;
 700        struct cifs_tcon *tcon;
 701        struct TCP_Server_Info *server;
 702        struct cifsInodeInfo *cinode;
 703        struct inode *inode;
 704        void *page;
 705        const char *full_path;
 706        int desired_access;
 707        int disposition = FILE_OPEN;
 708        int create_options = CREATE_NOT_DIR;
 709        struct cifs_open_parms oparms;
 710
 711        xid = get_xid();
 712        mutex_lock(&cfile->fh_mutex);
 713        if (!cfile->invalidHandle) {
 714                mutex_unlock(&cfile->fh_mutex);
 715                free_xid(xid);
 716                return 0;
 717        }
 718
 719        inode = d_inode(cfile->dentry);
 720        cifs_sb = CIFS_SB(inode->i_sb);
 721        tcon = tlink_tcon(cfile->tlink);
 722        server = tcon->ses->server;
 723
 724        /*
 725         * Can not grab rename sem here because various ops, including those
 726         * that already have the rename sem can end up causing writepage to get
 727         * called and if the server was down that means we end up here, and we
 728         * can never tell if the caller already has the rename_sem.
 729         */
 730        page = alloc_dentry_path();
 731        full_path = build_path_from_dentry(cfile->dentry, page);
 732        if (IS_ERR(full_path)) {
 733                mutex_unlock(&cfile->fh_mutex);
 734                free_dentry_path(page);
 735                free_xid(xid);
 736                return PTR_ERR(full_path);
 737        }
 738
 739        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
 740                 inode, cfile->f_flags, full_path);
 741
 742        if (tcon->ses->server->oplocks)
 743                oplock = REQ_OPLOCK;
 744        else
 745                oplock = 0;
 746
 747        if (tcon->unix_ext && cap_unix(tcon->ses) &&
 748            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 749                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 750                /*
 751                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 752                 * original open. Must mask them off for a reopen.
 753                 */
 754                unsigned int oflags = cfile->f_flags &
 755                                                ~(O_CREAT | O_EXCL | O_TRUNC);
 756
 757                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 758                                     cifs_sb->ctx->file_mode /* ignored */,
 759                                     oflags, &oplock, &cfile->fid.netfid, xid);
 760                if (rc == 0) {
 761                        cifs_dbg(FYI, "posix reopen succeeded\n");
 762                        oparms.reconnect = true;
 763                        goto reopen_success;
 764                }
 765                /*
 766                 * fallthrough to retry open the old way on errors, especially
 767                 * in the reconnect path it is important to retry hard
 768                 */
 769        }
 770
 771        desired_access = cifs_convert_flags(cfile->f_flags);
 772
 773        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
 774        if (cfile->f_flags & O_SYNC)
 775                create_options |= CREATE_WRITE_THROUGH;
 776
 777        if (cfile->f_flags & O_DIRECT)
 778                create_options |= CREATE_NO_BUFFER;
 779
 780        if (server->ops->get_lease_key)
 781                server->ops->get_lease_key(inode, &cfile->fid);
 782
 783        oparms.tcon = tcon;
 784        oparms.cifs_sb = cifs_sb;
 785        oparms.desired_access = desired_access;
 786        oparms.create_options = cifs_create_options(cifs_sb, create_options);
 787        oparms.disposition = disposition;
 788        oparms.path = full_path;
 789        oparms.fid = &cfile->fid;
 790        oparms.reconnect = true;
 791
 792        /*
 793         * Can not refresh inode by passing in file_info buf to be returned by
 794         * ops->open and then calling get_inode_info with returned buf since
 795         * file might have write behind data that needs to be flushed and server
 796         * version of file size can be stale. If we knew for sure that inode was
 797         * not dirty locally we could do this.
 798         */
 799        rc = server->ops->open(xid, &oparms, &oplock, NULL);
 800        if (rc == -ENOENT && oparms.reconnect == false) {
 801                /* durable handle timeout is expired - open the file again */
 802                rc = server->ops->open(xid, &oparms, &oplock, NULL);
 803                /* indicate that we need to relock the file */
 804                oparms.reconnect = true;
 805        }
 806
 807        if (rc) {
 808                mutex_unlock(&cfile->fh_mutex);
 809                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
 810                cifs_dbg(FYI, "oplock: %d\n", oplock);
 811                goto reopen_error_exit;
 812        }
 813
 814reopen_success:
 815        cfile->invalidHandle = false;
 816        mutex_unlock(&cfile->fh_mutex);
 817        cinode = CIFS_I(inode);
 818
 819        if (can_flush) {
 820                rc = filemap_write_and_wait(inode->i_mapping);
 821                if (!is_interrupt_error(rc))
 822                        mapping_set_error(inode->i_mapping, rc);
 823
 824                if (tcon->posix_extensions)
 825                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
 826                else if (tcon->unix_ext)
 827                        rc = cifs_get_inode_info_unix(&inode, full_path,
 828                                                      inode->i_sb, xid);
 829                else
 830                        rc = cifs_get_inode_info(&inode, full_path, NULL,
 831                                                 inode->i_sb, xid, NULL);
 832        }
 833        /*
 834         * Else we are writing out data to server already and could deadlock if
 835         * we tried to flush data, and since we do not know if we have data that
 836         * would invalidate the current end of file on the server we can not go
 837         * to the server to get the new inode info.
 838         */
 839
 840        /*
 841         * If the server returned a read oplock and we have mandatory brlocks,
 842         * set oplock level to None.
 843         */
 844        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 845                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 846                oplock = 0;
 847        }
 848
 849        server->ops->set_fid(cfile, &cfile->fid, oplock);
 850        if (oparms.reconnect)
 851                cifs_relock_file(cfile);
 852
 853reopen_error_exit:
 854        free_dentry_path(page);
 855        free_xid(xid);
 856        return rc;
 857}
 858
 859void smb2_deferred_work_close(struct work_struct *work)
 860{
 861        struct cifsFileInfo *cfile = container_of(work,
 862                        struct cifsFileInfo, deferred.work);
 863
 864        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 865        cifs_del_deferred_close(cfile);
 866        cfile->deferred_close_scheduled = false;
 867        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 868        _cifsFileInfo_put(cfile, true, false);
 869}
 870
 871int cifs_close(struct inode *inode, struct file *file)
 872{
 873        struct cifsFileInfo *cfile;
 874        struct cifsInodeInfo *cinode = CIFS_I(inode);
 875        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 876        struct cifs_deferred_close *dclose;
 877
 878        if (file->private_data != NULL) {
 879                cfile = file->private_data;
 880                file->private_data = NULL;
 881                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
 882                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
 883                    cinode->lease_granted &&
 884                    dclose) {
 885                        if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
 886                                inode->i_ctime = inode->i_mtime = current_time(inode);
 887                        spin_lock(&cinode->deferred_lock);
 888                        cifs_add_deferred_close(cfile, dclose);
 889                        if (cfile->deferred_close_scheduled &&
 890                            delayed_work_pending(&cfile->deferred)) {
 891                                /*
 892                                 * If there is no pending work, mod_delayed_work queues new work.
 893                                 * So, Increase the ref count to avoid use-after-free.
 894                                 */
 895                                if (!mod_delayed_work(deferredclose_wq,
 896                                                &cfile->deferred, cifs_sb->ctx->acregmax))
 897                                        cifsFileInfo_get(cfile);
 898                        } else {
 899                                /* Deferred close for files */
 900                                queue_delayed_work(deferredclose_wq,
 901                                                &cfile->deferred, cifs_sb->ctx->acregmax);
 902                                cfile->deferred_close_scheduled = true;
 903                                spin_unlock(&cinode->deferred_lock);
 904                                return 0;
 905                        }
 906                        spin_unlock(&cinode->deferred_lock);
 907                        _cifsFileInfo_put(cfile, true, false);
 908                } else {
 909                        _cifsFileInfo_put(cfile, true, false);
 910                        kfree(dclose);
 911                }
 912        }
 913
 914        /* return code from the ->release op is always ignored */
 915        return 0;
 916}
 917
 918void
 919cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
 920{
 921        struct cifsFileInfo *open_file;
 922        struct list_head *tmp;
 923        struct list_head *tmp1;
 924        struct list_head tmp_list;
 925
 926        if (!tcon->use_persistent || !tcon->need_reopen_files)
 927                return;
 928
 929        tcon->need_reopen_files = false;
 930
 931        cifs_dbg(FYI, "Reopen persistent handles\n");
 932        INIT_LIST_HEAD(&tmp_list);
 933
 934        /* list all files open on tree connection, reopen resilient handles  */
 935        spin_lock(&tcon->open_file_lock);
 936        list_for_each(tmp, &tcon->openFileList) {
 937                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
 938                if (!open_file->invalidHandle)
 939                        continue;
 940                cifsFileInfo_get(open_file);
 941                list_add_tail(&open_file->rlist, &tmp_list);
 942        }
 943        spin_unlock(&tcon->open_file_lock);
 944
 945        list_for_each_safe(tmp, tmp1, &tmp_list) {
 946                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
 947                if (cifs_reopen_file(open_file, false /* do not flush */))
 948                        tcon->need_reopen_files = true;
 949                list_del_init(&open_file->rlist);
 950                cifsFileInfo_put(open_file);
 951        }
 952}
 953
 954int cifs_closedir(struct inode *inode, struct file *file)
 955{
 956        int rc = 0;
 957        unsigned int xid;
 958        struct cifsFileInfo *cfile = file->private_data;
 959        struct cifs_tcon *tcon;
 960        struct TCP_Server_Info *server;
 961        char *buf;
 962
 963        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
 964
 965        if (cfile == NULL)
 966                return rc;
 967
 968        xid = get_xid();
 969        tcon = tlink_tcon(cfile->tlink);
 970        server = tcon->ses->server;
 971
 972        cifs_dbg(FYI, "Freeing private data in close dir\n");
 973        spin_lock(&cfile->file_info_lock);
 974        if (server->ops->dir_needs_close(cfile)) {
 975                cfile->invalidHandle = true;
 976                spin_unlock(&cfile->file_info_lock);
 977                if (server->ops->close_dir)
 978                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
 979                else
 980                        rc = -ENOSYS;
 981                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
 982                /* not much we can do if it fails anyway, ignore rc */
 983                rc = 0;
 984        } else
 985                spin_unlock(&cfile->file_info_lock);
 986
 987        buf = cfile->srch_inf.ntwrk_buf_start;
 988        if (buf) {
 989                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
 990                cfile->srch_inf.ntwrk_buf_start = NULL;
 991                if (cfile->srch_inf.smallBuf)
 992                        cifs_small_buf_release(buf);
 993                else
 994                        cifs_buf_release(buf);
 995        }
 996
 997        cifs_put_tlink(cfile->tlink);
 998        kfree(file->private_data);
 999        file->private_data = NULL;
1000        /* BB can we lock the filestruct while this is going on? */
1001        free_xid(xid);
1002        return rc;
1003}
1004
1005static struct cifsLockInfo *
1006cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1007{
1008        struct cifsLockInfo *lock =
1009                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1010        if (!lock)
1011                return lock;
1012        lock->offset = offset;
1013        lock->length = length;
1014        lock->type = type;
1015        lock->pid = current->tgid;
1016        lock->flags = flags;
1017        INIT_LIST_HEAD(&lock->blist);
1018        init_waitqueue_head(&lock->block_q);
1019        return lock;
1020}
1021
1022void
1023cifs_del_lock_waiters(struct cifsLockInfo *lock)
1024{
1025        struct cifsLockInfo *li, *tmp;
1026        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1027                list_del_init(&li->blist);
1028                wake_up(&li->block_q);
1029        }
1030}
1031
1032#define CIFS_LOCK_OP    0
1033#define CIFS_READ_OP    1
1034#define CIFS_WRITE_OP   2
1035
1036/* @rw_check : 0 - no op, 1 - read, 2 - write */
1037static bool
1038cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1039                            __u64 length, __u8 type, __u16 flags,
1040                            struct cifsFileInfo *cfile,
1041                            struct cifsLockInfo **conf_lock, int rw_check)
1042{
1043        struct cifsLockInfo *li;
1044        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1045        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1046
1047        list_for_each_entry(li, &fdlocks->locks, llist) {
1048                if (offset + length <= li->offset ||
1049                    offset >= li->offset + li->length)
1050                        continue;
1051                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1052                    server->ops->compare_fids(cfile, cur_cfile)) {
1053                        /* shared lock prevents write op through the same fid */
1054                        if (!(li->type & server->vals->shared_lock_type) ||
1055                            rw_check != CIFS_WRITE_OP)
1056                                continue;
1057                }
1058                if ((type & server->vals->shared_lock_type) &&
1059                    ((server->ops->compare_fids(cfile, cur_cfile) &&
1060                     current->tgid == li->pid) || type == li->type))
1061                        continue;
1062                if (rw_check == CIFS_LOCK_OP &&
1063                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1064                    server->ops->compare_fids(cfile, cur_cfile))
1065                        continue;
1066                if (conf_lock)
1067                        *conf_lock = li;
1068                return true;
1069        }
1070        return false;
1071}
1072
1073bool
1074cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1075                        __u8 type, __u16 flags,
1076                        struct cifsLockInfo **conf_lock, int rw_check)
1077{
1078        bool rc = false;
1079        struct cifs_fid_locks *cur;
1080        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1081
1082        list_for_each_entry(cur, &cinode->llist, llist) {
1083                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1084                                                 flags, cfile, conf_lock,
1085                                                 rw_check);
1086                if (rc)
1087                        break;
1088        }
1089
1090        return rc;
1091}
1092
1093/*
1094 * Check if there is another lock that prevents us to set the lock (mandatory
1095 * style). If such a lock exists, update the flock structure with its
1096 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1097 * or leave it the same if we can't. Returns 0 if we don't need to request to
1098 * the server or 1 otherwise.
1099 */
1100static int
1101cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1102               __u8 type, struct file_lock *flock)
1103{
1104        int rc = 0;
1105        struct cifsLockInfo *conf_lock;
1106        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1107        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1108        bool exist;
1109
1110        down_read(&cinode->lock_sem);
1111
1112        exist = cifs_find_lock_conflict(cfile, offset, length, type,
1113                                        flock->fl_flags, &conf_lock,
1114                                        CIFS_LOCK_OP);
1115        if (exist) {
1116                flock->fl_start = conf_lock->offset;
1117                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1118                flock->fl_pid = conf_lock->pid;
1119                if (conf_lock->type & server->vals->shared_lock_type)
1120                        flock->fl_type = F_RDLCK;
1121                else
1122                        flock->fl_type = F_WRLCK;
1123        } else if (!cinode->can_cache_brlcks)
1124                rc = 1;
1125        else
1126                flock->fl_type = F_UNLCK;
1127
1128        up_read(&cinode->lock_sem);
1129        return rc;
1130}
1131
1132static void
1133cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1134{
1135        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1136        cifs_down_write(&cinode->lock_sem);
1137        list_add_tail(&lock->llist, &cfile->llist->locks);
1138        up_write(&cinode->lock_sem);
1139}
1140
1141/*
1142 * Set the byte-range lock (mandatory style). Returns:
1143 * 1) 0, if we set the lock and don't need to request to the server;
1144 * 2) 1, if no locks prevent us but we need to request to the server;
1145 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1146 */
1147static int
1148cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1149                 bool wait)
1150{
1151        struct cifsLockInfo *conf_lock;
1152        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1153        bool exist;
1154        int rc = 0;
1155
1156try_again:
1157        exist = false;
1158        cifs_down_write(&cinode->lock_sem);
1159
1160        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1161                                        lock->type, lock->flags, &conf_lock,
1162                                        CIFS_LOCK_OP);
1163        if (!exist && cinode->can_cache_brlcks) {
1164                list_add_tail(&lock->llist, &cfile->llist->locks);
1165                up_write(&cinode->lock_sem);
1166                return rc;
1167        }
1168
1169        if (!exist)
1170                rc = 1;
1171        else if (!wait)
1172                rc = -EACCES;
1173        else {
1174                list_add_tail(&lock->blist, &conf_lock->blist);
1175                up_write(&cinode->lock_sem);
1176                rc = wait_event_interruptible(lock->block_q,
1177                                        (lock->blist.prev == &lock->blist) &&
1178                                        (lock->blist.next == &lock->blist));
1179                if (!rc)
1180                        goto try_again;
1181                cifs_down_write(&cinode->lock_sem);
1182                list_del_init(&lock->blist);
1183        }
1184
1185        up_write(&cinode->lock_sem);
1186        return rc;
1187}
1188
1189/*
1190 * Check if there is another lock that prevents us to set the lock (posix
1191 * style). If such a lock exists, update the flock structure with its
1192 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1193 * or leave it the same if we can't. Returns 0 if we don't need to request to
1194 * the server or 1 otherwise.
1195 */
1196static int
1197cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1198{
1199        int rc = 0;
1200        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1201        unsigned char saved_type = flock->fl_type;
1202
1203        if ((flock->fl_flags & FL_POSIX) == 0)
1204                return 1;
1205
1206        down_read(&cinode->lock_sem);
1207        posix_test_lock(file, flock);
1208
1209        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1210                flock->fl_type = saved_type;
1211                rc = 1;
1212        }
1213
1214        up_read(&cinode->lock_sem);
1215        return rc;
1216}
1217
1218/*
1219 * Set the byte-range lock (posix style). Returns:
1220 * 1) <0, if the error occurs while setting the lock;
1221 * 2) 0, if we set the lock and don't need to request to the server;
1222 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1223 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1224 */
1225static int
1226cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1227{
1228        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1229        int rc = FILE_LOCK_DEFERRED + 1;
1230
1231        if ((flock->fl_flags & FL_POSIX) == 0)
1232                return rc;
1233
1234        cifs_down_write(&cinode->lock_sem);
1235        if (!cinode->can_cache_brlcks) {
1236                up_write(&cinode->lock_sem);
1237                return rc;
1238        }
1239
1240        rc = posix_lock_file(file, flock, NULL);
1241        up_write(&cinode->lock_sem);
1242        return rc;
1243}
1244
1245int
1246cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1247{
1248        unsigned int xid;
1249        int rc = 0, stored_rc;
1250        struct cifsLockInfo *li, *tmp;
1251        struct cifs_tcon *tcon;
1252        unsigned int num, max_num, max_buf;
1253        LOCKING_ANDX_RANGE *buf, *cur;
1254        static const int types[] = {
1255                LOCKING_ANDX_LARGE_FILES,
1256                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1257        };
1258        int i;
1259
1260        xid = get_xid();
1261        tcon = tlink_tcon(cfile->tlink);
1262
1263        /*
1264         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1265         * and check it before using.
1266         */
1267        max_buf = tcon->ses->server->maxBuf;
1268        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1269                free_xid(xid);
1270                return -EINVAL;
1271        }
1272
1273        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1274                     PAGE_SIZE);
1275        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1276                        PAGE_SIZE);
1277        max_num = (max_buf - sizeof(struct smb_hdr)) /
1278                                                sizeof(LOCKING_ANDX_RANGE);
1279        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1280        if (!buf) {
1281                free_xid(xid);
1282                return -ENOMEM;
1283        }
1284
1285        for (i = 0; i < 2; i++) {
1286                cur = buf;
1287                num = 0;
1288                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1289                        if (li->type != types[i])
1290                                continue;
1291                        cur->Pid = cpu_to_le16(li->pid);
1292                        cur->LengthLow = cpu_to_le32((u32)li->length);
1293                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1294                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1295                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1296                        if (++num == max_num) {
1297                                stored_rc = cifs_lockv(xid, tcon,
1298                                                       cfile->fid.netfid,
1299                                                       (__u8)li->type, 0, num,
1300                                                       buf);
1301                                if (stored_rc)
1302                                        rc = stored_rc;
1303                                cur = buf;
1304                                num = 0;
1305                        } else
1306                                cur++;
1307                }
1308
1309                if (num) {
1310                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1311                                               (__u8)types[i], 0, num, buf);
1312                        if (stored_rc)
1313                                rc = stored_rc;
1314                }
1315        }
1316
1317        kfree(buf);
1318        free_xid(xid);
1319        return rc;
1320}
1321
1322static __u32
1323hash_lockowner(fl_owner_t owner)
1324{
1325        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1326}
1327
1328struct lock_to_push {
1329        struct list_head llist;
1330        __u64 offset;
1331        __u64 length;
1332        __u32 pid;
1333        __u16 netfid;
1334        __u8 type;
1335};
1336
1337static int
1338cifs_push_posix_locks(struct cifsFileInfo *cfile)
1339{
1340        struct inode *inode = d_inode(cfile->dentry);
1341        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1342        struct file_lock *flock;
1343        struct file_lock_context *flctx = inode->i_flctx;
1344        unsigned int count = 0, i;
1345        int rc = 0, xid, type;
1346        struct list_head locks_to_send, *el;
1347        struct lock_to_push *lck, *tmp;
1348        __u64 length;
1349
1350        xid = get_xid();
1351
1352        if (!flctx)
1353                goto out;
1354
1355        spin_lock(&flctx->flc_lock);
1356        list_for_each(el, &flctx->flc_posix) {
1357                count++;
1358        }
1359        spin_unlock(&flctx->flc_lock);
1360
1361        INIT_LIST_HEAD(&locks_to_send);
1362
1363        /*
1364         * Allocating count locks is enough because no FL_POSIX locks can be
1365         * added to the list while we are holding cinode->lock_sem that
1366         * protects locking operations of this inode.
1367         */
1368        for (i = 0; i < count; i++) {
1369                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1370                if (!lck) {
1371                        rc = -ENOMEM;
1372                        goto err_out;
1373                }
1374                list_add_tail(&lck->llist, &locks_to_send);
1375        }
1376
1377        el = locks_to_send.next;
1378        spin_lock(&flctx->flc_lock);
1379        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1380                if (el == &locks_to_send) {
1381                        /*
1382                         * The list ended. We don't have enough allocated
1383                         * structures - something is really wrong.
1384                         */
1385                        cifs_dbg(VFS, "Can't push all brlocks!\n");
1386                        break;
1387                }
1388                length = 1 + flock->fl_end - flock->fl_start;
1389                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1390                        type = CIFS_RDLCK;
1391                else
1392                        type = CIFS_WRLCK;
1393                lck = list_entry(el, struct lock_to_push, llist);
1394                lck->pid = hash_lockowner(flock->fl_owner);
1395                lck->netfid = cfile->fid.netfid;
1396                lck->length = length;
1397                lck->type = type;
1398                lck->offset = flock->fl_start;
1399        }
1400        spin_unlock(&flctx->flc_lock);
1401
1402        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1403                int stored_rc;
1404
1405                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1406                                             lck->offset, lck->length, NULL,
1407                                             lck->type, 0);
1408                if (stored_rc)
1409                        rc = stored_rc;
1410                list_del(&lck->llist);
1411                kfree(lck);
1412        }
1413
1414out:
1415        free_xid(xid);
1416        return rc;
1417err_out:
1418        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1419                list_del(&lck->llist);
1420                kfree(lck);
1421        }
1422        goto out;
1423}
1424
1425static int
1426cifs_push_locks(struct cifsFileInfo *cfile)
1427{
1428        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1429        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1430        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1431        int rc = 0;
1432
1433        /* we are going to update can_cache_brlcks here - need a write access */
1434        cifs_down_write(&cinode->lock_sem);
1435        if (!cinode->can_cache_brlcks) {
1436                up_write(&cinode->lock_sem);
1437                return rc;
1438        }
1439
1440        if (cap_unix(tcon->ses) &&
1441            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1442            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1443                rc = cifs_push_posix_locks(cfile);
1444        else
1445                rc = tcon->ses->server->ops->push_mand_locks(cfile);
1446
1447        cinode->can_cache_brlcks = false;
1448        up_write(&cinode->lock_sem);
1449        return rc;
1450}
1451
1452static void
1453cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1454                bool *wait_flag, struct TCP_Server_Info *server)
1455{
1456        if (flock->fl_flags & FL_POSIX)
1457                cifs_dbg(FYI, "Posix\n");
1458        if (flock->fl_flags & FL_FLOCK)
1459                cifs_dbg(FYI, "Flock\n");
1460        if (flock->fl_flags & FL_SLEEP) {
1461                cifs_dbg(FYI, "Blocking lock\n");
1462                *wait_flag = true;
1463        }
1464        if (flock->fl_flags & FL_ACCESS)
1465                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1466        if (flock->fl_flags & FL_LEASE)
1467                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1468        if (flock->fl_flags &
1469            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1470               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1471                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1472
1473        *type = server->vals->large_lock_type;
1474        if (flock->fl_type == F_WRLCK) {
1475                cifs_dbg(FYI, "F_WRLCK\n");
1476                *type |= server->vals->exclusive_lock_type;
1477                *lock = 1;
1478        } else if (flock->fl_type == F_UNLCK) {
1479                cifs_dbg(FYI, "F_UNLCK\n");
1480                *type |= server->vals->unlock_lock_type;
1481                *unlock = 1;
1482                /* Check if unlock includes more than one lock range */
1483        } else if (flock->fl_type == F_RDLCK) {
1484                cifs_dbg(FYI, "F_RDLCK\n");
1485                *type |= server->vals->shared_lock_type;
1486                *lock = 1;
1487        } else if (flock->fl_type == F_EXLCK) {
1488                cifs_dbg(FYI, "F_EXLCK\n");
1489                *type |= server->vals->exclusive_lock_type;
1490                *lock = 1;
1491        } else if (flock->fl_type == F_SHLCK) {
1492                cifs_dbg(FYI, "F_SHLCK\n");
1493                *type |= server->vals->shared_lock_type;
1494                *lock = 1;
1495        } else
1496                cifs_dbg(FYI, "Unknown type of lock\n");
1497}
1498
1499static int
1500cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1501           bool wait_flag, bool posix_lck, unsigned int xid)
1502{
1503        int rc = 0;
1504        __u64 length = 1 + flock->fl_end - flock->fl_start;
1505        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1506        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1507        struct TCP_Server_Info *server = tcon->ses->server;
1508        __u16 netfid = cfile->fid.netfid;
1509
1510        if (posix_lck) {
1511                int posix_lock_type;
1512
1513                rc = cifs_posix_lock_test(file, flock);
1514                if (!rc)
1515                        return rc;
1516
1517                if (type & server->vals->shared_lock_type)
1518                        posix_lock_type = CIFS_RDLCK;
1519                else
1520                        posix_lock_type = CIFS_WRLCK;
1521                rc = CIFSSMBPosixLock(xid, tcon, netfid,
1522                                      hash_lockowner(flock->fl_owner),
1523                                      flock->fl_start, length, flock,
1524                                      posix_lock_type, wait_flag);
1525                return rc;
1526        }
1527
1528        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1529        if (!rc)
1530                return rc;
1531
1532        /* BB we could chain these into one lock request BB */
1533        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1534                                    1, 0, false);
1535        if (rc == 0) {
1536                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1537                                            type, 0, 1, false);
1538                flock->fl_type = F_UNLCK;
1539                if (rc != 0)
1540                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1541                                 rc);
1542                return 0;
1543        }
1544
1545        if (type & server->vals->shared_lock_type) {
1546                flock->fl_type = F_WRLCK;
1547                return 0;
1548        }
1549
1550        type &= ~server->vals->exclusive_lock_type;
1551
1552        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1553                                    type | server->vals->shared_lock_type,
1554                                    1, 0, false);
1555        if (rc == 0) {
1556                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                        type | server->vals->shared_lock_type, 0, 1, false);
1558                flock->fl_type = F_RDLCK;
1559                if (rc != 0)
1560                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1561                                 rc);
1562        } else
1563                flock->fl_type = F_WRLCK;
1564
1565        return 0;
1566}
1567
1568void
1569cifs_move_llist(struct list_head *source, struct list_head *dest)
1570{
1571        struct list_head *li, *tmp;
1572        list_for_each_safe(li, tmp, source)
1573                list_move(li, dest);
1574}
1575
1576void
1577cifs_free_llist(struct list_head *llist)
1578{
1579        struct cifsLockInfo *li, *tmp;
1580        list_for_each_entry_safe(li, tmp, llist, llist) {
1581                cifs_del_lock_waiters(li);
1582                list_del(&li->llist);
1583                kfree(li);
1584        }
1585}
1586
1587int
1588cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1589                  unsigned int xid)
1590{
1591        int rc = 0, stored_rc;
1592        static const int types[] = {
1593                LOCKING_ANDX_LARGE_FILES,
1594                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1595        };
1596        unsigned int i;
1597        unsigned int max_num, num, max_buf;
1598        LOCKING_ANDX_RANGE *buf, *cur;
1599        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1600        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1601        struct cifsLockInfo *li, *tmp;
1602        __u64 length = 1 + flock->fl_end - flock->fl_start;
1603        struct list_head tmp_llist;
1604
1605        INIT_LIST_HEAD(&tmp_llist);
1606
1607        /*
1608         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1609         * and check it before using.
1610         */
1611        max_buf = tcon->ses->server->maxBuf;
1612        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1613                return -EINVAL;
1614
1615        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1616                     PAGE_SIZE);
1617        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1618                        PAGE_SIZE);
1619        max_num = (max_buf - sizeof(struct smb_hdr)) /
1620                                                sizeof(LOCKING_ANDX_RANGE);
1621        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1622        if (!buf)
1623                return -ENOMEM;
1624
1625        cifs_down_write(&cinode->lock_sem);
1626        for (i = 0; i < 2; i++) {
1627                cur = buf;
1628                num = 0;
1629                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1630                        if (flock->fl_start > li->offset ||
1631                            (flock->fl_start + length) <
1632                            (li->offset + li->length))
1633                                continue;
1634                        if (current->tgid != li->pid)
1635                                continue;
1636                        if (types[i] != li->type)
1637                                continue;
1638                        if (cinode->can_cache_brlcks) {
1639                                /*
1640                                 * We can cache brlock requests - simply remove
1641                                 * a lock from the file's list.
1642                                 */
1643                                list_del(&li->llist);
1644                                cifs_del_lock_waiters(li);
1645                                kfree(li);
1646                                continue;
1647                        }
1648                        cur->Pid = cpu_to_le16(li->pid);
1649                        cur->LengthLow = cpu_to_le32((u32)li->length);
1650                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1651                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1652                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1653                        /*
1654                         * We need to save a lock here to let us add it again to
1655                         * the file's list if the unlock range request fails on
1656                         * the server.
1657                         */
1658                        list_move(&li->llist, &tmp_llist);
1659                        if (++num == max_num) {
1660                                stored_rc = cifs_lockv(xid, tcon,
1661                                                       cfile->fid.netfid,
1662                                                       li->type, num, 0, buf);
1663                                if (stored_rc) {
1664                                        /*
1665                                         * We failed on the unlock range
1666                                         * request - add all locks from the tmp
1667                                         * list to the head of the file's list.
1668                                         */
1669                                        cifs_move_llist(&tmp_llist,
1670                                                        &cfile->llist->locks);
1671                                        rc = stored_rc;
1672                                } else
1673                                        /*
1674                                         * The unlock range request succeed -
1675                                         * free the tmp list.
1676                                         */
1677                                        cifs_free_llist(&tmp_llist);
1678                                cur = buf;
1679                                num = 0;
1680                        } else
1681                                cur++;
1682                }
1683                if (num) {
1684                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1685                                               types[i], num, 0, buf);
1686                        if (stored_rc) {
1687                                cifs_move_llist(&tmp_llist,
1688                                                &cfile->llist->locks);
1689                                rc = stored_rc;
1690                        } else
1691                                cifs_free_llist(&tmp_llist);
1692                }
1693        }
1694
1695        up_write(&cinode->lock_sem);
1696        kfree(buf);
1697        return rc;
1698}
1699
1700static int
1701cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1702           bool wait_flag, bool posix_lck, int lock, int unlock,
1703           unsigned int xid)
1704{
1705        int rc = 0;
1706        __u64 length = 1 + flock->fl_end - flock->fl_start;
1707        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1708        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1709        struct TCP_Server_Info *server = tcon->ses->server;
1710        struct inode *inode = d_inode(cfile->dentry);
1711
1712        if (posix_lck) {
1713                int posix_lock_type;
1714
1715                rc = cifs_posix_lock_set(file, flock);
1716                if (rc <= FILE_LOCK_DEFERRED)
1717                        return rc;
1718
1719                if (type & server->vals->shared_lock_type)
1720                        posix_lock_type = CIFS_RDLCK;
1721                else
1722                        posix_lock_type = CIFS_WRLCK;
1723
1724                if (unlock == 1)
1725                        posix_lock_type = CIFS_UNLCK;
1726
1727                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1728                                      hash_lockowner(flock->fl_owner),
1729                                      flock->fl_start, length,
1730                                      NULL, posix_lock_type, wait_flag);
1731                goto out;
1732        }
1733
1734        if (lock) {
1735                struct cifsLockInfo *lock;
1736
1737                lock = cifs_lock_init(flock->fl_start, length, type,
1738                                      flock->fl_flags);
1739                if (!lock)
1740                        return -ENOMEM;
1741
1742                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1743                if (rc < 0) {
1744                        kfree(lock);
1745                        return rc;
1746                }
1747                if (!rc)
1748                        goto out;
1749
1750                /*
1751                 * Windows 7 server can delay breaking lease from read to None
1752                 * if we set a byte-range lock on a file - break it explicitly
1753                 * before sending the lock to the server to be sure the next
1754                 * read won't conflict with non-overlapted locks due to
1755                 * pagereading.
1756                 */
1757                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1758                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1759                        cifs_zap_mapping(inode);
1760                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1761                                 inode);
1762                        CIFS_I(inode)->oplock = 0;
1763                }
1764
1765                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766                                            type, 1, 0, wait_flag);
1767                if (rc) {
1768                        kfree(lock);
1769                        return rc;
1770                }
1771
1772                cifs_lock_add(cfile, lock);
1773        } else if (unlock)
1774                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1775
1776out:
1777        if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1778                /*
1779                 * If this is a request to remove all locks because we
1780                 * are closing the file, it doesn't matter if the
1781                 * unlocking failed as both cifs.ko and the SMB server
1782                 * remove the lock on file close
1783                 */
1784                if (rc) {
1785                        cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1786                        if (!(flock->fl_flags & FL_CLOSE))
1787                                return rc;
1788                }
1789                rc = locks_lock_file_wait(file, flock);
1790        }
1791        return rc;
1792}
1793
1794int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1795{
1796        int rc, xid;
1797        int lock = 0, unlock = 0;
1798        bool wait_flag = false;
1799        bool posix_lck = false;
1800        struct cifs_sb_info *cifs_sb;
1801        struct cifs_tcon *tcon;
1802        struct cifsFileInfo *cfile;
1803        __u32 type;
1804
1805        rc = -EACCES;
1806        xid = get_xid();
1807
1808        if (!(fl->fl_flags & FL_FLOCK))
1809                return -ENOLCK;
1810
1811        cfile = (struct cifsFileInfo *)file->private_data;
1812        tcon = tlink_tcon(cfile->tlink);
1813
1814        cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1815                        tcon->ses->server);
1816        cifs_sb = CIFS_FILE_SB(file);
1817
1818        if (cap_unix(tcon->ses) &&
1819            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1820            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1821                posix_lck = true;
1822
1823        if (!lock && !unlock) {
1824                /*
1825                 * if no lock or unlock then nothing to do since we do not
1826                 * know what it is
1827                 */
1828                free_xid(xid);
1829                return -EOPNOTSUPP;
1830        }
1831
1832        rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1833                        xid);
1834        free_xid(xid);
1835        return rc;
1836
1837
1838}
1839
1840int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1841{
1842        int rc, xid;
1843        int lock = 0, unlock = 0;
1844        bool wait_flag = false;
1845        bool posix_lck = false;
1846        struct cifs_sb_info *cifs_sb;
1847        struct cifs_tcon *tcon;
1848        struct cifsFileInfo *cfile;
1849        __u32 type;
1850
1851        rc = -EACCES;
1852        xid = get_xid();
1853
1854        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1855                 cmd, flock->fl_flags, flock->fl_type,
1856                 flock->fl_start, flock->fl_end);
1857
1858        cfile = (struct cifsFileInfo *)file->private_data;
1859        tcon = tlink_tcon(cfile->tlink);
1860
1861        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1862                        tcon->ses->server);
1863        cifs_sb = CIFS_FILE_SB(file);
1864
1865        if (cap_unix(tcon->ses) &&
1866            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1867            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1868                posix_lck = true;
1869        /*
1870         * BB add code here to normalize offset and length to account for
1871         * negative length which we can not accept over the wire.
1872         */
1873        if (IS_GETLK(cmd)) {
1874                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1875                free_xid(xid);
1876                return rc;
1877        }
1878
1879        if (!lock && !unlock) {
1880                /*
1881                 * if no lock or unlock then nothing to do since we do not
1882                 * know what it is
1883                 */
1884                free_xid(xid);
1885                return -EOPNOTSUPP;
1886        }
1887
1888        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1889                        xid);
1890        free_xid(xid);
1891        return rc;
1892}
1893
1894/*
1895 * update the file size (if needed) after a write. Should be called with
1896 * the inode->i_lock held
1897 */
1898void
1899cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1900                      unsigned int bytes_written)
1901{
1902        loff_t end_of_write = offset + bytes_written;
1903
1904        if (end_of_write > cifsi->server_eof)
1905                cifsi->server_eof = end_of_write;
1906}
1907
1908static ssize_t
1909cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1910           size_t write_size, loff_t *offset)
1911{
1912        int rc = 0;
1913        unsigned int bytes_written = 0;
1914        unsigned int total_written;
1915        struct cifs_tcon *tcon;
1916        struct TCP_Server_Info *server;
1917        unsigned int xid;
1918        struct dentry *dentry = open_file->dentry;
1919        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1920        struct cifs_io_parms io_parms = {0};
1921
1922        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1923                 write_size, *offset, dentry);
1924
1925        tcon = tlink_tcon(open_file->tlink);
1926        server = tcon->ses->server;
1927
1928        if (!server->ops->sync_write)
1929                return -ENOSYS;
1930
1931        xid = get_xid();
1932
1933        for (total_written = 0; write_size > total_written;
1934             total_written += bytes_written) {
1935                rc = -EAGAIN;
1936                while (rc == -EAGAIN) {
1937                        struct kvec iov[2];
1938                        unsigned int len;
1939
1940                        if (open_file->invalidHandle) {
1941                                /* we could deadlock if we called
1942                                   filemap_fdatawait from here so tell
1943                                   reopen_file not to flush data to
1944                                   server now */
1945                                rc = cifs_reopen_file(open_file, false);
1946                                if (rc != 0)
1947                                        break;
1948                        }
1949
1950                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1951                                  (unsigned int)write_size - total_written);
1952                        /* iov[0] is reserved for smb header */
1953                        iov[1].iov_base = (char *)write_data + total_written;
1954                        iov[1].iov_len = len;
1955                        io_parms.pid = pid;
1956                        io_parms.tcon = tcon;
1957                        io_parms.offset = *offset;
1958                        io_parms.length = len;
1959                        rc = server->ops->sync_write(xid, &open_file->fid,
1960                                        &io_parms, &bytes_written, iov, 1);
1961                }
1962                if (rc || (bytes_written == 0)) {
1963                        if (total_written)
1964                                break;
1965                        else {
1966                                free_xid(xid);
1967                                return rc;
1968                        }
1969                } else {
1970                        spin_lock(&d_inode(dentry)->i_lock);
1971                        cifs_update_eof(cifsi, *offset, bytes_written);
1972                        spin_unlock(&d_inode(dentry)->i_lock);
1973                        *offset += bytes_written;
1974                }
1975        }
1976
1977        cifs_stats_bytes_written(tcon, total_written);
1978
1979        if (total_written > 0) {
1980                spin_lock(&d_inode(dentry)->i_lock);
1981                if (*offset > d_inode(dentry)->i_size) {
1982                        i_size_write(d_inode(dentry), *offset);
1983                        d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1984                }
1985                spin_unlock(&d_inode(dentry)->i_lock);
1986        }
1987        mark_inode_dirty_sync(d_inode(dentry));
1988        free_xid(xid);
1989        return total_written;
1990}
1991
1992struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1993                                        bool fsuid_only)
1994{
1995        struct cifsFileInfo *open_file = NULL;
1996        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1997
1998        /* only filter by fsuid on multiuser mounts */
1999        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2000                fsuid_only = false;
2001
2002        spin_lock(&cifs_inode->open_file_lock);
2003        /* we could simply get the first_list_entry since write-only entries
2004           are always at the end of the list but since the first entry might
2005           have a close pending, we go through the whole list */
2006        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2007                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2008                        continue;
2009                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2010                        if ((!open_file->invalidHandle)) {
2011                                /* found a good file */
2012                                /* lock it so it will not be closed on us */
2013                                cifsFileInfo_get(open_file);
2014                                spin_unlock(&cifs_inode->open_file_lock);
2015                                return open_file;
2016                        } /* else might as well continue, and look for
2017                             another, or simply have the caller reopen it
2018                             again rather than trying to fix this handle */
2019                } else /* write only file */
2020                        break; /* write only files are last so must be done */
2021        }
2022        spin_unlock(&cifs_inode->open_file_lock);
2023        return NULL;
2024}
2025
2026/* Return -EBADF if no handle is found and general rc otherwise */
2027int
2028cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2029                       struct cifsFileInfo **ret_file)
2030{
2031        struct cifsFileInfo *open_file, *inv_file = NULL;
2032        struct cifs_sb_info *cifs_sb;
2033        bool any_available = false;
2034        int rc = -EBADF;
2035        unsigned int refind = 0;
2036        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2037        bool with_delete = flags & FIND_WR_WITH_DELETE;
2038        *ret_file = NULL;
2039
2040        /*
2041         * Having a null inode here (because mapping->host was set to zero by
2042         * the VFS or MM) should not happen but we had reports of on oops (due
2043         * to it being zero) during stress testcases so we need to check for it
2044         */
2045
2046        if (cifs_inode == NULL) {
2047                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2048                dump_stack();
2049                return rc;
2050        }
2051
2052        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2053
2054        /* only filter by fsuid on multiuser mounts */
2055        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2056                fsuid_only = false;
2057
2058        spin_lock(&cifs_inode->open_file_lock);
2059refind_writable:
2060        if (refind > MAX_REOPEN_ATT) {
2061                spin_unlock(&cifs_inode->open_file_lock);
2062                return rc;
2063        }
2064        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2065                if (!any_available && open_file->pid != current->tgid)
2066                        continue;
2067                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2068                        continue;
2069                if (with_delete && !(open_file->fid.access & DELETE))
2070                        continue;
2071                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2072                        if (!open_file->invalidHandle) {
2073                                /* found a good writable file */
2074                                cifsFileInfo_get(open_file);
2075                                spin_unlock(&cifs_inode->open_file_lock);
2076                                *ret_file = open_file;
2077                                return 0;
2078                        } else {
2079                                if (!inv_file)
2080                                        inv_file = open_file;
2081                        }
2082                }
2083        }
2084        /* couldn't find useable FH with same pid, try any available */
2085        if (!any_available) {
2086                any_available = true;
2087                goto refind_writable;
2088        }
2089
2090        if (inv_file) {
2091                any_available = false;
2092                cifsFileInfo_get(inv_file);
2093        }
2094
2095        spin_unlock(&cifs_inode->open_file_lock);
2096
2097        if (inv_file) {
2098                rc = cifs_reopen_file(inv_file, false);
2099                if (!rc) {
2100                        *ret_file = inv_file;
2101                        return 0;
2102                }
2103
2104                spin_lock(&cifs_inode->open_file_lock);
2105                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2106                spin_unlock(&cifs_inode->open_file_lock);
2107                cifsFileInfo_put(inv_file);
2108                ++refind;
2109                inv_file = NULL;
2110                spin_lock(&cifs_inode->open_file_lock);
2111                goto refind_writable;
2112        }
2113
2114        return rc;
2115}
2116
2117struct cifsFileInfo *
2118find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2119{
2120        struct cifsFileInfo *cfile;
2121        int rc;
2122
2123        rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2124        if (rc)
2125                cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2126
2127        return cfile;
2128}
2129
2130int
2131cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2132                       int flags,
2133                       struct cifsFileInfo **ret_file)
2134{
2135        struct cifsFileInfo *cfile;
2136        void *page = alloc_dentry_path();
2137
2138        *ret_file = NULL;
2139
2140        spin_lock(&tcon->open_file_lock);
2141        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2142                struct cifsInodeInfo *cinode;
2143                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2144                if (IS_ERR(full_path)) {
2145                        spin_unlock(&tcon->open_file_lock);
2146                        free_dentry_path(page);
2147                        return PTR_ERR(full_path);
2148                }
2149                if (strcmp(full_path, name))
2150                        continue;
2151
2152                cinode = CIFS_I(d_inode(cfile->dentry));
2153                spin_unlock(&tcon->open_file_lock);
2154                free_dentry_path(page);
2155                return cifs_get_writable_file(cinode, flags, ret_file);
2156        }
2157
2158        spin_unlock(&tcon->open_file_lock);
2159        free_dentry_path(page);
2160        return -ENOENT;
2161}
2162
2163int
2164cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2165                       struct cifsFileInfo **ret_file)
2166{
2167        struct cifsFileInfo *cfile;
2168        void *page = alloc_dentry_path();
2169
2170        *ret_file = NULL;
2171
2172        spin_lock(&tcon->open_file_lock);
2173        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2174                struct cifsInodeInfo *cinode;
2175                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2176                if (IS_ERR(full_path)) {
2177                        spin_unlock(&tcon->open_file_lock);
2178                        free_dentry_path(page);
2179                        return PTR_ERR(full_path);
2180                }
2181                if (strcmp(full_path, name))
2182                        continue;
2183
2184                cinode = CIFS_I(d_inode(cfile->dentry));
2185                spin_unlock(&tcon->open_file_lock);
2186                free_dentry_path(page);
2187                *ret_file = find_readable_file(cinode, 0);
2188                return *ret_file ? 0 : -ENOENT;
2189        }
2190
2191        spin_unlock(&tcon->open_file_lock);
2192        free_dentry_path(page);
2193        return -ENOENT;
2194}
2195
2196static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2197{
2198        struct address_space *mapping = page->mapping;
2199        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2200        char *write_data;
2201        int rc = -EFAULT;
2202        int bytes_written = 0;
2203        struct inode *inode;
2204        struct cifsFileInfo *open_file;
2205
2206        if (!mapping || !mapping->host)
2207                return -EFAULT;
2208
2209        inode = page->mapping->host;
2210
2211        offset += (loff_t)from;
2212        write_data = kmap(page);
2213        write_data += from;
2214
2215        if ((to > PAGE_SIZE) || (from > to)) {
2216                kunmap(page);
2217                return -EIO;
2218        }
2219
2220        /* racing with truncate? */
2221        if (offset > mapping->host->i_size) {
2222                kunmap(page);
2223                return 0; /* don't care */
2224        }
2225
2226        /* check to make sure that we are not extending the file */
2227        if (mapping->host->i_size - offset < (loff_t)to)
2228                to = (unsigned)(mapping->host->i_size - offset);
2229
2230        rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2231                                    &open_file);
2232        if (!rc) {
2233                bytes_written = cifs_write(open_file, open_file->pid,
2234                                           write_data, to - from, &offset);
2235                cifsFileInfo_put(open_file);
2236                /* Does mm or vfs already set times? */
2237                inode->i_atime = inode->i_mtime = current_time(inode);
2238                if ((bytes_written > 0) && (offset))
2239                        rc = 0;
2240                else if (bytes_written < 0)
2241                        rc = bytes_written;
2242                else
2243                        rc = -EFAULT;
2244        } else {
2245                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2246                if (!is_retryable_error(rc))
2247                        rc = -EIO;
2248        }
2249
2250        kunmap(page);
2251        return rc;
2252}
2253
2254static struct cifs_writedata *
2255wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2256                          pgoff_t end, pgoff_t *index,
2257                          unsigned int *found_pages)
2258{
2259        struct cifs_writedata *wdata;
2260
2261        wdata = cifs_writedata_alloc((unsigned int)tofind,
2262                                     cifs_writev_complete);
2263        if (!wdata)
2264                return NULL;
2265
2266        *found_pages = find_get_pages_range_tag(mapping, index, end,
2267                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2268        return wdata;
2269}
2270
2271static unsigned int
2272wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2273                    struct address_space *mapping,
2274                    struct writeback_control *wbc,
2275                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2276{
2277        unsigned int nr_pages = 0, i;
2278        struct page *page;
2279
2280        for (i = 0; i < found_pages; i++) {
2281                page = wdata->pages[i];
2282                /*
2283                 * At this point we hold neither the i_pages lock nor the
2284                 * page lock: the page may be truncated or invalidated
2285                 * (changing page->mapping to NULL), or even swizzled
2286                 * back from swapper_space to tmpfs file mapping
2287                 */
2288
2289                if (nr_pages == 0)
2290                        lock_page(page);
2291                else if (!trylock_page(page))
2292                        break;
2293
2294                if (unlikely(page->mapping != mapping)) {
2295                        unlock_page(page);
2296                        break;
2297                }
2298
2299                if (!wbc->range_cyclic && page->index > end) {
2300                        *done = true;
2301                        unlock_page(page);
2302                        break;
2303                }
2304
2305                if (*next && (page->index != *next)) {
2306                        /* Not next consecutive page */
2307                        unlock_page(page);
2308                        break;
2309                }
2310
2311                if (wbc->sync_mode != WB_SYNC_NONE)
2312                        wait_on_page_writeback(page);
2313
2314                if (PageWriteback(page) ||
2315                                !clear_page_dirty_for_io(page)) {
2316                        unlock_page(page);
2317                        break;
2318                }
2319
2320                /*
2321                 * This actually clears the dirty bit in the radix tree.
2322                 * See cifs_writepage() for more commentary.
2323                 */
2324                set_page_writeback(page);
2325                if (page_offset(page) >= i_size_read(mapping->host)) {
2326                        *done = true;
2327                        unlock_page(page);
2328                        end_page_writeback(page);
2329                        break;
2330                }
2331
2332                wdata->pages[i] = page;
2333                *next = page->index + 1;
2334                ++nr_pages;
2335        }
2336
2337        /* reset index to refind any pages skipped */
2338        if (nr_pages == 0)
2339                *index = wdata->pages[0]->index + 1;
2340
2341        /* put any pages we aren't going to use */
2342        for (i = nr_pages; i < found_pages; i++) {
2343                put_page(wdata->pages[i]);
2344                wdata->pages[i] = NULL;
2345        }
2346
2347        return nr_pages;
2348}
2349
2350static int
2351wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2352                 struct address_space *mapping, struct writeback_control *wbc)
2353{
2354        int rc;
2355
2356        wdata->sync_mode = wbc->sync_mode;
2357        wdata->nr_pages = nr_pages;
2358        wdata->offset = page_offset(wdata->pages[0]);
2359        wdata->pagesz = PAGE_SIZE;
2360        wdata->tailsz = min(i_size_read(mapping->host) -
2361                        page_offset(wdata->pages[nr_pages - 1]),
2362                        (loff_t)PAGE_SIZE);
2363        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2364        wdata->pid = wdata->cfile->pid;
2365
2366        rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2367        if (rc)
2368                return rc;
2369
2370        if (wdata->cfile->invalidHandle)
2371                rc = -EAGAIN;
2372        else
2373                rc = wdata->server->ops->async_writev(wdata,
2374                                                      cifs_writedata_release);
2375
2376        return rc;
2377}
2378
2379static int cifs_writepages(struct address_space *mapping,
2380                           struct writeback_control *wbc)
2381{
2382        struct inode *inode = mapping->host;
2383        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2384        struct TCP_Server_Info *server;
2385        bool done = false, scanned = false, range_whole = false;
2386        pgoff_t end, index;
2387        struct cifs_writedata *wdata;
2388        struct cifsFileInfo *cfile = NULL;
2389        int rc = 0;
2390        int saved_rc = 0;
2391        unsigned int xid;
2392
2393        /*
2394         * If wsize is smaller than the page cache size, default to writing
2395         * one page at a time via cifs_writepage
2396         */
2397        if (cifs_sb->ctx->wsize < PAGE_SIZE)
2398                return generic_writepages(mapping, wbc);
2399
2400        xid = get_xid();
2401        if (wbc->range_cyclic) {
2402                index = mapping->writeback_index; /* Start from prev offset */
2403                end = -1;
2404        } else {
2405                index = wbc->range_start >> PAGE_SHIFT;
2406                end = wbc->range_end >> PAGE_SHIFT;
2407                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2408                        range_whole = true;
2409                scanned = true;
2410        }
2411        server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2412
2413retry:
2414        while (!done && index <= end) {
2415                unsigned int i, nr_pages, found_pages, wsize;
2416                pgoff_t next = 0, tofind, saved_index = index;
2417                struct cifs_credits credits_on_stack;
2418                struct cifs_credits *credits = &credits_on_stack;
2419                int get_file_rc = 0;
2420
2421                if (cfile)
2422                        cifsFileInfo_put(cfile);
2423
2424                rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2425
2426                /* in case of an error store it to return later */
2427                if (rc)
2428                        get_file_rc = rc;
2429
2430                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2431                                                   &wsize, credits);
2432                if (rc != 0) {
2433                        done = true;
2434                        break;
2435                }
2436
2437                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2438
2439                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2440                                                  &found_pages);
2441                if (!wdata) {
2442                        rc = -ENOMEM;
2443                        done = true;
2444                        add_credits_and_wake_if(server, credits, 0);
2445                        break;
2446                }
2447
2448                if (found_pages == 0) {
2449                        kref_put(&wdata->refcount, cifs_writedata_release);
2450                        add_credits_and_wake_if(server, credits, 0);
2451                        break;
2452                }
2453
2454                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2455                                               end, &index, &next, &done);
2456
2457                /* nothing to write? */
2458                if (nr_pages == 0) {
2459                        kref_put(&wdata->refcount, cifs_writedata_release);
2460                        add_credits_and_wake_if(server, credits, 0);
2461                        continue;
2462                }
2463
2464                wdata->credits = credits_on_stack;
2465                wdata->cfile = cfile;
2466                wdata->server = server;
2467                cfile = NULL;
2468
2469                if (!wdata->cfile) {
2470                        cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2471                                 get_file_rc);
2472                        if (is_retryable_error(get_file_rc))
2473                                rc = get_file_rc;
2474                        else
2475                                rc = -EBADF;
2476                } else
2477                        rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2478
2479                for (i = 0; i < nr_pages; ++i)
2480                        unlock_page(wdata->pages[i]);
2481
2482                /* send failure -- clean up the mess */
2483                if (rc != 0) {
2484                        add_credits_and_wake_if(server, &wdata->credits, 0);
2485                        for (i = 0; i < nr_pages; ++i) {
2486                                if (is_retryable_error(rc))
2487                                        redirty_page_for_writepage(wbc,
2488                                                           wdata->pages[i]);
2489                                else
2490                                        SetPageError(wdata->pages[i]);
2491                                end_page_writeback(wdata->pages[i]);
2492                                put_page(wdata->pages[i]);
2493                        }
2494                        if (!is_retryable_error(rc))
2495                                mapping_set_error(mapping, rc);
2496                }
2497                kref_put(&wdata->refcount, cifs_writedata_release);
2498
2499                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2500                        index = saved_index;
2501                        continue;
2502                }
2503
2504                /* Return immediately if we received a signal during writing */
2505                if (is_interrupt_error(rc)) {
2506                        done = true;
2507                        break;
2508                }
2509
2510                if (rc != 0 && saved_rc == 0)
2511                        saved_rc = rc;
2512
2513                wbc->nr_to_write -= nr_pages;
2514                if (wbc->nr_to_write <= 0)
2515                        done = true;
2516
2517                index = next;
2518        }
2519
2520        if (!scanned && !done) {
2521                /*
2522                 * We hit the last page and there is more work to be done: wrap
2523                 * back to the start of the file
2524                 */
2525                scanned = true;
2526                index = 0;
2527                goto retry;
2528        }
2529
2530        if (saved_rc != 0)
2531                rc = saved_rc;
2532
2533        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2534                mapping->writeback_index = index;
2535
2536        if (cfile)
2537                cifsFileInfo_put(cfile);
2538        free_xid(xid);
2539        /* Indication to update ctime and mtime as close is deferred */
2540        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2541        return rc;
2542}
2543
2544static int
2545cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2546{
2547        int rc;
2548        unsigned int xid;
2549
2550        xid = get_xid();
2551/* BB add check for wbc flags */
2552        get_page(page);
2553        if (!PageUptodate(page))
2554                cifs_dbg(FYI, "ppw - page not up to date\n");
2555
2556        /*
2557         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2558         *
2559         * A writepage() implementation always needs to do either this,
2560         * or re-dirty the page with "redirty_page_for_writepage()" in
2561         * the case of a failure.
2562         *
2563         * Just unlocking the page will cause the radix tree tag-bits
2564         * to fail to update with the state of the page correctly.
2565         */
2566        set_page_writeback(page);
2567retry_write:
2568        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2569        if (is_retryable_error(rc)) {
2570                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2571                        goto retry_write;
2572                redirty_page_for_writepage(wbc, page);
2573        } else if (rc != 0) {
2574                SetPageError(page);
2575                mapping_set_error(page->mapping, rc);
2576        } else {
2577                SetPageUptodate(page);
2578        }
2579        end_page_writeback(page);
2580        put_page(page);
2581        free_xid(xid);
2582        return rc;
2583}
2584
/*
 * Write @page via cifs_writepage_locked(), then unlock it whatever the
 * outcome, and pass the result back.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc;

        rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return rc;
}
2591
2592static int cifs_write_end(struct file *file, struct address_space *mapping,
2593                        loff_t pos, unsigned len, unsigned copied,
2594                        struct page *page, void *fsdata)
2595{
2596        int rc;
2597        struct inode *inode = mapping->host;
2598        struct cifsFileInfo *cfile = file->private_data;
2599        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2600        __u32 pid;
2601
2602        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2603                pid = cfile->pid;
2604        else
2605                pid = current->tgid;
2606
2607        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2608                 page, pos, copied);
2609
2610        if (PageChecked(page)) {
2611                if (copied == len)
2612                        SetPageUptodate(page);
2613                ClearPageChecked(page);
2614        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2615                SetPageUptodate(page);
2616
2617        if (!PageUptodate(page)) {
2618                char *page_data;
2619                unsigned offset = pos & (PAGE_SIZE - 1);
2620                unsigned int xid;
2621
2622                xid = get_xid();
2623                /* this is probably better than directly calling
2624                   partialpage_write since in this function the file handle is
2625                   known which we might as well leverage */
2626                /* BB check if anything else missing out of ppw
2627                   such as updating last write time */
2628                page_data = kmap(page);
2629                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2630                /* if (rc < 0) should we set writebehind rc? */
2631                kunmap(page);
2632
2633                free_xid(xid);
2634        } else {
2635                rc = copied;
2636                pos += copied;
2637                set_page_dirty(page);
2638        }
2639
2640        if (rc > 0) {
2641                spin_lock(&inode->i_lock);
2642                if (pos > inode->i_size) {
2643                        i_size_write(inode, pos);
2644                        inode->i_blocks = (512 - 1 + pos) >> 9;
2645                }
2646                spin_unlock(&inode->i_lock);
2647        }
2648
2649        unlock_page(page);
2650        put_page(page);
2651        /* Indication to update ctime and mtime as close is deferred */
2652        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2653
2654        return rc;
2655}
2656
2657int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2658                      int datasync)
2659{
2660        unsigned int xid;
2661        int rc = 0;
2662        struct cifs_tcon *tcon;
2663        struct TCP_Server_Info *server;
2664        struct cifsFileInfo *smbfile = file->private_data;
2665        struct inode *inode = file_inode(file);
2666        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2667
2668        rc = file_write_and_wait_range(file, start, end);
2669        if (rc) {
2670                trace_cifs_fsync_err(inode->i_ino, rc);
2671                return rc;
2672        }
2673
2674        xid = get_xid();
2675
2676        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2677                 file, datasync);
2678
2679        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2680                rc = cifs_zap_mapping(inode);
2681                if (rc) {
2682                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2683                        rc = 0; /* don't care about it in fsync */
2684                }
2685        }
2686
2687        tcon = tlink_tcon(smbfile->tlink);
2688        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2689                server = tcon->ses->server;
2690                if (server->ops->flush)
2691                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2692                else
2693                        rc = -ENOSYS;
2694        }
2695
2696        free_xid(xid);
2697        return rc;
2698}
2699
2700int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2701{
2702        unsigned int xid;
2703        int rc = 0;
2704        struct cifs_tcon *tcon;
2705        struct TCP_Server_Info *server;
2706        struct cifsFileInfo *smbfile = file->private_data;
2707        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2708
2709        rc = file_write_and_wait_range(file, start, end);
2710        if (rc) {
2711                trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2712                return rc;
2713        }
2714
2715        xid = get_xid();
2716
2717        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2718                 file, datasync);
2719
2720        tcon = tlink_tcon(smbfile->tlink);
2721        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2722                server = tcon->ses->server;
2723                if (server->ops->flush)
2724                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2725                else
2726                        rc = -ENOSYS;
2727        }
2728
2729        free_xid(xid);
2730        return rc;
2731}
2732
2733/*
2734 * As file closes, flush all cached write data for this inode checking
2735 * for write behind errors.
2736 */
2737int cifs_flush(struct file *file, fl_owner_t id)
2738{
2739        struct inode *inode = file_inode(file);
2740        int rc = 0;
2741
2742        if (file->f_mode & FMODE_WRITE)
2743                rc = filemap_write_and_wait(inode->i_mapping);
2744
2745        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2746        if (rc)
2747                trace_cifs_flush_err(inode->i_ino, rc);
2748        return rc;
2749}
2750
2751static int
2752cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2753{
2754        int rc = 0;
2755        unsigned long i;
2756
2757        for (i = 0; i < num_pages; i++) {
2758                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2759                if (!pages[i]) {
2760                        /*
2761                         * save number of pages we have already allocated and
2762                         * return with ENOMEM error
2763                         */
2764                        num_pages = i;
2765                        rc = -ENOMEM;
2766                        break;
2767                }
2768        }
2769
2770        if (rc) {
2771                for (i = 0; i < num_pages; i++)
2772                        put_page(pages[i]);
2773        }
2774        return rc;
2775}
2776
2777static inline
2778size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2779{
2780        size_t num_pages;
2781        size_t clen;
2782
2783        clen = min_t(const size_t, len, wsize);
2784        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2785
2786        if (cur_len)
2787                *cur_len = clen;
2788
2789        return num_pages;
2790}
2791
2792static void
2793cifs_uncached_writedata_release(struct kref *refcount)
2794{
2795        int i;
2796        struct cifs_writedata *wdata = container_of(refcount,
2797                                        struct cifs_writedata, refcount);
2798
2799        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2800        for (i = 0; i < wdata->nr_pages; i++)
2801                put_page(wdata->pages[i]);
2802        cifs_writedata_release(refcount);
2803}
2804
2805static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2806
2807static void
2808cifs_uncached_writev_complete(struct work_struct *work)
2809{
2810        struct cifs_writedata *wdata = container_of(work,
2811                                        struct cifs_writedata, work);
2812        struct inode *inode = d_inode(wdata->cfile->dentry);
2813        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2814
2815        spin_lock(&inode->i_lock);
2816        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2817        if (cifsi->server_eof > inode->i_size)
2818                i_size_write(inode, cifsi->server_eof);
2819        spin_unlock(&inode->i_lock);
2820
2821        complete(&wdata->done);
2822        collect_uncached_write_data(wdata->ctx);
2823        /* the below call can possibly free the last ref to aio ctx */
2824        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2825}
2826
2827static int
2828wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2829                      size_t *len, unsigned long *num_pages)
2830{
2831        size_t save_len, copied, bytes, cur_len = *len;
2832        unsigned long i, nr_pages = *num_pages;
2833
2834        save_len = cur_len;
2835        for (i = 0; i < nr_pages; i++) {
2836                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2837                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2838                cur_len -= copied;
2839                /*
2840                 * If we didn't copy as much as we expected, then that
2841                 * may mean we trod into an unmapped area. Stop copying
2842                 * at that point. On the next pass through the big
2843                 * loop, we'll likely end up getting a zero-length
2844                 * write and bailing out of it.
2845                 */
2846                if (copied < bytes)
2847                        break;
2848        }
2849        cur_len = save_len - cur_len;
2850        *len = cur_len;
2851
2852        /*
2853         * If we have no data to send, then that probably means that
2854         * the copy above failed altogether. That's most likely because
2855         * the address in the iovec was bogus. Return -EFAULT and let
2856         * the caller free anything we allocated and bail out.
2857         */
2858        if (!cur_len)
2859                return -EFAULT;
2860
2861        /*
2862         * i + 1 now represents the number of pages we actually used in
2863         * the copy phase above.
2864         */
2865        *num_pages = i + 1;
2866        return 0;
2867}
2868
2869static int
2870cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2871        struct cifs_aio_ctx *ctx)
2872{
2873        unsigned int wsize;
2874        struct cifs_credits credits;
2875        int rc;
2876        struct TCP_Server_Info *server = wdata->server;
2877
2878        do {
2879                if (wdata->cfile->invalidHandle) {
2880                        rc = cifs_reopen_file(wdata->cfile, false);
2881                        if (rc == -EAGAIN)
2882                                continue;
2883                        else if (rc)
2884                                break;
2885                }
2886
2887
2888                /*
2889                 * Wait for credits to resend this wdata.
2890                 * Note: we are attempting to resend the whole wdata not in
2891                 * segments
2892                 */
2893                do {
2894                        rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2895                                                &wsize, &credits);
2896                        if (rc)
2897                                goto fail;
2898
2899                        if (wsize < wdata->bytes) {
2900                                add_credits_and_wake_if(server, &credits, 0);
2901                                msleep(1000);
2902                        }
2903                } while (wsize < wdata->bytes);
2904                wdata->credits = credits;
2905
2906                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2907
2908                if (!rc) {
2909                        if (wdata->cfile->invalidHandle)
2910                                rc = -EAGAIN;
2911                        else {
2912#ifdef CONFIG_CIFS_SMB_DIRECT
2913                                if (wdata->mr) {
2914                                        wdata->mr->need_invalidate = true;
2915                                        smbd_deregister_mr(wdata->mr);
2916                                        wdata->mr = NULL;
2917                                }
2918#endif
2919                                rc = server->ops->async_writev(wdata,
2920                                        cifs_uncached_writedata_release);
2921                        }
2922                }
2923
2924                /* If the write was successfully sent, we are done */
2925                if (!rc) {
2926                        list_add_tail(&wdata->list, wdata_list);
2927                        return 0;
2928                }
2929
2930                /* Roll back credits and retry if needed */
2931                add_credits_and_wake_if(server, &wdata->credits, 0);
2932        } while (rc == -EAGAIN);
2933
2934fail:
2935        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2936        return rc;
2937}
2938
2939static int
2940cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2941                     struct cifsFileInfo *open_file,
2942                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2943                     struct cifs_aio_ctx *ctx)
2944{
2945        int rc = 0;
2946        size_t cur_len;
2947        unsigned long nr_pages, num_pages, i;
2948        struct cifs_writedata *wdata;
2949        struct iov_iter saved_from = *from;
2950        loff_t saved_offset = offset;
2951        pid_t pid;
2952        struct TCP_Server_Info *server;
2953        struct page **pagevec;
2954        size_t start;
2955        unsigned int xid;
2956
2957        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2958                pid = open_file->pid;
2959        else
2960                pid = current->tgid;
2961
2962        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2963        xid = get_xid();
2964
2965        do {
2966                unsigned int wsize;
2967                struct cifs_credits credits_on_stack;
2968                struct cifs_credits *credits = &credits_on_stack;
2969
2970                if (open_file->invalidHandle) {
2971                        rc = cifs_reopen_file(open_file, false);
2972                        if (rc == -EAGAIN)
2973                                continue;
2974                        else if (rc)
2975                                break;
2976                }
2977
2978                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2979                                                   &wsize, credits);
2980                if (rc)
2981                        break;
2982
2983                cur_len = min_t(const size_t, len, wsize);
2984
2985                if (ctx->direct_io) {
2986                        ssize_t result;
2987
2988                        result = iov_iter_get_pages_alloc(
2989                                from, &pagevec, cur_len, &start);
2990                        if (result < 0) {
2991                                cifs_dbg(VFS,
2992                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2993                                         result, iov_iter_type(from),
2994                                         from->iov_offset, from->count);
2995                                dump_stack();
2996
2997                                rc = result;
2998                                add_credits_and_wake_if(server, credits, 0);
2999                                break;
3000                        }
3001                        cur_len = (size_t)result;
3002                        iov_iter_advance(from, cur_len);
3003
3004                        nr_pages =
3005                                (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3006
3007                        wdata = cifs_writedata_direct_alloc(pagevec,
3008                                             cifs_uncached_writev_complete);
3009                        if (!wdata) {
3010                                rc = -ENOMEM;
3011                                add_credits_and_wake_if(server, credits, 0);
3012                                break;
3013                        }
3014
3015
3016                        wdata->page_offset = start;
3017                        wdata->tailsz =
3018                                nr_pages > 1 ?
3019                                        cur_len - (PAGE_SIZE - start) -
3020                                        (nr_pages - 2) * PAGE_SIZE :
3021                                        cur_len;
3022                } else {
3023                        nr_pages = get_numpages(wsize, len, &cur_len);
3024                        wdata = cifs_writedata_alloc(nr_pages,
3025                                             cifs_uncached_writev_complete);
3026                        if (!wdata) {
3027                                rc = -ENOMEM;
3028                                add_credits_and_wake_if(server, credits, 0);
3029                                break;
3030                        }
3031
3032                        rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3033                        if (rc) {
3034                                kvfree(wdata->pages);
3035                                kfree(wdata);
3036                                add_credits_and_wake_if(server, credits, 0);
3037                                break;
3038                        }
3039
3040                        num_pages = nr_pages;
3041                        rc = wdata_fill_from_iovec(
3042                                wdata, from, &cur_len, &num_pages);
3043                        if (rc) {
3044                                for (i = 0; i < nr_pages; i++)
3045                                        put_page(wdata->pages[i]);
3046                                kvfree(wdata->pages);
3047                                kfree(wdata);
3048                                add_credits_and_wake_if(server, credits, 0);
3049                                break;
3050                        }
3051
3052                        /*
3053                         * Bring nr_pages down to the number of pages we
3054                         * actually used, and free any pages that we didn't use.
3055                         */
3056                        for ( ; nr_pages > num_pages; nr_pages--)
3057                                put_page(wdata->pages[nr_pages - 1]);
3058
3059                        wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3060                }
3061
3062                wdata->sync_mode = WB_SYNC_ALL;
3063                wdata->nr_pages = nr_pages;
3064                wdata->offset = (__u64)offset;
3065                wdata->cfile = cifsFileInfo_get(open_file);
3066                wdata->server = server;
3067                wdata->pid = pid;
3068                wdata->bytes = cur_len;
3069                wdata->pagesz = PAGE_SIZE;
3070                wdata->credits = credits_on_stack;
3071                wdata->ctx = ctx;
3072                kref_get(&ctx->refcount);
3073
3074                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3075
3076                if (!rc) {
3077                        if (wdata->cfile->invalidHandle)
3078                                rc = -EAGAIN;
3079                        else
3080                                rc = server->ops->async_writev(wdata,
3081                                        cifs_uncached_writedata_release);
3082                }
3083
3084                if (rc) {
3085                        add_credits_and_wake_if(server, &wdata->credits, 0);
3086                        kref_put(&wdata->refcount,
3087                                 cifs_uncached_writedata_release);
3088                        if (rc == -EAGAIN) {
3089                                *from = saved_from;
3090                                iov_iter_advance(from, offset - saved_offset);
3091                                continue;
3092                        }
3093                        break;
3094                }
3095
3096                list_add_tail(&wdata->list, wdata_list);
3097                offset += cur_len;
3098                len -= cur_len;
3099        } while (len > 0);
3100
3101        free_xid(xid);
3102        return rc;
3103}
3104
3105static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3106{
3107        struct cifs_writedata *wdata, *tmp;
3108        struct cifs_tcon *tcon;
3109        struct cifs_sb_info *cifs_sb;
3110        struct dentry *dentry = ctx->cfile->dentry;
3111        int rc;
3112
3113        tcon = tlink_tcon(ctx->cfile->tlink);
3114        cifs_sb = CIFS_SB(dentry->d_sb);
3115
3116        mutex_lock(&ctx->aio_mutex);
3117
3118        if (list_empty(&ctx->list)) {
3119                mutex_unlock(&ctx->aio_mutex);
3120                return;
3121        }
3122
3123        rc = ctx->rc;
3124        /*
3125         * Wait for and collect replies for any successful sends in order of
3126         * increasing offset. Once an error is hit, then return without waiting
3127         * for any more replies.
3128         */
3129restart_loop:
3130        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3131                if (!rc) {
3132                        if (!try_wait_for_completion(&wdata->done)) {
3133                                mutex_unlock(&ctx->aio_mutex);
3134                                return;
3135                        }
3136
3137                        if (wdata->result)
3138                                rc = wdata->result;
3139                        else
3140                                ctx->total_len += wdata->bytes;
3141
3142                        /* resend call if it's a retryable error */
3143                        if (rc == -EAGAIN) {
3144                                struct list_head tmp_list;
3145                                struct iov_iter tmp_from = ctx->iter;
3146
3147                                INIT_LIST_HEAD(&tmp_list);
3148                                list_del_init(&wdata->list);
3149
3150                                if (ctx->direct_io)
3151                                        rc = cifs_resend_wdata(
3152                                                wdata, &tmp_list, ctx);
3153                                else {
3154                                        iov_iter_advance(&tmp_from,
3155                                                 wdata->offset - ctx->pos);
3156
3157                                        rc = cifs_write_from_iter(wdata->offset,
3158                                                wdata->bytes, &tmp_from,
3159                                                ctx->cfile, cifs_sb, &tmp_list,
3160                                                ctx);
3161
3162                                        kref_put(&wdata->refcount,
3163                                                cifs_uncached_writedata_release);
3164                                }
3165
3166                                list_splice(&tmp_list, &ctx->list);
3167                                goto restart_loop;
3168                        }
3169                }
3170                list_del_init(&wdata->list);
3171                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3172        }
3173
3174        cifs_stats_bytes_written(tcon, ctx->total_len);
3175        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3176
3177        ctx->rc = (rc == 0) ? ctx->total_len : rc;
3178
3179        mutex_unlock(&ctx->aio_mutex);
3180
3181        if (ctx->iocb && ctx->iocb->ki_complete)
3182                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3183        else
3184                complete(&ctx->done);
3185}
3186
3187static ssize_t __cifs_writev(
3188        struct kiocb *iocb, struct iov_iter *from, bool direct)
3189{
3190        struct file *file = iocb->ki_filp;
3191        ssize_t total_written = 0;
3192        struct cifsFileInfo *cfile;
3193        struct cifs_tcon *tcon;
3194        struct cifs_sb_info *cifs_sb;
3195        struct cifs_aio_ctx *ctx;
3196        struct iov_iter saved_from = *from;
3197        size_t len = iov_iter_count(from);
3198        int rc;
3199
3200        /*
3201         * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3202         * In this case, fall back to non-direct write function.
3203         * this could be improved by getting pages directly in ITER_KVEC
3204         */
3205        if (direct && iov_iter_is_kvec(from)) {
3206                cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3207                direct = false;
3208        }
3209
3210        rc = generic_write_checks(iocb, from);
3211        if (rc <= 0)
3212                return rc;
3213
3214        cifs_sb = CIFS_FILE_SB(file);
3215        cfile = file->private_data;
3216        tcon = tlink_tcon(cfile->tlink);
3217
3218        if (!tcon->ses->server->ops->async_writev)
3219                return -ENOSYS;
3220
3221        ctx = cifs_aio_ctx_alloc();
3222        if (!ctx)
3223                return -ENOMEM;
3224
3225        ctx->cfile = cifsFileInfo_get(cfile);
3226
3227        if (!is_sync_kiocb(iocb))
3228                ctx->iocb = iocb;
3229
3230        ctx->pos = iocb->ki_pos;
3231
3232        if (direct) {
3233                ctx->direct_io = true;
3234                ctx->iter = *from;
3235                ctx->len = len;
3236        } else {
3237                rc = setup_aio_ctx_iter(ctx, from, WRITE);
3238                if (rc) {
3239                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3240                        return rc;
3241                }
3242        }
3243
3244        /* grab a lock here due to read response handlers can access ctx */
3245        mutex_lock(&ctx->aio_mutex);
3246
3247        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3248                                  cfile, cifs_sb, &ctx->list, ctx);
3249
3250        /*
3251         * If at least one write was successfully sent, then discard any rc
3252         * value from the later writes. If the other write succeeds, then
3253         * we'll end up returning whatever was written. If it fails, then
3254         * we'll get a new rc value from that.
3255         */
3256        if (!list_empty(&ctx->list))
3257                rc = 0;
3258
3259        mutex_unlock(&ctx->aio_mutex);
3260
3261        if (rc) {
3262                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3263                return rc;
3264        }
3265
3266        if (!is_sync_kiocb(iocb)) {
3267                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                return -EIOCBQUEUED;
3269        }
3270
3271        rc = wait_for_completion_killable(&ctx->done);
3272        if (rc) {
3273                mutex_lock(&ctx->aio_mutex);
3274                ctx->rc = rc = -EINTR;
3275                total_written = ctx->total_len;
3276                mutex_unlock(&ctx->aio_mutex);
3277        } else {
3278                rc = ctx->rc;
3279                total_written = ctx->total_len;
3280        }
3281
3282        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3283
3284        if (unlikely(!total_written))
3285                return rc;
3286
3287        iocb->ki_pos += total_written;
3288        return total_written;
3289}
3290
3291ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3292{
3293        return __cifs_writev(iocb, from, true);
3294}
3295
3296ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3297{
3298        return __cifs_writev(iocb, from, false);
3299}
3300
3301static ssize_t
3302cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3303{
3304        struct file *file = iocb->ki_filp;
3305        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3306        struct inode *inode = file->f_mapping->host;
3307        struct cifsInodeInfo *cinode = CIFS_I(inode);
3308        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3309        ssize_t rc;
3310
3311        inode_lock(inode);
3312        /*
3313         * We need to hold the sem to be sure nobody modifies lock list
3314         * with a brlock that prevents writing.
3315         */
3316        down_read(&cinode->lock_sem);
3317
3318        rc = generic_write_checks(iocb, from);
3319        if (rc <= 0)
3320                goto out;
3321
3322        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3323                                     server->vals->exclusive_lock_type, 0,
3324                                     NULL, CIFS_WRITE_OP))
3325                rc = __generic_file_write_iter(iocb, from);
3326        else
3327                rc = -EACCES;
3328out:
3329        up_read(&cinode->lock_sem);
3330        inode_unlock(inode);
3331
3332        if (rc > 0)
3333                rc = generic_write_sync(iocb, rc);
3334        return rc;
3335}
3336
3337ssize_t
3338cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3339{
3340        struct inode *inode = file_inode(iocb->ki_filp);
3341        struct cifsInodeInfo *cinode = CIFS_I(inode);
3342        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3343        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3344                                                iocb->ki_filp->private_data;
3345        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3346        ssize_t written;
3347
3348        written = cifs_get_writer(cinode);
3349        if (written)
3350                return written;
3351
3352        if (CIFS_CACHE_WRITE(cinode)) {
3353                if (cap_unix(tcon->ses) &&
3354                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3355                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3356                        written = generic_file_write_iter(iocb, from);
3357                        goto out;
3358                }
3359                written = cifs_writev(iocb, from);
3360                goto out;
3361        }
3362        /*
3363         * For non-oplocked files in strict cache mode we need to write the data
3364         * to the server exactly from the pos to pos+len-1 rather than flush all
3365         * affected pages because it may cause a error with mandatory locks on
3366         * these pages but not on the region from pos to ppos+len-1.
3367         */
3368        written = cifs_user_writev(iocb, from);
3369        if (CIFS_CACHE_READ(cinode)) {
3370                /*
3371                 * We have read level caching and we have just sent a write
3372                 * request to the server thus making data in the cache stale.
3373                 * Zap the cache and set oplock/lease level to NONE to avoid
3374                 * reading stale data from the cache. All subsequent read
3375                 * operations will read new data from the server.
3376                 */
3377                cifs_zap_mapping(inode);
3378                cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3379                         inode);
3380                cinode->oplock = 0;
3381        }
3382out:
3383        cifs_put_writer(cinode);
3384        return written;
3385}
3386
3387static struct cifs_readdata *
3388cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3389{
3390        struct cifs_readdata *rdata;
3391
3392        rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3393        if (rdata != NULL) {
3394                rdata->pages = pages;
3395                kref_init(&rdata->refcount);
3396                INIT_LIST_HEAD(&rdata->list);
3397                init_completion(&rdata->done);
3398                INIT_WORK(&rdata->work, complete);
3399        }
3400
3401        return rdata;
3402}
3403
3404static struct cifs_readdata *
3405cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3406{
3407        struct page **pages =
3408                kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3409        struct cifs_readdata *ret = NULL;
3410
3411        if (pages) {
3412                ret = cifs_readdata_direct_alloc(pages, complete);
3413                if (!ret)
3414                        kfree(pages);
3415        }
3416
3417        return ret;
3418}
3419
3420void
3421cifs_readdata_release(struct kref *refcount)
3422{
3423        struct cifs_readdata *rdata = container_of(refcount,
3424                                        struct cifs_readdata, refcount);
3425#ifdef CONFIG_CIFS_SMB_DIRECT
3426        if (rdata->mr) {
3427                smbd_deregister_mr(rdata->mr);
3428                rdata->mr = NULL;
3429        }
3430#endif
3431        if (rdata->cfile)
3432                cifsFileInfo_put(rdata->cfile);
3433
3434        kvfree(rdata->pages);
3435        kfree(rdata);
3436}
3437
3438static int
3439cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3440{
3441        int rc = 0;
3442        struct page *page;
3443        unsigned int i;
3444
3445        for (i = 0; i < nr_pages; i++) {
3446                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3447                if (!page) {
3448                        rc = -ENOMEM;
3449                        break;
3450                }
3451                rdata->pages[i] = page;
3452        }
3453
3454        if (rc) {
3455                unsigned int nr_page_failed = i;
3456
3457                for (i = 0; i < nr_page_failed; i++) {
3458                        put_page(rdata->pages[i]);
3459                        rdata->pages[i] = NULL;
3460                }
3461        }
3462        return rc;
3463}
3464
3465static void
3466cifs_uncached_readdata_release(struct kref *refcount)
3467{
3468        struct cifs_readdata *rdata = container_of(refcount,
3469                                        struct cifs_readdata, refcount);
3470        unsigned int i;
3471
3472        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3473        for (i = 0; i < rdata->nr_pages; i++) {
3474                put_page(rdata->pages[i]);
3475        }
3476        cifs_readdata_release(refcount);
3477}
3478
3479/**
3480 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3481 * @rdata:      the readdata response with list of pages holding data
3482 * @iter:       destination for our data
3483 *
3484 * This function copies data from a list of pages in a readdata response into
3485 * an array of iovecs. It will first calculate where the data should go
3486 * based on the info in the readdata and then copy the data into that spot.
3487 */
3488static int
3489cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3490{
3491        size_t remaining = rdata->got_bytes;
3492        unsigned int i;
3493
3494        for (i = 0; i < rdata->nr_pages; i++) {
3495                struct page *page = rdata->pages[i];
3496                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3497                size_t written;
3498
3499                if (unlikely(iov_iter_is_pipe(iter))) {
3500                        void *addr = kmap_atomic(page);
3501
3502                        written = copy_to_iter(addr, copy, iter);
3503                        kunmap_atomic(addr);
3504                } else
3505                        written = copy_page_to_iter(page, 0, copy, iter);
3506                remaining -= written;
3507                if (written < copy && iov_iter_count(iter) > 0)
3508                        break;
3509        }
3510        return remaining ? -EFAULT : 0;
3511}
3512
3513static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3514
3515static void
3516cifs_uncached_readv_complete(struct work_struct *work)
3517{
3518        struct cifs_readdata *rdata = container_of(work,
3519                                                struct cifs_readdata, work);
3520
3521        complete(&rdata->done);
3522        collect_uncached_read_data(rdata->ctx);
3523        /* the below call can possibly free the last ref to aio ctx */
3524        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3525}
3526
3527static int
3528uncached_fill_pages(struct TCP_Server_Info *server,
3529                    struct cifs_readdata *rdata, struct iov_iter *iter,
3530                    unsigned int len)
3531{
3532        int result = 0;
3533        unsigned int i;
3534        unsigned int nr_pages = rdata->nr_pages;
3535        unsigned int page_offset = rdata->page_offset;
3536
3537        rdata->got_bytes = 0;
3538        rdata->tailsz = PAGE_SIZE;
3539        for (i = 0; i < nr_pages; i++) {
3540                struct page *page = rdata->pages[i];
3541                size_t n;
3542                unsigned int segment_size = rdata->pagesz;
3543
3544                if (i == 0)
3545                        segment_size -= page_offset;
3546                else
3547                        page_offset = 0;
3548
3549
3550                if (len <= 0) {
3551                        /* no need to hold page hostage */
3552                        rdata->pages[i] = NULL;
3553                        rdata->nr_pages--;
3554                        put_page(page);
3555                        continue;
3556                }
3557
3558                n = len;
3559                if (len >= segment_size)
3560                        /* enough data to fill the page */
3561                        n = segment_size;
3562                else
3563                        rdata->tailsz = len;
3564                len -= n;
3565
3566                if (iter)
3567                        result = copy_page_from_iter(
3568                                        page, page_offset, n, iter);
3569#ifdef CONFIG_CIFS_SMB_DIRECT
3570                else if (rdata->mr)
3571                        result = n;
3572#endif
3573                else
3574                        result = cifs_read_page_from_socket(
3575                                        server, page, page_offset, n);
3576                if (result < 0)
3577                        break;
3578
3579                rdata->got_bytes += result;
3580        }
3581
3582        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3583                                                rdata->got_bytes : result;
3584}
3585
3586static int
3587cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3588                              struct cifs_readdata *rdata, unsigned int len)
3589{
3590        return uncached_fill_pages(server, rdata, NULL, len);
3591}
3592
3593static int
3594cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3595                              struct cifs_readdata *rdata,
3596                              struct iov_iter *iter)
3597{
3598        return uncached_fill_pages(server, rdata, iter, iter->count);
3599}
3600
3601static int cifs_resend_rdata(struct cifs_readdata *rdata,
3602                        struct list_head *rdata_list,
3603                        struct cifs_aio_ctx *ctx)
3604{
3605        unsigned int rsize;
3606        struct cifs_credits credits;
3607        int rc;
3608        struct TCP_Server_Info *server;
3609
3610        /* XXX: should we pick a new channel here? */
3611        server = rdata->server;
3612
3613        do {
3614                if (rdata->cfile->invalidHandle) {
3615                        rc = cifs_reopen_file(rdata->cfile, true);
3616                        if (rc == -EAGAIN)
3617                                continue;
3618                        else if (rc)
3619                                break;
3620                }
3621
3622                /*
3623                 * Wait for credits to resend this rdata.
3624                 * Note: we are attempting to resend the whole rdata not in
3625                 * segments
3626                 */
3627                do {
3628                        rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3629                                                &rsize, &credits);
3630
3631                        if (rc)
3632                                goto fail;
3633
3634                        if (rsize < rdata->bytes) {
3635                                add_credits_and_wake_if(server, &credits, 0);
3636                                msleep(1000);
3637                        }
3638                } while (rsize < rdata->bytes);
3639                rdata->credits = credits;
3640
3641                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3642                if (!rc) {
3643                        if (rdata->cfile->invalidHandle)
3644                                rc = -EAGAIN;
3645                        else {
3646#ifdef CONFIG_CIFS_SMB_DIRECT
3647                                if (rdata->mr) {
3648                                        rdata->mr->need_invalidate = true;
3649                                        smbd_deregister_mr(rdata->mr);
3650                                        rdata->mr = NULL;
3651                                }
3652#endif
3653                                rc = server->ops->async_readv(rdata);
3654                        }
3655                }
3656
3657                /* If the read was successfully sent, we are done */
3658                if (!rc) {
3659                        /* Add to aio pending list */
3660                        list_add_tail(&rdata->list, rdata_list);
3661                        return 0;
3662                }
3663
3664                /* Roll back credits and retry if needed */
3665                add_credits_and_wake_if(server, &rdata->credits, 0);
3666        } while (rc == -EAGAIN);
3667
3668fail:
3669        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3670        return rc;
3671}
3672
3673static int
3674cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3675                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3676                     struct cifs_aio_ctx *ctx)
3677{
3678        struct cifs_readdata *rdata;
3679        unsigned int npages, rsize;
3680        struct cifs_credits credits_on_stack;
3681        struct cifs_credits *credits = &credits_on_stack;
3682        size_t cur_len;
3683        int rc;
3684        pid_t pid;
3685        struct TCP_Server_Info *server;
3686        struct page **pagevec;
3687        size_t start;
3688        struct iov_iter direct_iov = ctx->iter;
3689
3690        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3691
3692        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3693                pid = open_file->pid;
3694        else
3695                pid = current->tgid;
3696
3697        if (ctx->direct_io)
3698                iov_iter_advance(&direct_iov, offset - ctx->pos);
3699
3700        do {
3701                if (open_file->invalidHandle) {
3702                        rc = cifs_reopen_file(open_file, true);
3703                        if (rc == -EAGAIN)
3704                                continue;
3705                        else if (rc)
3706                                break;
3707                }
3708
3709                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3710                                                   &rsize, credits);
3711                if (rc)
3712                        break;
3713
3714                cur_len = min_t(const size_t, len, rsize);
3715
3716                if (ctx->direct_io) {
3717                        ssize_t result;
3718
3719                        result = iov_iter_get_pages_alloc(
3720                                        &direct_iov, &pagevec,
3721                                        cur_len, &start);
3722                        if (result < 0) {
3723                                cifs_dbg(VFS,
3724                                         "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3725                                         result, iov_iter_type(&direct_iov),
3726                                         direct_iov.iov_offset,
3727                                         direct_iov.count);
3728                                dump_stack();
3729
3730                                rc = result;
3731                                add_credits_and_wake_if(server, credits, 0);
3732                                break;
3733                        }
3734                        cur_len = (size_t)result;
3735                        iov_iter_advance(&direct_iov, cur_len);
3736
3737                        rdata = cifs_readdata_direct_alloc(
3738                                        pagevec, cifs_uncached_readv_complete);
3739                        if (!rdata) {
3740                                add_credits_and_wake_if(server, credits, 0);
3741                                rc = -ENOMEM;
3742                                break;
3743                        }
3744
3745                        npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3746                        rdata->page_offset = start;
3747                        rdata->tailsz = npages > 1 ?
3748                                cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3749                                cur_len;
3750
3751                } else {
3752
3753                        npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3754                        /* allocate a readdata struct */
3755                        rdata = cifs_readdata_alloc(npages,
3756                                            cifs_uncached_readv_complete);
3757                        if (!rdata) {
3758                                add_credits_and_wake_if(server, credits, 0);
3759                                rc = -ENOMEM;
3760                                break;
3761                        }
3762
3763                        rc = cifs_read_allocate_pages(rdata, npages);
3764                        if (rc) {
3765                                kvfree(rdata->pages);
3766                                kfree(rdata);
3767                                add_credits_and_wake_if(server, credits, 0);
3768                                break;
3769                        }
3770
3771                        rdata->tailsz = PAGE_SIZE;
3772                }
3773
3774                rdata->server = server;
3775                rdata->cfile = cifsFileInfo_get(open_file);
3776                rdata->nr_pages = npages;
3777                rdata->offset = offset;
3778                rdata->bytes = cur_len;
3779                rdata->pid = pid;
3780                rdata->pagesz = PAGE_SIZE;
3781                rdata->read_into_pages = cifs_uncached_read_into_pages;
3782                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3783                rdata->credits = credits_on_stack;
3784                rdata->ctx = ctx;
3785                kref_get(&ctx->refcount);
3786
3787                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3788
3789                if (!rc) {
3790                        if (rdata->cfile->invalidHandle)
3791                                rc = -EAGAIN;
3792                        else
3793                                rc = server->ops->async_readv(rdata);
3794                }
3795
3796                if (rc) {
3797                        add_credits_and_wake_if(server, &rdata->credits, 0);
3798                        kref_put(&rdata->refcount,
3799                                cifs_uncached_readdata_release);
3800                        if (rc == -EAGAIN) {
3801                                iov_iter_revert(&direct_iov, cur_len);
3802                                continue;
3803                        }
3804                        break;
3805                }
3806
3807                list_add_tail(&rdata->list, rdata_list);
3808                offset += cur_len;
3809                len -= cur_len;
3810        } while (len > 0);
3811
3812        return rc;
3813}
3814
3815static void
3816collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3817{
3818        struct cifs_readdata *rdata, *tmp;
3819        struct iov_iter *to = &ctx->iter;
3820        struct cifs_sb_info *cifs_sb;
3821        int rc;
3822
3823        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3824
3825        mutex_lock(&ctx->aio_mutex);
3826
3827        if (list_empty(&ctx->list)) {
3828                mutex_unlock(&ctx->aio_mutex);
3829                return;
3830        }
3831
3832        rc = ctx->rc;
3833        /* the loop below should proceed in the order of increasing offsets */
3834again:
3835        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3836                if (!rc) {
3837                        if (!try_wait_for_completion(&rdata->done)) {
3838                                mutex_unlock(&ctx->aio_mutex);
3839                                return;
3840                        }
3841
3842                        if (rdata->result == -EAGAIN) {
3843                                /* resend call if it's a retryable error */
3844                                struct list_head tmp_list;
3845                                unsigned int got_bytes = rdata->got_bytes;
3846
3847                                list_del_init(&rdata->list);
3848                                INIT_LIST_HEAD(&tmp_list);
3849
3850                                /*
3851                                 * Got a part of data and then reconnect has
3852                                 * happened -- fill the buffer and continue
3853                                 * reading.
3854                                 */
3855                                if (got_bytes && got_bytes < rdata->bytes) {
3856                                        rc = 0;
3857                                        if (!ctx->direct_io)
3858                                                rc = cifs_readdata_to_iov(rdata, to);
3859                                        if (rc) {
3860                                                kref_put(&rdata->refcount,
3861                                                        cifs_uncached_readdata_release);
3862                                                continue;
3863                                        }
3864                                }
3865
3866                                if (ctx->direct_io) {
3867                                        /*
3868                                         * Re-use rdata as this is a
3869                                         * direct I/O
3870                                         */
3871                                        rc = cifs_resend_rdata(
3872                                                rdata,
3873                                                &tmp_list, ctx);
3874                                } else {
3875                                        rc = cifs_send_async_read(
3876                                                rdata->offset + got_bytes,
3877                                                rdata->bytes - got_bytes,
3878                                                rdata->cfile, cifs_sb,
3879                                                &tmp_list, ctx);
3880
3881                                        kref_put(&rdata->refcount,
3882                                                cifs_uncached_readdata_release);
3883                                }
3884
3885                                list_splice(&tmp_list, &ctx->list);
3886
3887                                goto again;
3888                        } else if (rdata->result)
3889                                rc = rdata->result;
3890                        else if (!ctx->direct_io)
3891                                rc = cifs_readdata_to_iov(rdata, to);
3892
3893                        /* if there was a short read -- discard anything left */
3894                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3895                                rc = -ENODATA;
3896
3897                        ctx->total_len += rdata->got_bytes;
3898                }
3899                list_del_init(&rdata->list);
3900                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3901        }
3902
3903        if (!ctx->direct_io)
3904                ctx->total_len = ctx->len - iov_iter_count(to);
3905
3906        /* mask nodata case */
3907        if (rc == -ENODATA)
3908                rc = 0;
3909
3910        ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3911
3912        mutex_unlock(&ctx->aio_mutex);
3913
3914        if (ctx->iocb && ctx->iocb->ki_complete)
3915                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3916        else
3917                complete(&ctx->done);
3918}
3919
3920static ssize_t __cifs_readv(
3921        struct kiocb *iocb, struct iov_iter *to, bool direct)
3922{
3923        size_t len;
3924        struct file *file = iocb->ki_filp;
3925        struct cifs_sb_info *cifs_sb;
3926        struct cifsFileInfo *cfile;
3927        struct cifs_tcon *tcon;
3928        ssize_t rc, total_read = 0;
3929        loff_t offset = iocb->ki_pos;
3930        struct cifs_aio_ctx *ctx;
3931
3932        /*
3933         * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3934         * fall back to data copy read path
3935         * this could be improved by getting pages directly in ITER_KVEC
3936         */
3937        if (direct && iov_iter_is_kvec(to)) {
3938                cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3939                direct = false;
3940        }
3941
3942        len = iov_iter_count(to);
3943        if (!len)
3944                return 0;
3945
3946        cifs_sb = CIFS_FILE_SB(file);
3947        cfile = file->private_data;
3948        tcon = tlink_tcon(cfile->tlink);
3949
3950        if (!tcon->ses->server->ops->async_readv)
3951                return -ENOSYS;
3952
3953        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3954                cifs_dbg(FYI, "attempting read on write only file instance\n");
3955
3956        ctx = cifs_aio_ctx_alloc();
3957        if (!ctx)
3958                return -ENOMEM;
3959
3960        ctx->cfile = cifsFileInfo_get(cfile);
3961
3962        if (!is_sync_kiocb(iocb))
3963                ctx->iocb = iocb;
3964
3965        if (iter_is_iovec(to))
3966                ctx->should_dirty = true;
3967
3968        if (direct) {
3969                ctx->pos = offset;
3970                ctx->direct_io = true;
3971                ctx->iter = *to;
3972                ctx->len = len;
3973        } else {
3974                rc = setup_aio_ctx_iter(ctx, to, READ);
3975                if (rc) {
3976                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3977                        return rc;
3978                }
3979                len = ctx->len;
3980        }
3981
3982        /* grab a lock here due to read response handlers can access ctx */
3983        mutex_lock(&ctx->aio_mutex);
3984
3985        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3986
3987        /* if at least one read request send succeeded, then reset rc */
3988        if (!list_empty(&ctx->list))
3989                rc = 0;
3990
3991        mutex_unlock(&ctx->aio_mutex);
3992
3993        if (rc) {
3994                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3995                return rc;
3996        }
3997
3998        if (!is_sync_kiocb(iocb)) {
3999                kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000                return -EIOCBQUEUED;
4001        }
4002
4003        rc = wait_for_completion_killable(&ctx->done);
4004        if (rc) {
4005                mutex_lock(&ctx->aio_mutex);
4006                ctx->rc = rc = -EINTR;
4007                total_read = ctx->total_len;
4008                mutex_unlock(&ctx->aio_mutex);
4009        } else {
4010                rc = ctx->rc;
4011                total_read = ctx->total_len;
4012        }
4013
4014        kref_put(&ctx->refcount, cifs_aio_ctx_release);
4015
4016        if (total_read) {
4017                iocb->ki_pos += total_read;
4018                return total_read;
4019        }
4020        return rc;
4021}
4022
4023ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4024{
4025        return __cifs_readv(iocb, to, true);
4026}
4027
4028ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4029{
4030        return __cifs_readv(iocb, to, false);
4031}
4032
4033ssize_t
4034cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4035{
4036        struct inode *inode = file_inode(iocb->ki_filp);
4037        struct cifsInodeInfo *cinode = CIFS_I(inode);
4038        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4039        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4040                                                iocb->ki_filp->private_data;
4041        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4042        int rc = -EACCES;
4043
4044        /*
4045         * In strict cache mode we need to read from the server all the time
4046         * if we don't have level II oplock because the server can delay mtime
4047         * change - so we can't make a decision about inode invalidating.
4048         * And we can also fail with pagereading if there are mandatory locks
4049         * on pages affected by this read but not on the region from pos to
4050         * pos+len-1.
4051         */
4052        if (!CIFS_CACHE_READ(cinode))
4053                return cifs_user_readv(iocb, to);
4054
4055        if (cap_unix(tcon->ses) &&
4056            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4057            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4058                return generic_file_read_iter(iocb, to);
4059
4060        /*
4061         * We need to hold the sem to be sure nobody modifies lock list
4062         * with a brlock that prevents reading.
4063         */
4064        down_read(&cinode->lock_sem);
4065        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4066                                     tcon->ses->server->vals->shared_lock_type,
4067                                     0, NULL, CIFS_READ_OP))
4068                rc = generic_file_read_iter(iocb, to);
4069        up_read(&cinode->lock_sem);
4070        return rc;
4071}
4072
4073static ssize_t
4074cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4075{
4076        int rc = -EACCES;
4077        unsigned int bytes_read = 0;
4078        unsigned int total_read;
4079        unsigned int current_read_size;
4080        unsigned int rsize;
4081        struct cifs_sb_info *cifs_sb;
4082        struct cifs_tcon *tcon;
4083        struct TCP_Server_Info *server;
4084        unsigned int xid;
4085        char *cur_offset;
4086        struct cifsFileInfo *open_file;
4087        struct cifs_io_parms io_parms = {0};
4088        int buf_type = CIFS_NO_BUFFER;
4089        __u32 pid;
4090
4091        xid = get_xid();
4092        cifs_sb = CIFS_FILE_SB(file);
4093
4094        /* FIXME: set up handlers for larger reads and/or convert to async */
4095        rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4096
4097        if (file->private_data == NULL) {
4098                rc = -EBADF;
4099                free_xid(xid);
4100                return rc;
4101        }
4102        open_file = file->private_data;
4103        tcon = tlink_tcon(open_file->tlink);
4104        server = cifs_pick_channel(tcon->ses);
4105
4106        if (!server->ops->sync_read) {
4107                free_xid(xid);
4108                return -ENOSYS;
4109        }
4110
4111        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4112                pid = open_file->pid;
4113        else
4114                pid = current->tgid;
4115
4116        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4117                cifs_dbg(FYI, "attempting read on write only file instance\n");
4118
4119        for (total_read = 0, cur_offset = read_data; read_size > total_read;
4120             total_read += bytes_read, cur_offset += bytes_read) {
4121                do {
4122                        current_read_size = min_t(uint, read_size - total_read,
4123                                                  rsize);
4124                        /*
4125                         * For windows me and 9x we do not want to request more
4126                         * than it negotiated since it will refuse the read
4127                         * then.
4128                         */
4129                        if (!(tcon->ses->capabilities &
4130                                tcon->ses->server->vals->cap_large_files)) {
4131                                current_read_size = min_t(uint,
4132                                        current_read_size, CIFSMaxBufSize);
4133                        }
4134                        if (open_file->invalidHandle) {
4135                                rc = cifs_reopen_file(open_file, true);
4136                                if (rc != 0)
4137                                        break;
4138                        }
4139                        io_parms.pid = pid;
4140                        io_parms.tcon = tcon;
4141                        io_parms.offset = *offset;
4142                        io_parms.length = current_read_size;
4143                        io_parms.server = server;
4144                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4145                                                    &bytes_read, &cur_offset,
4146                                                    &buf_type);
4147                } while (rc == -EAGAIN);
4148
4149                if (rc || (bytes_read == 0)) {
4150                        if (total_read) {
4151                                break;
4152                        } else {
4153                                free_xid(xid);
4154                                return rc;
4155                        }
4156                } else {
4157                        cifs_stats_bytes_read(tcon, total_read);
4158                        *offset += bytes_read;
4159                }
4160        }
4161        free_xid(xid);
4162        return total_read;
4163}
4164
4165/*
4166 * If the page is mmap'ed into a process' page tables, then we need to make
4167 * sure that it doesn't change while being written back.
4168 */
4169static vm_fault_t
4170cifs_page_mkwrite(struct vm_fault *vmf)
4171{
4172        struct page *page = vmf->page;
4173
4174        lock_page(page);
4175        return VM_FAULT_LOCKED;
4176}
4177
4178static const struct vm_operations_struct cifs_file_vm_ops = {
4179        .fault = filemap_fault,
4180        .map_pages = filemap_map_pages,
4181        .page_mkwrite = cifs_page_mkwrite,
4182};
4183
4184int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4185{
4186        int xid, rc = 0;
4187        struct inode *inode = file_inode(file);
4188
4189        xid = get_xid();
4190
4191        if (!CIFS_CACHE_READ(CIFS_I(inode)))
4192                rc = cifs_zap_mapping(inode);
4193        if (!rc)
4194                rc = generic_file_mmap(file, vma);
4195        if (!rc)
4196                vma->vm_ops = &cifs_file_vm_ops;
4197
4198        free_xid(xid);
4199        return rc;
4200}
4201
4202int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4203{
4204        int rc, xid;
4205
4206        xid = get_xid();
4207
4208        rc = cifs_revalidate_file(file);
4209        if (rc)
4210                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4211                         rc);
4212        if (!rc)
4213                rc = generic_file_mmap(file, vma);
4214        if (!rc)
4215                vma->vm_ops = &cifs_file_vm_ops;
4216
4217        free_xid(xid);
4218        return rc;
4219}
4220
4221static void
4222cifs_readv_complete(struct work_struct *work)
4223{
4224        unsigned int i, got_bytes;
4225        struct cifs_readdata *rdata = container_of(work,
4226                                                struct cifs_readdata, work);
4227
4228        got_bytes = rdata->got_bytes;
4229        for (i = 0; i < rdata->nr_pages; i++) {
4230                struct page *page = rdata->pages[i];
4231
4232                lru_cache_add(page);
4233
4234                if (rdata->result == 0 ||
4235                    (rdata->result == -EAGAIN && got_bytes)) {
4236                        flush_dcache_page(page);
4237                        SetPageUptodate(page);
4238                }
4239
4240                unlock_page(page);
4241
4242                if (rdata->result == 0 ||
4243                    (rdata->result == -EAGAIN && got_bytes))
4244                        cifs_readpage_to_fscache(rdata->mapping->host, page);
4245
4246                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4247
4248                put_page(page);
4249                rdata->pages[i] = NULL;
4250        }
4251        kref_put(&rdata->refcount, cifs_readdata_release);
4252}
4253
4254static int
4255readpages_fill_pages(struct TCP_Server_Info *server,
4256                     struct cifs_readdata *rdata, struct iov_iter *iter,
4257                     unsigned int len)
4258{
4259        int result = 0;
4260        unsigned int i;
4261        u64 eof;
4262        pgoff_t eof_index;
4263        unsigned int nr_pages = rdata->nr_pages;
4264        unsigned int page_offset = rdata->page_offset;
4265
4266        /* determine the eof that the server (probably) has */
4267        eof = CIFS_I(rdata->mapping->host)->server_eof;
4268        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4269        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4270
4271        rdata->got_bytes = 0;
4272        rdata->tailsz = PAGE_SIZE;
4273        for (i = 0; i < nr_pages; i++) {
4274                struct page *page = rdata->pages[i];
4275                unsigned int to_read = rdata->pagesz;
4276                size_t n;
4277
4278                if (i == 0)
4279                        to_read -= page_offset;
4280                else
4281                        page_offset = 0;
4282
4283                n = to_read;
4284
4285                if (len >= to_read) {
4286                        len -= to_read;
4287                } else if (len > 0) {
4288                        /* enough for partial page, fill and zero the rest */
4289                        zero_user(page, len + page_offset, to_read - len);
4290                        n = rdata->tailsz = len;
4291                        len = 0;
4292                } else if (page->index > eof_index) {
4293                        /*
4294                         * The VFS will not try to do readahead past the
4295                         * i_size, but it's possible that we have outstanding
4296                         * writes with gaps in the middle and the i_size hasn't
4297                         * caught up yet. Populate those with zeroed out pages
4298                         * to prevent the VFS from repeatedly attempting to
4299                         * fill them until the writes are flushed.
4300                         */
4301                        zero_user(page, 0, PAGE_SIZE);
4302                        lru_cache_add(page);
4303                        flush_dcache_page(page);
4304                        SetPageUptodate(page);
4305                        unlock_page(page);
4306                        put_page(page);
4307                        rdata->pages[i] = NULL;
4308                        rdata->nr_pages--;
4309                        continue;
4310                } else {
4311                        /* no need to hold page hostage */
4312                        lru_cache_add(page);
4313                        unlock_page(page);
4314                        put_page(page);
4315                        rdata->pages[i] = NULL;
4316                        rdata->nr_pages--;
4317                        continue;
4318                }
4319
4320                if (iter)
4321                        result = copy_page_from_iter(
4322                                        page, page_offset, n, iter);
4323#ifdef CONFIG_CIFS_SMB_DIRECT
4324                else if (rdata->mr)
4325                        result = n;
4326#endif
4327                else
4328                        result = cifs_read_page_from_socket(
4329                                        server, page, page_offset, n);
4330                if (result < 0)
4331                        break;
4332
4333                rdata->got_bytes += result;
4334        }
4335
4336        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4337                                                rdata->got_bytes : result;
4338}
4339
4340static int
4341cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4342                               struct cifs_readdata *rdata, unsigned int len)
4343{
4344        return readpages_fill_pages(server, rdata, NULL, len);
4345}
4346
/*
 * copy_into_pages callback for readpages requests: copies everything that is
 * left in @iter into the request's pages via readpages_fill_pages().
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	unsigned int len = iov_iter_count(iter);

	return readpages_fill_pages(server, rdata, iter, len);
}
4354
4355static int
4356readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4357                    unsigned int rsize, struct list_head *tmplist,
4358                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4359{
4360        struct page *page, *tpage;
4361        unsigned int expected_index;
4362        int rc;
4363        gfp_t gfp = readahead_gfp_mask(mapping);
4364
4365        INIT_LIST_HEAD(tmplist);
4366
4367        page = lru_to_page(page_list);
4368
4369        /*
4370         * Lock the page and put it in the cache. Since no one else
4371         * should have access to this page, we're safe to simply set
4372         * PG_locked without checking it first.
4373         */
4374        __SetPageLocked(page);
4375        rc = add_to_page_cache_locked(page, mapping,
4376                                      page->index, gfp);
4377
4378        /* give up if we can't stick it in the cache */
4379        if (rc) {
4380                __ClearPageLocked(page);
4381                return rc;
4382        }
4383
4384        /* move first page to the tmplist */
4385        *offset = (loff_t)page->index << PAGE_SHIFT;
4386        *bytes = PAGE_SIZE;
4387        *nr_pages = 1;
4388        list_move_tail(&page->lru, tmplist);
4389
4390        /* now try and add more pages onto the request */
4391        expected_index = page->index + 1;
4392        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4393                /* discontinuity ? */
4394                if (page->index != expected_index)
4395                        break;
4396
4397                /* would this page push the read over the rsize? */
4398                if (*bytes + PAGE_SIZE > rsize)
4399                        break;
4400
4401                __SetPageLocked(page);
4402                rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4403                if (rc) {
4404                        __ClearPageLocked(page);
4405                        break;
4406                }
4407                list_move_tail(&page->lru, tmplist);
4408                (*bytes) += PAGE_SIZE;
4409                expected_index++;
4410                (*nr_pages)++;
4411        }
4412        return rc;
4413}
4414
4415static int cifs_readpages(struct file *file, struct address_space *mapping,
4416        struct list_head *page_list, unsigned num_pages)
4417{
4418        int rc;
4419        int err = 0;
4420        struct list_head tmplist;
4421        struct cifsFileInfo *open_file = file->private_data;
4422        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4423        struct TCP_Server_Info *server;
4424        pid_t pid;
4425        unsigned int xid;
4426
4427        xid = get_xid();
4428        /*
4429         * Reads as many pages as possible from fscache. Returns -ENOBUFS
4430         * immediately if the cookie is negative
4431         *
4432         * After this point, every page in the list might have PG_fscache set,
4433         * so we will need to clean that up off of every page we don't use.
4434         */
4435        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4436                                         &num_pages);
4437        if (rc == 0) {
4438                free_xid(xid);
4439                return rc;
4440        }
4441
4442        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4443                pid = open_file->pid;
4444        else
4445                pid = current->tgid;
4446
4447        rc = 0;
4448        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4449
4450        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4451                 __func__, file, mapping, num_pages);
4452
4453        /*
4454         * Start with the page at end of list and move it to private
4455         * list. Do the same with any following pages until we hit
4456         * the rsize limit, hit an index discontinuity, or run out of
4457         * pages. Issue the async read and then start the loop again
4458         * until the list is empty.
4459         *
4460         * Note that list order is important. The page_list is in
4461         * the order of declining indexes. When we put the pages in
4462         * the rdata->pages, then we want them in increasing order.
4463         */
4464        while (!list_empty(page_list) && !err) {
4465                unsigned int i, nr_pages, bytes, rsize;
4466                loff_t offset;
4467                struct page *page, *tpage;
4468                struct cifs_readdata *rdata;
4469                struct cifs_credits credits_on_stack;
4470                struct cifs_credits *credits = &credits_on_stack;
4471
4472                if (open_file->invalidHandle) {
4473                        rc = cifs_reopen_file(open_file, true);
4474                        if (rc == -EAGAIN)
4475                                continue;
4476                        else if (rc)
4477                                break;
4478                }
4479
4480                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4481                                                   &rsize, credits);
4482                if (rc)
4483                        break;
4484
4485                /*
4486                 * Give up immediately if rsize is too small to read an entire
4487                 * page. The VFS will fall back to readpage. We should never
4488                 * reach this point however since we set ra_pages to 0 when the
4489                 * rsize is smaller than a cache page.
4490                 */
4491                if (unlikely(rsize < PAGE_SIZE)) {
4492                        add_credits_and_wake_if(server, credits, 0);
4493                        free_xid(xid);
4494                        return 0;
4495                }
4496
4497                nr_pages = 0;
4498                err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4499                                         &nr_pages, &offset, &bytes);
4500                if (!nr_pages) {
4501                        add_credits_and_wake_if(server, credits, 0);
4502                        break;
4503                }
4504
4505                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4506                if (!rdata) {
4507                        /* best to give up if we're out of mem */
4508                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4509                                list_del(&page->lru);
4510                                lru_cache_add(page);
4511                                unlock_page(page);
4512                                put_page(page);
4513                        }
4514                        rc = -ENOMEM;
4515                        add_credits_and_wake_if(server, credits, 0);
4516                        break;
4517                }
4518
4519                rdata->cfile = cifsFileInfo_get(open_file);
4520                rdata->server = server;
4521                rdata->mapping = mapping;
4522                rdata->offset = offset;
4523                rdata->bytes = bytes;
4524                rdata->pid = pid;
4525                rdata->pagesz = PAGE_SIZE;
4526                rdata->tailsz = PAGE_SIZE;
4527                rdata->read_into_pages = cifs_readpages_read_into_pages;
4528                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4529                rdata->credits<