linux/fs/nfs/write.c
<<
>>
Prefs
   1/*
   2 * linux/fs/nfs/write.c
   3 *
   4 * Write file data over NFS.
   5 *
   6 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/slab.h>
  11#include <linux/mm.h>
  12#include <linux/pagemap.h>
  13#include <linux/file.h>
  14#include <linux/writeback.h>
  15#include <linux/swap.h>
  16#include <linux/migrate.h>
  17
  18#include <linux/sunrpc/clnt.h>
  19#include <linux/nfs_fs.h>
  20#include <linux/nfs_mount.h>
  21#include <linux/nfs_page.h>
  22#include <linux/backing-dev.h>
  23#include <linux/export.h>
  24
  25#include <asm/uaccess.h>
  26
  27#include "delegation.h"
  28#include "internal.h"
  29#include "iostat.h"
  30#include "nfs4_fs.h"
  31#include "fscache.h"
  32#include "pnfs.h"
  33
  34#define NFSDBG_FACILITY         NFSDBG_PAGECACHE
  35
    /*
     * NOTE(review): presumably the minimum number of elements reserved in
     * nfs_wdata_mempool and nfs_commit_mempool respectively; the mempool
     * creation is not visible here - confirm at the point of use.
     */
  36#define MIN_POOL_WRITE          (32)
  37#define MIN_POOL_COMMIT         (4)
  38
  39/*
  40 * Local function declarations
  41 */
  42static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
  43                                  struct inode *inode, int ioflags);
  44static void nfs_redirty_request(struct nfs_page *req);
  45static const struct rpc_call_ops nfs_write_partial_ops;
  46static const struct rpc_call_ops nfs_write_full_ops;
  47static const struct rpc_call_ops nfs_commit_ops;
  48
    /* NOTE(review): presumably the slab cache backing the pools below - confirm at init. */
  49static struct kmem_cache *nfs_wdata_cachep;
    /* Pool used by nfs_writedata_alloc() / nfs_writedata_free(). */
  50static mempool_t *nfs_wdata_mempool;
    /* Pool used by nfs_commitdata_alloc() / nfs_commit_free(). */
  51static mempool_t *nfs_commit_mempool;
  52
  53struct nfs_write_data *nfs_commitdata_alloc(void)
  54{
  55        struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
  56
  57        if (p) {
  58                memset(p, 0, sizeof(*p));
  59                INIT_LIST_HEAD(&p->pages);
  60        }
  61        return p;
  62}
  63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
  64
  65void nfs_commit_free(struct nfs_write_data *p)
  66{
  67        if (p && (p->pagevec != &p->page_array[0]))
  68                kfree(p->pagevec);
  69        mempool_free(p, nfs_commit_mempool);
  70}
  71EXPORT_SYMBOL_GPL(nfs_commit_free);
  72
  73struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
  74{
  75        struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
  76
  77        if (p) {
  78                memset(p, 0, sizeof(*p));
  79                INIT_LIST_HEAD(&p->pages);
  80                p->npages = pagecount;
  81                if (pagecount <= ARRAY_SIZE(p->page_array))
  82                        p->pagevec = p->page_array;
  83                else {
  84                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
  85                        if (!p->pagevec) {
  86                                mempool_free(p, nfs_wdata_mempool);
  87                                p = NULL;
  88                        }
  89                }
  90        }
  91        return p;
  92}
  93
  94void nfs_writedata_free(struct nfs_write_data *p)
  95{
  96        if (p && (p->pagevec != &p->page_array[0]))
  97                kfree(p->pagevec);
  98        mempool_free(p, nfs_wdata_mempool);
  99}
 100
 101void nfs_writedata_release(struct nfs_write_data *wdata)
 102{
 103        put_lseg(wdata->lseg);
 104        put_nfs_open_context(wdata->args.context);
 105        nfs_writedata_free(wdata);
 106}
 107
    /*
     * Record a write error on an open context: store @error in ctx->error and
     * then set NFS_CONTEXT_ERROR_WRITE in ctx->flags.
     */
 108static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 109{
 110        ctx->error = error;
            /*
             * Write barrier between storing the code and setting the flag;
             * presumably so a reader that observes the flag also observes
             * the error code - confirm the reader uses a matching barrier.
             */
 111        smp_wmb();
 112        set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 113}
 114
 115static struct nfs_page *nfs_page_find_request_locked(struct page *page)
 116{
 117        struct nfs_page *req = NULL;
 118
 119        if (PagePrivate(page)) {
 120                req = (struct nfs_page *)page_private(page);
 121                if (req != NULL)
 122                        kref_get(&req->wb_kref);
 123        }
 124        return req;
 125}
 126
 127static struct nfs_page *nfs_page_find_request(struct page *page)
 128{
 129        struct inode *inode = page->mapping->host;
 130        struct nfs_page *req = NULL;
 131
 132        spin_lock(&inode->i_lock);
 133        req = nfs_page_find_request_locked(page);
 134        spin_unlock(&inode->i_lock);
 135        return req;
 136}
 137
 138/* Adjust the file length if we're writing beyond the end */
 139static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 140{
 141        struct inode *inode = page->mapping->host;
 142        loff_t end, i_size;
 143        pgoff_t end_index;
 144
 145        spin_lock(&inode->i_lock);
 146        i_size = i_size_read(inode);
 147        end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 148        if (i_size > 0 && page->index < end_index)
 149                goto out;
 150        end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 151        if (i_size >= end)
 152                goto out;
 153        i_size_write(inode, end);
 154        nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 155out:
 156        spin_unlock(&inode->i_lock);
 157}
 158
 159/* A writeback failed: mark the page as bad, and invalidate the page cache */
 160static void nfs_set_pageerror(struct page *page)
 161{
 162        SetPageError(page);
 163        nfs_zap_mapping(page->mapping->host, page->mapping);
 164}
 165
 166/* We can set the PG_uptodate flag if we see that a write request
 167 * covers the full page.
 168 */
 169static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
 170{
 171        if (PageUptodate(page))
 172                return;
 173        if (base != 0)
 174                return;
 175        if (count != nfs_page_length(page))
 176                return;
 177        SetPageUptodate(page);
 178}
 179
 180static int wb_priority(struct writeback_control *wbc)
 181{
 182        if (wbc->for_reclaim)
 183                return FLUSH_HIGHPRI | FLUSH_STABLE;
 184        if (wbc->for_kupdate || wbc->for_background)
 185                return FLUSH_LOWPRI | FLUSH_COND_STABLE;
 186        return FLUSH_COND_STABLE;
 187}
 188
 189/*
 190 * NFS congestion control
 191 */
 192
    /*
     * Congestion tunable.  The name suggests kilobytes; the ON threshold below
     * shifts it right by (PAGE_SHIFT - 10), which turns a KB count into a page
     * count.
     */
 193int nfs_congestion_kb;
 194
    /*
     * ON threshold: compared against the per-server writeback counter in
     * nfs_set_page_writeback().  OFF threshold: the ON value minus a quarter
     * of it, compared against the same counter in nfs_end_page_writeback().
     */
 195#define NFS_CONGESTION_ON_THRESH        (nfs_congestion_kb >> (PAGE_SHIFT-10))
 196#define NFS_CONGESTION_OFF_THRESH       \
 197        (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
 198
 199static int nfs_set_page_writeback(struct page *page)
 200{
 201        int ret = test_set_page_writeback(page);
 202
 203        if (!ret) {
 204                struct inode *inode = page->mapping->host;
 205                struct nfs_server *nfss = NFS_SERVER(inode);
 206
 207                page_cache_get(page);
 208                if (atomic_long_inc_return(&nfss->writeback) >
 209                                NFS_CONGESTION_ON_THRESH) {
 210                        set_bdi_congested(&nfss->backing_dev_info,
 211                                                BLK_RW_ASYNC);
 212                }
 213        }
 214        return ret;
 215}
 216
 217static void nfs_end_page_writeback(struct page *page)
 218{
 219        struct inode *inode = page->mapping->host;
 220        struct nfs_server *nfss = NFS_SERVER(inode);
 221
 222        end_page_writeback(page);
 223        page_cache_release(page);
 224        if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 225                clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 226}
 227
 228static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
 229{
 230        struct inode *inode = page->mapping->host;
 231        struct nfs_page *req;
 232        int ret;
 233
 234        spin_lock(&inode->i_lock);
 235        for (;;) {
 236                req = nfs_page_find_request_locked(page);
 237                if (req == NULL)
 238                        break;
 239                if (nfs_set_page_tag_locked(req))
 240                        break;
 241                /* Note: If we hold the page lock, as is the case in nfs_writepage,
 242                 *       then the call to nfs_set_page_tag_locked() will always
 243                 *       succeed provided that someone hasn't already marked the
 244                 *       request as dirty (in which case we don't care).
 245                 */
 246                spin_unlock(&inode->i_lock);
 247                if (!nonblock)
 248                        ret = nfs_wait_on_request(req);
 249                else
 250                        ret = -EAGAIN;
 251                nfs_release_request(req);
 252                if (ret != 0)
 253                        return ERR_PTR(ret);
 254                spin_lock(&inode->i_lock);
 255        }
 256        spin_unlock(&inode->i_lock);
 257        return req;
 258}
 259
 260/*
 261 * Find an associated nfs write request, and prepare to flush it out
 262 * May return an error if the user signalled nfs_wait_on_request().
 263 */
 264static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 265                                struct page *page, bool nonblock)
 266{
 267        struct nfs_page *req;
 268        int ret = 0;
 269
 270        req = nfs_find_and_lock_request(page, nonblock);
 271        if (!req)
 272                goto out;
 273        ret = PTR_ERR(req);
 274        if (IS_ERR(req))
 275                goto out;
 276
 277        ret = nfs_set_page_writeback(page);
 278        BUG_ON(ret != 0);
 279        BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
 280
 281        if (!nfs_pageio_add_request(pgio, req)) {
 282                nfs_redirty_request(req);
 283                ret = pgio->pg_error;
 284        }
 285out:
 286        return ret;
 287}
 288
 289static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
 290{
 291        struct inode *inode = page->mapping->host;
 292        int ret;
 293
 294        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 295        nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 296
 297        nfs_pageio_cond_complete(pgio, page->index);
 298        ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
 299        if (ret == -EAGAIN) {
 300                redirty_page_for_writepage(wbc, page);
 301                ret = 0;
 302        }
 303        return ret;
 304}
 305
 306/*
 307 * Write an mmapped page to the server.
 308 */
 309static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 310{
 311        struct nfs_pageio_descriptor pgio;
 312        int err;
 313
 314        nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
 315        err = nfs_do_writepage(page, wbc, &pgio);
 316        nfs_pageio_complete(&pgio);
 317        if (err < 0)
 318                return err;
 319        if (pgio.pg_error < 0)
 320                return pgio.pg_error;
 321        return 0;
 322}
 323
 324int nfs_writepage(struct page *page, struct writeback_control *wbc)
 325{
 326        int ret;
 327
 328        ret = nfs_writepage_locked(page, wbc);
 329        unlock_page(page);
 330        return ret;
 331}
 332
 333static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
 334{
 335        int ret;
 336
 337        ret = nfs_do_writepage(page, wbc, data);
 338        unlock_page(page);
 339        return ret;
 340}
 341
 342int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 343{
 344        struct inode *inode = mapping->host;
 345        unsigned long *bitlock = &NFS_I(inode)->flags;
 346        struct nfs_pageio_descriptor pgio;
 347        int err;
 348
 349        /* Stop dirtying of new pages while we sync */
 350        err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
 351                        nfs_wait_bit_killable, TASK_KILLABLE);
 352        if (err)
 353                goto out_err;
 354
 355        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 356
 357        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
 358        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 359        nfs_pageio_complete(&pgio);
 360
 361        clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
 362        smp_mb__after_clear_bit();
 363        wake_up_bit(bitlock, NFS_INO_FLUSHING);
 364
 365        if (err < 0)
 366                goto out_err;
 367        err = pgio.pg_error;
 368        if (err < 0)
 369                goto out_err;
 370        return 0;
 371out_err:
 372        return err;
 373}
 374
 375/*
 376 * Insert a write request into an inode
 377 */
    /*
     * Returns 0 on success or the error from radix_tree_preload().
     *
     * On success the request has been inserted into nfsi->nfs_page_tree at
     * req->wb_index, attached to its page through the page's private field,
     * given one extra wb_kref reference and tagged NFS_PAGE_TAG_LOCKED.
     */
 378static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 379{
 380        struct nfs_inode *nfsi = NFS_I(inode);
 381        int error;
 382
            /* Preload outside the spinlock; paired with radix_tree_preload_end(). */
 383        error = radix_tree_preload(GFP_NOFS);
 384        if (error != 0)
 385                goto out;
 386
 387        /* Lock the request! */
 388        nfs_lock_request_dontget(req);
 389
 390        spin_lock(&inode->i_lock);
 391        error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
 392        BUG_ON(error);
            /* First request on the inode while holding a write delegation. */
 393        if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
 394                inode->i_version++;
 395        set_bit(PG_MAPPED, &req->wb_flags);
 396        SetPagePrivate(req->wb_page);
 397        set_page_private(req->wb_page, (unsigned long)req);
 398        nfsi->npages++;
            /* Extra reference; nfs_inode_remove_request() calls nfs_release_request(). */
 399        kref_get(&req->wb_kref);
 400        radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
 401                                NFS_PAGE_TAG_LOCKED);
 402        spin_unlock(&inode->i_lock);
 403        radix_tree_preload_end();
 404out:
 405        return error;
 406}
 407
 408/*
 409 * Remove a write request from an inode
 410 */
    /*
     * Undo nfs_inode_add_request(): detach the request from its page, delete
     * it from the inode's nfs_page_tree, decrement npages and release one
     * request reference.  The request must be busy (see the BUG_ON).
     */
 411static void nfs_inode_remove_request(struct nfs_page *req)
 412{
 413        struct inode *inode = req->wb_context->dentry->d_inode;
 414        struct nfs_inode *nfsi = NFS_I(inode);
 415
 416        BUG_ON (!NFS_WBACK_BUSY(req));
 417
 418        spin_lock(&inode->i_lock);
 419        set_page_private(req->wb_page, 0);
 420        ClearPagePrivate(req->wb_page);
 421        clear_bit(PG_MAPPED, &req->wb_flags);
 422        radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 423        nfsi->npages--;
 424        spin_unlock(&inode->i_lock);
            /* Called after the lock is released. */
 425        nfs_release_request(req);
 426}
 427
 428static void
 429nfs_mark_request_dirty(struct nfs_page *req)
 430{
 431        __set_page_dirty_nobuffers(req->wb_page);
 432}
 433
 434#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 435/*
 436 * Add a request to the inode's commit list.
 437 */
    /*
     * Under inode->i_lock: set PG_CLEAN on the request, tag its radix-tree
     * slot NFS_PAGE_TAG_COMMIT and bump nfsi->ncommit.  After unlocking:
     * notify pNFS, account the page as NR_UNSTABLE_NFS / BDI_RECLAIMABLE and
     * mark the inode dirty with I_DIRTY_DATASYNC.
     */
 438static void
 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 440{
 441        struct inode *inode = req->wb_context->dentry->d_inode;
 442        struct nfs_inode *nfsi = NFS_I(inode);
 443
 444        spin_lock(&inode->i_lock);
 445        set_bit(PG_CLEAN, &(req)->wb_flags);
 446        radix_tree_tag_set(&nfsi->nfs_page_tree,
 447                        req->wb_index,
 448                        NFS_PAGE_TAG_COMMIT);
 449        nfsi->ncommit++;
 450        spin_unlock(&inode->i_lock);
 451        pnfs_mark_request_commit(req, lseg);
            /* These two counters are decremented again in nfs_clear_request_commit(). */
 452        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 453        inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
 454        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 455}
 456
 457static int
 458nfs_clear_request_commit(struct nfs_page *req)
 459{
 460        struct page *page = req->wb_page;
 461
 462        if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
 463                dec_zone_page_state(page, NR_UNSTABLE_NFS);
 464                dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
 465                return 1;
 466        }
 467        return 0;
 468}
 469
 470static inline
 471int nfs_write_need_commit(struct nfs_write_data *data)
 472{
 473        if (data->verf.committed == NFS_DATA_SYNC)
 474                return data->lseg == NULL;
 475        else
 476                return data->verf.committed != NFS_FILE_SYNC;
 477}
 478
 479static inline
 480int nfs_reschedule_unstable_write(struct nfs_page *req,
 481                                  struct nfs_write_data *data)
 482{
 483        if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 484                nfs_mark_request_commit(req, data->lseg);
 485                return 1;
 486        }
 487        if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
 488                nfs_mark_request_dirty(req);
 489                return 1;
 490        }
 491        return 0;
 492}
 493#else
 494static inline void
 495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 496{
 497}
 498
 499static inline int
 500nfs_clear_request_commit(struct nfs_page *req)
 501{
 502        return 0;
 503}
 504
 505static inline
 506int nfs_write_need_commit(struct nfs_write_data *data)
 507{
 508        return 0;
 509}
 510
 511static inline
 512int nfs_reschedule_unstable_write(struct nfs_page *req,
 513                                  struct nfs_write_data *data)
 514{
 515        return 0;
 516}
 517#endif
 518
 519#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 520static int
 521nfs_need_commit(struct nfs_inode *nfsi)
 522{
 523        return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
 524}
 525
 526/*
 527 * nfs_scan_commit - Scan an inode for commit requests
 528 * @inode: NFS inode to scan
 529 * @dst: destination list
 530 * @idx_start: lower bound of page->index to scan.
 531 * @npages: idx_start + npages sets the upper bound to scan.
 532 *
 533 * Moves requests from the inode's 'commit' request list.
 534 * The requests are *not* checked to ensure that they form a contiguous set.
 535 */
 536static int
 537nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 538{
 539        struct nfs_inode *nfsi = NFS_I(inode);
 540        int ret;
 541
 542        if (!nfs_need_commit(nfsi))
 543                return 0;
 544
 545        spin_lock(&inode->i_lock);
 546        ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
 547        if (ret > 0)
 548                nfsi->ncommit -= ret;
 549        spin_unlock(&inode->i_lock);
 550
 551        if (nfs_need_commit(NFS_I(inode)))
 552                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 553
 554        return ret;
 555}
 556#else
 557static inline int nfs_need_commit(struct nfs_inode *nfsi)
 558{
 559        return 0;
 560}
 561
 562static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 563{
 564        return 0;
 565}
 566#endif
 567
 568/*
 569 * Search for an existing write request, and attempt to update
 570 * it to reflect a new dirty region on a given page.
 571 *
 572 * If the attempt fails, then the existing request is flushed out
 573 * to disk.
 574 */
    /*
     * Return values:
     *   NULL       - the page has no request (also reached via out_flushme when
     *                nfs_wb_page() returns 0, since ERR_PTR(0) is NULL)
     *   ERR_PTR(e) - nfs_wait_on_request() or nfs_wb_page() failed with e
     *   request    - tag-locked, holding the reference taken by the lookup, with
     *                its region widened to cover [offset, offset + bytes)
     */
 575static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 576                struct page *page,
 577                unsigned int offset,
 578                unsigned int bytes)
 579{
 580        struct nfs_page *req;
 581        unsigned int rqend;
 582        unsigned int end;
 583        int error;
 584
            /* Unlocked fast path: no private data means no request to update. */
 585        if (!PagePrivate(page))
 586                return NULL;
 587
 588        end = offset + bytes;
 589        spin_lock(&inode->i_lock);
 590
 591        for (;;) {
 592                req = nfs_page_find_request_locked(page);
 593                if (req == NULL)
 594                        goto out_unlock;
 595
 596                rqend = req->wb_offset + req->wb_bytes;
 597                /*
 598                 * Tell the caller to flush out the request if
 599                 * the offsets are non-contiguous.
 600                 * Note: nfs_flush_incompatible() will already
 601                 * have flushed out requests having wrong owners.
 602                 */
 603                if (offset > rqend
 604                    || end < req->wb_offset)
 605                        goto out_flushme;
 606
 607                if (nfs_set_page_tag_locked(req))
 608                        break;
 609
 610                /* The request is locked, so wait and then retry */
 611                spin_unlock(&inode->i_lock);
 612                error = nfs_wait_on_request(req);
 613                nfs_release_request(req);
 614                if (error != 0)
 615                        goto out_err;
 616                spin_lock(&inode->i_lock);
 617        }
 618
            /*
             * If the request was queued for commit, undo that: the commit
             * tag is cleared and ncommit decremented before the region changes.
             */
 619        if (nfs_clear_request_commit(req) &&
 620            radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
 621                                 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
 622                NFS_I(inode)->ncommit--;
 623                pnfs_clear_request_commit(req);
 624        }
 625
 626        /* Okay, the request matches. Update the region */
 627        if (offset < req->wb_offset) {
 628                req->wb_offset = offset;
 629                req->wb_pgbase = offset;
 630        }
            /* rqend was computed before wb_offset may have moved, so the old end is kept. */
 631        if (end > rqend)
 632                req->wb_bytes = end - req->wb_offset;
 633        else
 634                req->wb_bytes = rqend - req->wb_offset;
 635out_unlock:
 636        spin_unlock(&inode->i_lock);
 637        return req;
 638out_flushme:
 639        spin_unlock(&inode->i_lock);
            /* Drop the lookup reference before writing the page back. */
 640        nfs_release_request(req);
 641        error = nfs_wb_page(inode, page);
 642out_err:
 643        return ERR_PTR(error);
 644}
 645
 646/*
 647 * Try to update an existing write request, or create one if there is none.
 648 *
 649 * Note: Should always be called with the Page Lock held to prevent races
 650 * if we have to add a new request. Also assumes that the caller has
 651 * already called nfs_flush_incompatible() if necessary.
 652 */
 653static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 654                struct page *page, unsigned int offset, unsigned int bytes)
 655{
 656        struct inode *inode = page->mapping->host;
 657        struct nfs_page *req;
 658        int error;
 659
 660        req = nfs_try_to_update_request(inode, page, offset, bytes);
 661        if (req != NULL)
 662                goto out;
 663        req = nfs_create_request(ctx, inode, page, offset, bytes);
 664        if (IS_ERR(req))
 665                goto out;
 666        error = nfs_inode_add_request(inode, req);
 667        if (error != 0) {
 668                nfs_release_request(req);
 669                req = ERR_PTR(error);
 670        }
 671out:
 672        return req;
 673}
 674
 675static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 676                unsigned int offset, unsigned int count)
 677{
 678        struct nfs_page *req;
 679
 680        req = nfs_setup_write_request(ctx, page, offset, count);
 681        if (IS_ERR(req))
 682                return PTR_ERR(req);
 683        /* Update file length */
 684        nfs_grow_file(page, offset, count);
 685        nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 686        nfs_mark_request_dirty(req);
 687        nfs_clear_page_tag_locked(req);
 688        return 0;
 689}
 690
 691int nfs_flush_incompatible(struct file *file, struct page *page)
 692{
 693        struct nfs_open_context *ctx = nfs_file_open_context(file);
 694        struct nfs_page *req;
 695        int do_flush, status;
 696        /*
 697         * Look for a request corresponding to this page. If there
 698         * is one, and it belongs to another file, we flush it out
 699         * before we try to copy anything into the page. Do this
 700         * due to the lack of an ACCESS-type call in NFSv2.
 701         * Also do the same if we find a request from an existing
 702         * dropped page.
 703         */
 704        do {
 705                req = nfs_page_find_request(page);
 706                if (req == NULL)
 707                        return 0;
 708                do_flush = req->wb_page != page || req->wb_context != ctx ||
 709                        req->wb_lock_context->lockowner != current->files ||
 710                        req->wb_lock_context->pid != current->tgid;
 711                nfs_release_request(req);
 712                if (!do_flush)
 713                        return 0;
 714                status = nfs_wb_page(page->mapping->host, page);
 715        } while (status == 0);
 716        return status;
 717}
 718
 719/*
 720 * If the page cache is marked as unsafe or invalid, then we can't rely on
 721 * the PageUptodate() flag. In this case, we will need to turn off
 722 * write optimisations that depend on the page contents being correct.
 723 */
 724static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
 725{
 726        return PageUptodate(page) &&
 727                !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
 728}
 729
 730/*
 731 * Update and possibly write a cached page of an NFS file.
 732 *
 733 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 734 * things with a page scheduled for an RPC call (e.g. invalidate it).
 735 */
 736int nfs_updatepage(struct file *file, struct page *page,
 737                unsigned int offset, unsigned int count)
 738{
 739        struct nfs_open_context *ctx = nfs_file_open_context(file);
 740        struct inode    *inode = page->mapping->host;
 741        int             status = 0;
 742
 743        nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
 744
 745        dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
 746                file->f_path.dentry->d_parent->d_name.name,
 747                file->f_path.dentry->d_name.name, count,
 748                (long long)(page_offset(page) + offset));
 749
 750        /* If we're not using byte range locks, and we know the page
 751         * is up to date, it may be more efficient to extend the write
 752         * to cover the entire page in order to avoid fragmentation
 753         * inefficiencies.
 754         */
 755        if (nfs_write_pageuptodate(page, inode) &&
 756                        inode->i_flock == NULL &&
 757                        !(file->f_flags & O_DSYNC)) {
 758                count = max(count + offset, nfs_page_length(page));
 759                offset = 0;
 760        }
 761
 762        status = nfs_writepage_setup(ctx, page, offset, count);
 763        if (status < 0)
 764                nfs_set_pageerror(page);
 765        else
 766                __set_page_dirty_nobuffers(page);
 767
 768        dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
 769                        status, (long long)i_size_read(inode));
 770        return status;
 771}
 772
 773static void nfs_writepage_release(struct nfs_page *req,
 774                                  struct nfs_write_data *data)
 775{
 776        struct page *page = req->wb_page;
 777
 778        if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
 779                nfs_inode_remove_request(req);
 780        nfs_clear_page_tag_locked(req);
 781        nfs_end_page_writeback(page);
 782}
 783
 784static int flush_task_priority(int how)
 785{
 786        switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
 787                case FLUSH_HIGHPRI:
 788                        return RPC_PRIORITY_HIGH;
 789                case FLUSH_LOWPRI:
 790                        return RPC_PRIORITY_LOW;
 791        }
 792        return RPC_PRIORITY_NORMAL;
 793}
 794
 795int nfs_initiate_write(struct nfs_write_data *data,
 796                       struct rpc_clnt *clnt,
 797                       const struct rpc_call_ops *call_ops,
 798                       int how)
 799{
 800        struct inode *inode = data->inode;
 801        int priority = flush_task_priority(how);
 802        struct rpc_task *task;
 803        struct rpc_message msg = {
 804                .rpc_argp = &data->args,
 805                .rpc_resp = &data->res,
 806                .rpc_cred = data->cred,
 807        };
 808        struct rpc_task_setup task_setup_data = {
 809                .rpc_client = clnt,
 810                .task = &data->task,
 811                .rpc_message = &msg,
 812                .callback_ops = call_ops,
 813                .callback_data = data,
 814                .workqueue = nfsiod_workqueue,
 815                .flags = RPC_TASK_ASYNC,
 816                .priority = priority,
 817        };
 818        int ret = 0;
 819
 820        /* Set up the initial task struct.  */
 821        NFS_PROTO(inode)->write_setup(data, &msg);
 822
 823        dprintk("NFS: %5u initiated write call "
 824                "(req %s/%lld, %u bytes @ offset %llu)\n",
 825                data->task.tk_pid,
 826                inode->i_sb->s_id,
 827                (long long)NFS_FILEID(inode),
 828                data->args.count,
 829                (unsigned long long)data->args.offset);
 830
 831        task = rpc_run_task(&task_setup_data);
 832        if (IS_ERR(task)) {
 833                ret = PTR_ERR(task);
 834                goto out;
 835        }
 836        if (how & FLUSH_SYNC) {
 837                ret = rpc_wait_for_completion_task(task);
 838                if (ret == 0)
 839                        ret = task->tk_status;
 840        }
 841        rpc_put_task(task);
 842out:
 843        return ret;
 844}
 845EXPORT_SYMBOL_GPL(nfs_initiate_write);
 846
 847/*
 848 * Set up the argument/result storage required for the RPC call.
 849 */
 850static void nfs_write_rpcsetup(struct nfs_page *req,
 851                struct nfs_write_data *data,
 852                unsigned int count, unsigned int offset,
 853                int how)
 854{
 855        struct inode *inode = req->wb_context->dentry->d_inode;
 856
 857        /* Set up the RPC argument and reply structs
 858         * NB: take care not to mess about with data->commit et al. */
 859
 860        data->req = req;
 861        data->inode = inode = req->wb_context->dentry->d_inode;
 862        data->cred = req->wb_context->cred;
 863
 864        data->args.fh     = NFS_FH(inode);
 865        data->args.offset = req_offset(req) + offset;
 866        /* pnfs_set_layoutcommit needs this */
 867        data->mds_offset = data->args.offset;
 868        data->args.pgbase = req->wb_pgbase + offset;
 869        data->args.pages  = data->pagevec;
 870        data->args.count  = count;
 871        data->args.context = get_nfs_open_context(req->wb_context);
 872        data->args.lock_context = req->wb_lock_context;
 873        data->args.stable  = NFS_UNSTABLE;
 874        switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
 875        case 0:
 876                break;
 877        case FLUSH_COND_STABLE:
 878                if (nfs_need_commit(NFS_I(inode)))
 879                        break;
 880        default:
 881                data->args.stable = NFS_FILE_SYNC;
 882        }
 883
 884        data->res.fattr   = &data->fattr;
 885        data->res.count   = count;
 886        data->res.verf    = &data->verf;
 887        nfs_fattr_init(&data->fattr);
 888}
 889
 890static int nfs_do_write(struct nfs_write_data *data,
 891                const struct rpc_call_ops *call_ops,
 892                int how)
 893{
 894        struct inode *inode = data->args.context->dentry->d_inode;
 895
 896        return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 897}
 898
 899static int nfs_do_multiple_writes(struct list_head *head,
 900                const struct rpc_call_ops *call_ops,
 901                int how)
 902{
 903        struct nfs_write_data *data;
 904        int ret = 0;
 905
 906        while (!list_empty(head)) {
 907                int ret2;
 908
 909                data = list_entry(head->next, struct nfs_write_data, list);
 910                list_del_init(&data->list);
 911                
 912                ret2 = nfs_do_write(data, call_ops, how);
 913                 if (ret == 0)
 914                         ret = ret2;
 915        }
 916        return ret;
 917}
 918
 919/* If a nfs_flush_* function fails, it should remove reqs from @head and
 920 * call this on each, which will prepare them to be retried on next
 921 * writeback using standard nfs.
 922 */
 923static void nfs_redirty_request(struct nfs_page *req)
 924{
 925        struct page *page = req->wb_page;
 926
 927        nfs_mark_request_dirty(req);
 928        nfs_clear_page_tag_locked(req);
 929        nfs_end_page_writeback(page);
 930}
 931
 932/*
 933 * Generate multiple small requests to write out a single
 934 * contiguous dirty area on one page.
 935 */
 936static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 937{
 938        struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 939        struct page *page = req->wb_page;
 940        struct nfs_write_data *data;
 941        size_t wsize = desc->pg_bsize, nbytes;
 942        unsigned int offset;
 943        int requests = 0;
 944        int ret = 0;
 945
 946        nfs_list_remove_request(req);
 947
 948        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 949            (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
 950             desc->pg_count > wsize))
 951                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 952
 953
 954        offset = 0;
 955        nbytes = desc->pg_count;
 956        do {
 957                size_t len = min(nbytes, wsize);
 958
 959                data = nfs_writedata_alloc(1);
 960                if (!data)
 961                        goto out_bad;
 962                data->pagevec[0] = page;
 963                nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
 964                list_add(&data->list, res);
 965                requests++;
 966                nbytes -= len;
 967                offset += len;
 968        } while (nbytes != 0);
 969        atomic_set(&req->wb_complete, requests);
 970        desc->pg_rpc_callops = &nfs_write_partial_ops;
 971        return ret;
 972
 973out_bad:
 974        while (!list_empty(res)) {
 975                data = list_entry(res->next, struct nfs_write_data, list);
 976                list_del(&data->list);
 977                nfs_writedata_free(data);
 978        }
 979        nfs_redirty_request(req);
 980        return -ENOMEM;
 981}
 982
 983/*
 984 * Create an RPC task for the given write request and kick it.
 985 * The page must have been locked by the caller.
 986 *
 987 * It may happen that the page we're passed is not marked dirty.
 988 * This is the case if nfs_updatepage detects a conflicting request
 989 * that has been written but not committed.
 990 */
 991static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 992{
 993        struct nfs_page         *req;
 994        struct page             **pages;
 995        struct nfs_write_data   *data;
 996        struct list_head *head = &desc->pg_list;
 997        int ret = 0;
 998
 999        data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
1000                                                      desc->pg_count));
1001        if (!data) {
1002                while (!list_empty(head)) {
1003                        req = nfs_list_entry(head->next);
1004                        nfs_list_remove_request(req);
1005                        nfs_redirty_request(req);
1006                }
1007                ret = -ENOMEM;
1008                goto out;
1009        }
1010        pages = data->pagevec;
1011        while (!list_empty(head)) {
1012                req = nfs_list_entry(head->next);
1013                nfs_list_remove_request(req);
1014                nfs_list_add_request(req, &data->pages);
1015                *pages++ = req->wb_page;
1016        }
1017        req = nfs_list_entry(data->pages.next);
1018
1019        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1020            (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1021                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1022
1023        /* Set up the argument struct */
1024        nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
1025        list_add(&data->list, res);
1026        desc->pg_rpc_callops = &nfs_write_full_ops;
1027out:
1028        return ret;
1029}
1030
1031int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
1032{
1033        if (desc->pg_bsize < PAGE_CACHE_SIZE)
1034                return nfs_flush_multi(desc, head);
1035        return nfs_flush_one(desc, head);
1036}
1037
1038static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1039{
1040        LIST_HEAD(head);
1041        int ret;
1042
1043        ret = nfs_generic_flush(desc, &head);
1044        if (ret == 0)
1045                ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
1046                                desc->pg_ioflags);
1047        return ret;
1048}
1049
1050static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1051        .pg_test = nfs_generic_pg_test,
1052        .pg_doio = nfs_generic_pg_writepages,
1053};
1054
1055void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1056                                  struct inode *inode, int ioflags)
1057{
1058        nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
1059                                NFS_SERVER(inode)->wsize, ioflags);
1060}
1061
1062void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1063{
1064        pgio->pg_ops = &nfs_pageio_write_ops;
1065        pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1066}
1067EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1068
/*
 * Initialise @pgio for writing.  pnfs_pageio_init_write() gets the first
 * chance; if it reports that it did not set the descriptor up, fall back
 * to the MDS initialisation.
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	if (pnfs_pageio_init_write(pgio, inode, ioflags))
		return;
	nfs_pageio_init_write_mds(pgio, inode, ioflags);
}
1075
1076/*
1077 * Handle a write reply that flushed part of a page.
1078 */
1079static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1080{
1081        struct nfs_write_data   *data = calldata;
1082
1083        dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1084                task->tk_pid,
1085                data->req->wb_context->dentry->d_inode->i_sb->s_id,
1086                (long long)
1087                  NFS_FILEID(data->req->wb_context->dentry->d_inode),
1088                data->req->wb_bytes, (long long)req_offset(data->req));
1089
1090        nfs_writeback_done(task, data);
1091}
1092
1093static void nfs_writeback_release_partial(void *calldata)
1094{
1095        struct nfs_write_data   *data = calldata;
1096        struct nfs_page         *req = data->req;
1097        struct page             *page = req->wb_page;
1098        int status = data->task.tk_status;
1099
1100        if (status < 0) {
1101                nfs_set_pageerror(page);
1102                nfs_context_set_write_error(req->wb_context, status);
1103                dprintk(", error = %d\n", status);
1104                goto out;
1105        }
1106
1107        if (nfs_write_need_commit(data)) {
1108                struct inode *inode = page->mapping->host;
1109
1110                spin_lock(&inode->i_lock);
1111                if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1112                        /* Do nothing we need to resend the writes */
1113                } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1114                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1115                        dprintk(" defer commit\n");
1116                } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1117                        set_bit(PG_NEED_RESCHED, &req->wb_flags);
1118                        clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1119                        dprintk(" server reboot detected\n");
1120                }
1121                spin_unlock(&inode->i_lock);
1122        } else
1123                dprintk(" OK\n");
1124
1125out:
1126        if (atomic_dec_and_test(&req->wb_complete))
1127                nfs_writepage_release(req, data);
1128        nfs_writedata_release(calldata);
1129}
1130
#if defined(CONFIG_NFS_V4_1)
/*
 * rpc_call_prepare callback: set up the session sequence for the call
 * and start the task only if nfs4_setup_sequence() returned zero.
 */
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *wdata = calldata;
	struct nfs_server *server = NFS_SERVER(wdata->inode);

	if (nfs4_setup_sequence(server, &wdata->args.seq_args,
				&wdata->res.seq_res, 1, task) == 0)
		rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */
1143
1144static const struct rpc_call_ops nfs_write_partial_ops = {
1145#if defined(CONFIG_NFS_V4_1)
1146        .rpc_call_prepare = nfs_write_prepare,
1147#endif /* CONFIG_NFS_V4_1 */
1148        .rpc_call_done = nfs_writeback_done_partial,
1149        .rpc_release = nfs_writeback_release_partial,
1150};
1151
/*
 * rpc_call_done callback for a write that flushes whole pages; it only
 * forwards to the common completion handler.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *        writebacks since the page->count is kept > 1 for as long
 *        as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *wdata = calldata;

	nfs_writeback_done(task, wdata);
}
1165
1166static void nfs_writeback_release_full(void *calldata)
1167{
1168        struct nfs_write_data   *data = calldata;
1169        int status = data->task.tk_status;
1170
1171        /* Update attributes as result of writeback. */
1172        while (!list_empty(&data->pages)) {
1173                struct nfs_page *req = nfs_list_entry(data->pages.next);
1174                struct page *page = req->wb_page;
1175
1176                nfs_list_remove_request(req);
1177
1178                dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1179                        data->task.tk_pid,
1180                        req->wb_context->dentry->d_inode->i_sb->s_id,
1181                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1182                        req->wb_bytes,
1183                        (long long)req_offset(req));
1184
1185                if (status < 0) {
1186                        nfs_set_pageerror(page);
1187                        nfs_context_set_write_error(req->wb_context, status);
1188                        dprintk(", error = %d\n", status);
1189                        goto remove_request;
1190                }
1191
1192                if (nfs_write_need_commit(data)) {
1193                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1194                        nfs_mark_request_commit(req, data->lseg);
1195                        dprintk(" marked for commit\n");
1196                        goto next;
1197                }
1198                dprintk(" OK\n");
1199remove_request:
1200                nfs_inode_remove_request(req);
1201        next:
1202                nfs_clear_page_tag_locked(req);
1203                nfs_end_page_writeback(page);
1204        }
1205        nfs_writedata_release(calldata);
1206}
1207
1208static const struct rpc_call_ops nfs_write_full_ops = {
1209#if defined(CONFIG_NFS_V4_1)
1210        .rpc_call_prepare = nfs_write_prepare,
1211#endif /* CONFIG_NFS_V4_1 */
1212        .rpc_call_done = nfs_writeback_done_full,
1213        .rpc_release = nfs_writeback_release_full,
1214};
1215
1216
1217/*
1218 * This function is called when the WRITE call is complete.
1219 */
1220void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1221{
1222        struct nfs_writeargs    *argp = &data->args;
1223        struct nfs_writeres     *resp = &data->res;
1224        int status;
1225
1226        dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
1227                task->tk_pid, task->tk_status);
1228
1229        /*
1230         * ->write_done will attempt to use post-op attributes to detect
1231         * conflicting writes by other clients.  A strict interpretation
1232         * of close-to-open would allow us to continue caching even if
1233         * another writer had changed the file, but some applications
1234         * depend on tighter cache coherency when writing.
1235         */
1236        status = NFS_PROTO(data->inode)->write_done(task, data);
1237        if (status != 0)
1238                return;
1239        nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1240
1241#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1242        if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1243                /* We tried a write call, but the server did not
1244                 * commit data to stable storage even though we
1245                 * requested it.
1246                 * Note: There is a known bug in Tru64 < 5.0 in which
1247                 *       the server reports NFS_DATA_SYNC, but performs
1248                 *       NFS_FILE_SYNC. We therefore implement this checking
1249                 *       as a dprintk() in order to avoid filling syslog.
1250                 */
1251                static unsigned long    complain;
1252
1253                /* Note this will print the MDS for a DS write */
1254                if (time_before(complain, jiffies)) {
1255                        dprintk("NFS:       faulty NFS server %s:"
1256                                " (committed = %d) != (stable = %d)\n",
1257                                NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1258                                resp->verf->committed, argp->stable);
1259                        complain = jiffies + 300 * HZ;
1260                }
1261        }
1262#endif
1263        /* Is this a short write? */
1264        if (task->tk_status >= 0 && resp->count < argp->count) {
1265                static unsigned long    complain;
1266
1267                nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
1268
1269                /* Has the server at least made some progress? */
1270                if (resp->count != 0) {
1271                        /* Was this an NFSv2 write or an NFSv3 stable write? */
1272                        if (resp->verf->committed != NFS_UNSTABLE) {
1273                                /* Resend from where the server left off */
1274                                data->mds_offset += resp->count;
1275                                argp->offset += resp->count;
1276                                argp->pgbase += resp->count;
1277                                argp->count -= resp->count;
1278                        } else {
1279                                /* Resend as a stable write in order to avoid
1280                                 * headaches in the case of a server crash.
1281                                 */
1282                                argp->stable = NFS_FILE_SYNC;
1283                        }
1284                        rpc_restart_call_prepare(task);
1285                        return;
1286                }
1287                if (time_before(complain, jiffies)) {
1288                        printk(KERN_WARNING
1289                               "NFS: Server wrote zero bytes, expected %u.\n",
1290                                        argp->count);
1291                        complain = jiffies + 300 * HZ;
1292                }
1293                /* Can't do anything about it except throw an error. */
1294                task->tk_status = -EIO;
1295        }
1296        return;
1297}
1298
1299
1300#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1301static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1302{
1303        int ret;
1304
1305        if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1306                return 1;
1307        if (!may_wait)
1308                return 0;
1309        ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1310                                NFS_INO_COMMIT,
1311                                nfs_wait_bit_killable,
1312                                TASK_KILLABLE);
1313        return (ret < 0) ? ret : 1;
1314}
1315
1316void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1317{
1318        clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1319        smp_mb__after_clear_bit();
1320        wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1321}
1322EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1323
1324void nfs_commitdata_release(void *data)
1325{
1326        struct nfs_write_data *wdata = data;
1327
1328        put_lseg(wdata->lseg);
1329        put_nfs_open_context(wdata->args.context);
1330        nfs_commit_free(wdata);
1331}
1332EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1333
1334int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1335                        const struct rpc_call_ops *call_ops,
1336                        int how)
1337{
1338        struct rpc_task *task;
1339        int priority = flush_task_priority(how);
1340        struct rpc_message msg = {
1341                .rpc_argp = &data->args,
1342                .rpc_resp = &data->res,
1343                .rpc_cred = data->cred,
1344        };
1345        struct rpc_task_setup task_setup_data = {
1346                .task = &data->task,
1347                .rpc_client = clnt,
1348                .rpc_message = &msg,
1349                .callback_ops = call_ops,
1350                .callback_data = data,
1351                .workqueue = nfsiod_workqueue,
1352                .flags = RPC_TASK_ASYNC,
1353                .priority = priority,
1354        };
1355        /* Set up the initial task struct.  */
1356        NFS_PROTO(data->inode)->commit_setup(data, &msg);
1357
1358        dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1359
1360        task = rpc_run_task(&task_setup_data);
1361        if (IS_ERR(task))
1362                return PTR_ERR(task);
1363        if (how & FLUSH_SYNC)
1364                rpc_wait_for_completion_task(task);
1365        rpc_put_task(task);
1366        return 0;
1367}
1368EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1369
1370/*
1371 * Set up the argument/result storage required for the RPC call.
1372 */
1373void nfs_init_commit(struct nfs_write_data *data,
1374                            struct list_head *head,
1375                            struct pnfs_layout_segment *lseg)
1376{
1377        struct nfs_page *first = nfs_list_entry(head->next);
1378        struct inode *inode = first->wb_context->dentry->d_inode;
1379
1380        /* Set up the RPC argument and reply structs
1381         * NB: take care not to mess about with data->commit et al. */
1382
1383        list_splice_init(head, &data->pages);
1384
1385        data->inode       = inode;
1386        data->cred        = first->wb_context->cred;
1387        data->lseg        = lseg; /* reference transferred */
1388        data->mds_ops     = &nfs_commit_ops;
1389
1390        data->args.fh     = NFS_FH(data->inode);
1391        /* Note: we always request a commit of the entire inode */
1392        data->args.offset = 0;
1393        data->args.count  = 0;
1394        data->args.context = get_nfs_open_context(first->wb_context);
1395        data->res.count   = 0;
1396        data->res.fattr   = &data->fattr;
1397        data->res.verf    = &data->verf;
1398        nfs_fattr_init(&data->fattr);
1399}
1400EXPORT_SYMBOL_GPL(nfs_init_commit);
1401
1402void nfs_retry_commit(struct list_head *page_list,
1403                      struct pnfs_layout_segment *lseg)
1404{
1405        struct nfs_page *req;
1406
1407        while (!list_empty(page_list)) {
1408                req = nfs_list_entry(page_list->next);
1409                nfs_list_remove_request(req);
1410                nfs_mark_request_commit(req, lseg);
1411                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1412                dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1413                             BDI_RECLAIMABLE);
1414                nfs_clear_page_tag_locked(req);
1415        }
1416}
1417EXPORT_SYMBOL_GPL(nfs_retry_commit);
1418
1419/*
1420 * Commit dirty pages
1421 */
1422static int
1423nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1424{
1425        struct nfs_write_data   *data;
1426
1427        data = nfs_commitdata_alloc();
1428
1429        if (!data)
1430                goto out_bad;
1431
1432        /* Set up the argument struct */
1433        nfs_init_commit(data, head, NULL);
1434        return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1435 out_bad:
1436        nfs_retry_commit(head, NULL);
1437        nfs_commit_clear_lock(NFS_I(inode));
1438        return -ENOMEM;
1439}
1440
1441/*
1442 * COMMIT call returned
1443 */
1444static void nfs_commit_done(struct rpc_task *task, void *calldata)
1445{
1446        struct nfs_write_data   *data = calldata;
1447
1448        dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1449                                task->tk_pid, task->tk_status);
1450
1451        /* Call the NFS version-specific code */
1452        NFS_PROTO(data->inode)->commit_done(task, data);
1453}
1454
1455void nfs_commit_release_pages(struct nfs_write_data *data)
1456{
1457        struct nfs_page *req;
1458        int status = data->task.tk_status;
1459
1460        while (!list_empty(&data->pages)) {
1461                req = nfs_list_entry(data->pages.next);
1462                nfs_list_remove_request(req);
1463                nfs_clear_request_commit(req);
1464
1465                dprintk("NFS:       commit (%s/%lld %d@%lld)",
1466                        req->wb_context->dentry->d_sb->s_id,
1467                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1468                        req->wb_bytes,
1469                        (long long)req_offset(req));
1470                if (status < 0) {
1471                        nfs_context_set_write_error(req->wb_context, status);
1472                        nfs_inode_remove_request(req);
1473                        dprintk(", error = %d\n", status);
1474                        goto next;
1475                }
1476
1477                /* Okay, COMMIT succeeded, apparently. Check the verifier
1478                 * returned by the server against all stored verfs. */
1479                if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1480                        /* We have a match */
1481                        nfs_inode_remove_request(req);
1482                        dprintk(" OK\n");
1483                        goto next;
1484                }
1485                /* We have a mismatch. Write the page again */
1486                dprintk(" mismatch\n");
1487                nfs_mark_request_dirty(req);
1488        next:
1489                nfs_clear_page_tag_locked(req);
1490        }
1491}
1492EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1493
1494static void nfs_commit_release(void *calldata)
1495{
1496        struct nfs_write_data *data = calldata;
1497
1498        nfs_commit_release_pages(data);
1499        nfs_commit_clear_lock(NFS_I(data->inode));
1500        nfs_commitdata_release(calldata);
1501}
1502
1503static const struct rpc_call_ops nfs_commit_ops = {
1504#if defined(CONFIG_NFS_V4_1)
1505        .rpc_call_prepare = nfs_write_prepare,
1506#endif /* CONFIG_NFS_V4_1 */
1507        .rpc_call_done = nfs_commit_done,
1508        .rpc_release = nfs_commit_release,
1509};
1510
1511int nfs_commit_inode(struct inode *inode, int how)
1512{
1513        LIST_HEAD(head);
1514        int may_wait = how & FLUSH_SYNC;
1515        int res;
1516
1517        res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1518        if (res <= 0)
1519                goto out_mark_dirty;
1520        res = nfs_scan_commit(inode, &head, 0, 0);
1521        if (res) {
1522                int error;
1523
1524                error = pnfs_commit_list(inode, &head, how);
1525                if (error == PNFS_NOT_ATTEMPTED)
1526                        error = nfs_commit_list(inode, &head, how);
1527                if (error < 0)
1528                        return error;
1529                if (!may_wait)
1530                        goto out_mark_dirty;
1531                error = wait_on_bit(&NFS_I(inode)->flags,
1532                                NFS_INO_COMMIT,
1533                                nfs_wait_bit_killable,
1534                                TASK_KILLABLE);
1535                if (error < 0)
1536                        return error;
1537        } else
1538                nfs_commit_clear_lock(NFS_I(inode));
1539        return res;
1540        /* Note: If we exit without ensuring that the commit is complete,
1541         * we must mark the inode as dirty. Otherwise, future calls to
1542         * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
1543         * that the data is on the disk.
1544         */
1545out_mark_dirty:
1546        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1547        return res;
1548}
1549
1550static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1551{
1552        struct nfs_inode *nfsi = NFS_I(inode);
1553        int flags = FLUSH_SYNC;
1554        int ret = 0;
1555
1556        /* no commits means nothing needs to be done */
1557        if (!nfsi->ncommit)
1558                return ret;
1559
1560        if (wbc->sync_mode == WB_SYNC_NONE) {
1561                /* Don't commit yet if this is a non-blocking flush and there
1562                 * are a lot of outstanding writes for this mapping.
1563                 */
1564                if (nfsi->ncommit <= (nfsi->npages >> 1))
1565                        goto out_mark_dirty;
1566
1567                /* don't wait for the COMMIT response */
1568                flags = 0;
1569        }
1570
1571        ret = nfs_commit_inode(inode, flags);
1572        if (ret >= 0) {
1573                if (wbc->sync_mode == WB_SYNC_NONE) {
1574                        if (ret < wbc->nr_to_write)
1575                                wbc->nr_to_write -= ret;
1576                        else
1577                                wbc->nr_to_write = 0;
1578                }
1579                return 0;
1580        }
1581out_mark_dirty:
1582        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1583        return ret;
1584}
1585#else
/*
 * Variant built when neither CONFIG_NFS_V3 nor CONFIG_NFS_V4 is
 * defined: does nothing and reports success.
 */
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
1590#endif
1591
1592int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1593{
1594        int ret;
1595
1596        ret = nfs_commit_unstable_pages(inode, wbc);
1597        if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1598                int status;
1599                bool sync = true;
1600
1601                if (wbc->sync_mode == WB_SYNC_NONE)
1602                        sync = false;
1603
1604                status = pnfs_layoutcommit_inode(inode, sync);
1605                if (status < 0)
1606                        return status;
1607        }
1608        return ret;
1609}
1610
1611/*
1612 * flush the inode to disk.
1613 */
1614int nfs_wb_all(struct inode *inode)
1615{
1616        struct writeback_control wbc = {
1617                .sync_mode = WB_SYNC_ALL,
1618                .nr_to_write = LONG_MAX,
1619                .range_start = 0,
1620                .range_end = LLONG_MAX,
1621        };
1622
1623        return sync_inode(inode, &wbc);
1624}
1625
1626int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1627{
1628        struct nfs_page *req;
1629        int ret = 0;
1630
1631        BUG_ON(!PageLocked(page));
1632        for (;;) {
1633                wait_on_page_writeback(page);
1634                req = nfs_page_find_request(page);
1635                if (req == NULL)
1636                        break;
1637                if (nfs_lock_request_dontget(req)) {
1638                        nfs_inode_remove_request(req);
1639                        /*
1640                         * In case nfs_inode_remove_request has marked the
1641                         * page as being dirty
1642                         */
1643                        cancel_dirty_page(page, PAGE_CACHE_SIZE);
1644                        nfs_unlock_request(req);
1645                        break;
1646                }
1647                ret = nfs_wait_on_request(req);
1648                nfs_release_request(req);
1649                if (ret < 0)
1650                        break;
1651        }
1652        return ret;
1653}
1654
1655/*
1656 * Write back all requests on one page - we do this before reading it.
1657 */
1658int nfs_wb_page(struct inode *inode, struct page *page)
1659{
1660        loff_t range_start = page_offset(page);
1661        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1662        struct writeback_control wbc = {
1663                .sync_mode = WB_SYNC_ALL,
1664                .nr_to_write = 0,
1665                .range_start = range_start,
1666                .range_end = range_end,
1667        };
1668        int ret;
1669
1670        for (;;) {
1671                wait_on_page_writeback(page);
1672                if (clear_page_dirty_for_io(page)) {
1673                        ret = nfs_writepage_locked(page, &wbc);
1674                        if (ret < 0)
1675                                goto out_error;
1676                        continue;
1677                }
1678                if (!PagePrivate(page))
1679                        break;
1680                ret = nfs_commit_inode(inode, FLUSH_SYNC);
1681                if (ret < 0)
1682                        goto out_error;
1683        }
1684        return 0;
1685out_error:
1686        return ret;
1687}
1688
#ifdef CONFIG_MIGRATION
/*
 * Page migration hook.  Returns -EBUSY for a PagePrivate page;
 * otherwise releases the page from fscache and hands over to
 * migrate_page().
 */
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (!PagePrivate(page)) {
		nfs_fscache_release_page(page, GFP_KERNEL);
		return migrate_page(mapping, newpage, page, mode);
	}
	return -EBUSY;
}
#endif
1709
1710int __init nfs_init_writepagecache(void)
1711{
1712        nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1713                                             sizeof(struct nfs_write_data),
1714                                             0, SLAB_HWCACHE_ALIGN,
1715                                             NULL);
1716        if (nfs_wdata_cachep == NULL)
1717                return -ENOMEM;
1718
1719        nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1720                                                     nfs_wdata_cachep);
1721        if (nfs_wdata_mempool == NULL)
1722                return -ENOMEM;
1723
1724        nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1725                                                      nfs_wdata_cachep);
1726        if (nfs_commit_mempool == NULL)
1727                return -ENOMEM;
1728
1729        /*
1730         * NFS congestion size, scale with available memory.
1731         *
1732         *  64MB:    8192k
1733         * 128MB:   11585k
1734         * 256MB:   16384k
1735         * 512MB:   23170k
1736         *   1GB:   32768k
1737         *   2GB:   46340k
1738         *   4GB:   65536k
1739         *   8GB:   92681k
1740         *  16GB:  131072k
1741         *
1742         * This allows larger machines to have larger/more transfers.
1743         * Limit the default to 256M
1744         */
1745        nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
1746        if (nfs_congestion_kb > 256*1024)
1747                nfs_congestion_kb = 256*1024;
1748
1749        return 0;
1750}
1751
1752void nfs_destroy_writepagecache(void)
1753{
1754        mempool_destroy(nfs_commit_mempool);
1755        mempool_destroy(nfs_wdata_mempool);
1756        kmem_cache_destroy(nfs_wdata_cachep);
1757}
1758
1759
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */