/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_flushd.h>
#include <linux/nfs_mount.h>

#define NFS_PARANOIA 1

/*
 * Spinlock
 */
spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED;

static kmem_cache_t *nfs_page_cachep;

static inline struct nfs_page *
nfs_page_alloc(void)
{
	struct nfs_page	*p;
	p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS);
	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->wb_hash);
		INIT_LIST_HEAD(&p->wb_list);
		INIT_LIST_HEAD(&p->wb_lru);
		init_waitqueue_head(&p->wb_wait);
	}
	return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
	kmem_cache_free(nfs_page_cachep, p);
}

static int nfs_try_to_free_pages(struct nfs_server *);

/**
 * nfs_create_request - Create an NFS read/write request.
 * @cred: RPC credential to use
 * @inode: inode to which the request is attached
 * @page: page to read from or write to
 * @offset: starting offset within the page for the read/write
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page, and avoids
 * a possible deadlock when we reach the hard limit on the number
 * of dirty pages.
 * The caller must ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct rpc_cred *cred, struct inode *inode,
		   struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_server	*server = NFS_SERVER(inode);
	struct nfs_reqlist	*cache = NFS_REQUESTLIST(inode);
	struct nfs_page		*req;

	/* Deal with hard limits.  */
	for (;;) {
		/* Prevent races by incrementing *before* we test */
		atomic_inc(&cache->nr_requests);

		/* If we haven't reached the local hard limit yet,
		 * try to allocate the request struct */
		if (atomic_read(&cache->nr_requests) <= MAX_REQUEST_HARD) {
			req = nfs_page_alloc();
			if (req != NULL)
				break;
		}

		atomic_dec(&cache->nr_requests);

		/* Try to free up at least one request in order to stay
		 * below the hard limit
		 */
		if (nfs_try_to_free_pages(server))
			continue;
		if (signalled() && (server->flags & NFS_MOUNT_INTR))
			return ERR_PTR(-ERESTARTSYS);
		yield();
	}

	/* Initialize the request struct. Initially, we assume a
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page	= page;
	page_cache_get(page);
	req->wb_offset	= offset;
	req->wb_bytes	= count;

	if (cred)
		req->wb_cred = get_rpccred(cred);
	req->wb_inode	= inode;
	req->wb_count	= 1;

	return req;
}
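
/*
 * Illustrative usage sketch (not taken from this file): the caller locks
 * the page, creates the request, and eventually drops its reference with
 * nfs_release_request().  nfs_create_request() either returns a request
 * with wb_count == 1 or ERR_PTR(-ERESTARTSYS) when interrupted.
 *
 *	req = nfs_create_request(cred, inode, page, offset, count);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	... queue the request for I/O ...
 *	nfs_release_request(req);
 */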

/**
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: request to clear
 *
 * Release all resources associated with a request after it
 * has completed.
 */
void nfs_clear_request(struct nfs_page *req)
{
	/* Release struct file or cached credential */
	if (req->wb_file) {
		fput(req->wb_file);
		req->wb_file = NULL;
	}
	if (req->wb_cred) {
		put_rpccred(req->wb_cred);
		req->wb_cred = NULL;
	}
	if (req->wb_page) {
		page_cache_release(req->wb_page);
		req->wb_page = NULL;
		atomic_dec(&NFS_REQUESTLIST(req->wb_inode)->nr_requests);
	}
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
void
nfs_release_request(struct nfs_page *req)
{
	spin_lock(&nfs_wreq_lock);
	if (--req->wb_count) {
		spin_unlock(&nfs_wreq_lock);
		return;
	}
	__nfs_del_lru(req);
	spin_unlock(&nfs_wreq_lock);

#ifdef NFS_PARANOIA
	if (!list_empty(&req->wb_list))
		BUG();
	if (!list_empty(&req->wb_hash))
		BUG();
	if (NFS_WBACK_BUSY(req))
		BUG();
	if (atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0)
		BUG();
#endif

	/* Release struct file or cached credential */
	nfs_clear_request(req);
	nfs_page_free(req);
}

/**
 * nfs_list_add_request - Insert a request into a sorted list
 * @req: request
 * @head: head of list into which to insert the request.
 *
 * Note that the wb_list is sorted by page index in order to facilitate
 * coalescing of requests.
 * We use an insertion sort that is optimized for the case of appended
 * writes.
 */
void
nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
	struct list_head *pos;
	unsigned long pg_idx = page_index(req->wb_page);

#ifdef NFS_PARANOIA
	if (!list_empty(&req->wb_list)) {
		printk(KERN_ERR "NFS: Add to list failed!\n");
		BUG();
	}
#endif
	list_for_each_prev(pos, head) {
		struct nfs_page	*p = nfs_list_entry(pos);
		if (page_index(p->wb_page) < pg_idx)
			break;
	}
	list_add(&req->wb_list, pos);
	req->wb_list_head = head;
}
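
/*
 * Example (illustrative only): inserting requests for page indices 7, 5
 * and 6 into an empty list leaves them ordered 5, 6, 7.  In the common
 * append case the new index is larger than anything already queued, so
 * the backwards walk above stops at the tail element and the insert
 * costs O(1).
 */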

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * Interruptible by signals only if mounted with intr flag.
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
	struct inode	*inode = req->wb_inode;
	struct rpc_clnt	*clnt = NFS_CLIENT(inode);

	if (!NFS_WBACK_BUSY(req))
		return 0;
	return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
}

/**
 * nfs_coalesce_requests - Split coalesced requests out from a list.
 * @head: source list
 * @dst: destination list
 * @nmax: maximum number of requests to coalesce
 *
 * Moves a maximum of 'nmax' elements from one list to another.
 * The elements are checked to ensure that they form a contiguous set
 * of pages, and that they originate from the same file.
 */
int
nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
		      unsigned int nmax)
{
	struct nfs_page		*req = NULL;
	unsigned int		npages = 0;

	while (!list_empty(head)) {
		struct nfs_page	*prev = req;

		req = nfs_list_entry(head->next);
		if (prev) {
			if (req->wb_cred != prev->wb_cred)
				break;
			if (page_index(req->wb_page) != page_index(prev->wb_page)+1)
				break;

			if (req->wb_offset != 0)
				break;
		}
		nfs_list_remove_request(req);
		nfs_list_add_request(req, dst);
		npages++;
		if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
			break;
		if (npages >= nmax)
			break;
	}
	return npages;
}
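
/*
 * Worked example (illustrative only): with full-page requests for page
 * indices 3, 4, 5 and 9 on @head, all under the same credential, a call
 * with nmax >= 4 moves 3, 4 and 5 onto @dst and returns 3; index 9 stays
 * behind because it is not contiguous with 5.  A request that does not
 * end on a page boundary is still moved, but terminates the run right
 * after it.
 */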

/*
 * nfs_scan_forward - Coalesce more requests
 * @req: First request to add
 * @dst: destination list
 * @nmax: maximum number of requests to coalesce
 *
 * Tries to coalesce more requests by traversing the request's wb_list.
 * Moves the resulting list into dst. Requests are guaranteed to be
 * contiguous, and to originate from the same file.
 */
static int
nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax)
{
	struct nfs_server *server = NFS_SERVER(req->wb_inode);
	struct list_head *pos, *head = req->wb_list_head;
	struct rpc_cred *cred = req->wb_cred;
	unsigned long idx = page_index(req->wb_page) + 1;
	int npages = 0;

	for (pos = req->wb_list.next; nfs_lock_request(req); pos = pos->next) {
		nfs_list_remove_request(req);
		nfs_list_add_request(req, dst);
		__nfs_del_lru(req);
		__nfs_add_lru(&server->lru_busy, req);
		npages++;
		if (npages == nmax)
			break;
		if (pos == head)
			break;
		if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
			break;
		req = nfs_list_entry(pos);
		if (page_index(req->wb_page) != idx++)
			break;
		if (req->wb_offset != 0)
			break;
		if (req->wb_cred != cred)
			break;
	}
	return npages;
}
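
/*
 * Note: nfs_scan_forward() only moves requests it manages to lock.  The
 * run it builds starts at @req and follows wb_list in page-index order,
 * stopping at the first gap, credential change, partial page, already
 * locked request, or once nmax requests have been collected.
 * nfs_scan_lru() and nfs_scan_lru_timeout() below use it to turn a
 * single lru entry into a coalescable batch.
 */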

/**
 * nfs_scan_lru - Scan one of the least recently used lists
 * @head: One of the NFS superblock lru lists
 * @dst: Destination list
 * @nmax: maximum number of requests to coalesce
 *
 * Scans one of the NFS superblock lru lists for up to nmax requests
 * and returns them on a list. The requests are all guaranteed to be
 * contiguous, originating from the same inode and the same file.
 */
int
nfs_scan_lru(struct list_head *head, struct list_head *dst, int nmax)
{
	struct list_head *pos;
	struct nfs_page *req;
	int npages = 0;

	list_for_each(pos, head) {
		req = nfs_lru_entry(pos);
		npages = nfs_scan_forward(req, dst, nmax);
		if (npages)
			break;
	}
	return npages;
}

/**
 * nfs_scan_lru_timeout - Scan one of the superblock lru lists for timed out requests
 * @head: One of the NFS superblock lru lists
 * @dst: Destination list
 * @nmax: maximum number of requests to coalesce
 *
 * Scans one of the NFS superblock lru lists for up to nmax requests
 * and returns them on a list. The requests are all guaranteed to be
 * contiguous, originating from the same inode and the same file.
 * The first request on the destination list will have timed out; the
 * others are not guaranteed to have done so.
 */
int
nfs_scan_lru_timeout(struct list_head *head, struct list_head *dst, int nmax)
{
	struct list_head *pos;
	struct nfs_page *req;
	int npages = 0;

	list_for_each(pos, head) {
		req = nfs_lru_entry(pos);
		if (time_after(req->wb_timeout, jiffies))
			break;
		npages = nfs_scan_forward(req, dst, nmax);
		if (npages)
			break;
	}
	return npages;
}

/**
 * nfs_scan_list - Scan a list for matching requests
 * @head: One of the NFS inode request lists
 * @dst: Destination list
 * @file: if set, ensure we match requests from this file
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves elements from one of the inode request lists.
 * If @npages is 0, the entire address_space starting at index
 * @idx_start is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the nfs_wreq_lock when calling this function.
 */
int
nfs_scan_list(struct list_head *head, struct list_head *dst,
	      struct file *file,
	      unsigned long idx_start, unsigned int npages)
{
	struct list_head	*pos, *tmp;
	struct nfs_page		*req;
	unsigned long		idx_end;
	int			res;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	list_for_each_safe(pos, tmp, head) {
		unsigned long pg_idx;

		req = nfs_list_entry(pos);

		if (file && req->wb_file != file)
			continue;

		pg_idx = page_index(req->wb_page);
		if (pg_idx < idx_start)
			continue;
		if (pg_idx > idx_end)
			break;

		if (!nfs_lock_request(req))
			continue;
		nfs_list_remove_request(req);
		nfs_list_add_request(req, dst);
		__nfs_del_lru(req);
		__nfs_add_lru(&NFS_SERVER(req->wb_inode)->lru_busy, req);
		res++;
	}
	return res;
}
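
/*
 * Usage sketch (hypothetical; 'inode_request_list' merely stands in for
 * one of the per-inode request lists this function is meant to scan):
 *
 *	spin_lock(&nfs_wreq_lock);
 *	res = nfs_scan_list(inode_request_list, &head, NULL, 0, 0);
 *	spin_unlock(&nfs_wreq_lock);
 *
 * With file == NULL and npages == 0, every request on the list that can
 * be locked is moved onto @head, regardless of its page index.
 */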

/*
 * nfs_try_to_free_pages - Free up NFS read/write requests
 * @server: The NFS superblock
 *
 * This function attempts to flush out NFS reads and writes in order
 * to stay below the hard limit on the total number of pending requests
 * on a given NFS partition.
 * Note: we first try to commit unstable writes, then flush out pending
 *       reads, then finally the dirty pages.
 *       The assumption is that this reflects the ordering from the fastest
 *       to the slowest method for reclaiming requests.
 */
static int
nfs_try_to_free_pages(struct nfs_server *server)
{
	LIST_HEAD(head);
	struct nfs_page *req = NULL;
	int nreq;

	for (;;) {
		if (req) {
			int status = nfs_wait_on_request(req);
			nfs_release_request(req);
			if (status)
				break;
			req = NULL;
		}
		nreq = atomic_read(&server->rw_requests->nr_requests);
		if (nreq < MAX_REQUEST_HARD)
			return 1;
		spin_lock(&nfs_wreq_lock);
		/* Are there any busy RPC calls that might free up requests? */
		if (!list_empty(&server->lru_busy)) {
			req = nfs_lru_entry(server->lru_busy.next);
			req->wb_count++;
			__nfs_del_lru(req);
			spin_unlock(&nfs_wreq_lock);
			continue;
		}

#ifdef CONFIG_NFS_V3
		/* Let's try to free up some completed NFSv3 unstable writes */
		nfs_scan_lru_commit(server, &head);
		if (!list_empty(&head)) {
			spin_unlock(&nfs_wreq_lock);
			nfs_commit_list(&head, 0);
			continue;
		}
#endif
		/* OK, so we try to free up some pending readaheads */
		nfs_scan_lru_read(server, &head);
		if (!list_empty(&head)) {
			spin_unlock(&nfs_wreq_lock);
			nfs_pagein_list(&head, server->rpages);
			continue;
		}
		/* Last resort: we try to flush out single requests */
		nfs_scan_lru_dirty(server, &head);
		if (!list_empty(&head)) {
			spin_unlock(&nfs_wreq_lock);
			nfs_flush_list(&head, server->wpages, FLUSH_STABLE);
			continue;
		}
		spin_unlock(&nfs_wreq_lock);
		break;
	}
	/* We failed to free up requests */
	return 0;
}
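
/*
 * Note: each pass above drops nfs_wreq_lock before starting any I/O and
 * then re-checks nr_requests against MAX_REQUEST_HARD at the top of the
 * loop.  Waiting on a busy request takes an extra reference (wb_count++)
 * so that the request cannot be freed while we sleep in
 * nfs_wait_on_request().
 */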

int nfs_init_nfspagecache(void)
{
	nfs_page_cachep = kmem_cache_create("nfs_page",
					    sizeof(struct nfs_page),
					    0, SLAB_HWCACHE_ALIGN,
					    NULL, NULL);
	if (nfs_page_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_nfspagecache(void)
{
	if (kmem_cache_destroy(nfs_page_cachep))
		printk(KERN_INFO "nfs_page: not all structures were freed\n");
}