linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/uio.h>
  15#include <linux/miscdevice.h>
  16#include <linux/pagemap.h>
  17#include <linux/file.h>
  18#include <linux/slab.h>
  19#include <linux/pipe_fs_i.h>
  20#include <linux/swap.h>
  21#include <linux/splice.h>
  22
  23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  24MODULE_ALIAS("devname:fuse");
  25
  26static struct kmem_cache *fuse_req_cachep;
  27
  28static struct fuse_conn *fuse_get_conn(struct file *file)
  29{
  30        /*
   31         * Lockless access is OK, because file->private_data is set
  32         * once during mount and is valid until the file is released.
  33         */
  34        return file->private_data;
  35}
  36
  37static void fuse_request_init(struct fuse_req *req)
  38{
  39        memset(req, 0, sizeof(*req));
  40        INIT_LIST_HEAD(&req->list);
  41        INIT_LIST_HEAD(&req->intr_entry);
  42        init_waitqueue_head(&req->waitq);
  43        atomic_set(&req->count, 1);
  44}
  45
  46struct fuse_req *fuse_request_alloc(void)
  47{
  48        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
  49        if (req)
  50                fuse_request_init(req);
  51        return req;
  52}
  53EXPORT_SYMBOL_GPL(fuse_request_alloc);
  54
  55struct fuse_req *fuse_request_alloc_nofs(void)
  56{
  57        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
  58        if (req)
  59                fuse_request_init(req);
  60        return req;
  61}
  62
  63void fuse_request_free(struct fuse_req *req)
  64{
  65        kmem_cache_free(fuse_req_cachep, req);
  66}
  67
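/*
 * Block all signals except SIGKILL for the current task, saving the old
 * mask in *oldset so that restore_sigs() can put it back.  Used around
 * waits that should only be interruptible by a fatal signal.
 */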
  68static void block_sigs(sigset_t *oldset)
  69{
  70        sigset_t mask;
  71
  72        siginitsetinv(&mask, sigmask(SIGKILL));
  73        sigprocmask(SIG_BLOCK, &mask, oldset);
  74}
  75
  76static void restore_sigs(sigset_t *oldset)
  77{
  78        sigprocmask(SIG_SETMASK, oldset, NULL);
  79}
  80
  81static void __fuse_get_request(struct fuse_req *req)
  82{
  83        atomic_inc(&req->count);
  84}
  85
  86/* Must be called with > 1 refcount */
  87static void __fuse_put_request(struct fuse_req *req)
  88{
  89        BUG_ON(atomic_read(&req->count) < 2);
  90        atomic_dec(&req->count);
  91}
  92
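/*
 * Record the credentials and pid of the current task in the request
 * header, so the userspace filesystem knows who issued the operation.
 */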
  93static void fuse_req_init_context(struct fuse_req *req)
  94{
  95        req->in.h.uid = current_fsuid();
  96        req->in.h.gid = current_fsgid();
  97        req->in.h.pid = current->pid;
  98}
  99
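/*
 * Allocate a request for sending to userspace.  Waits while the
 * connection is blocked (during initialization, or when the background
 * queue is full); only SIGKILL can interrupt the wait.  Fails with
 * -EINTR, -ENOTCONN or -ENOMEM.
 *
 * A rough, illustrative sketch of the request lifecycle as seen by
 * callers elsewhere in fuse (not a verbatim copy of any one caller):
 *
 *	req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	... fill in req->in.h.opcode and the in/out arguments ...
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */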
 100struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 101{
 102        struct fuse_req *req;
 103        sigset_t oldset;
 104        int intr;
 105        int err;
 106
 107        atomic_inc(&fc->num_waiting);
 108        block_sigs(&oldset);
 109        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
 110        restore_sigs(&oldset);
 111        err = -EINTR;
 112        if (intr)
 113                goto out;
 114
 115        err = -ENOTCONN;
 116        if (!fc->connected)
 117                goto out;
 118
 119        req = fuse_request_alloc();
 120        err = -ENOMEM;
 121        if (!req)
 122                goto out;
 123
 124        fuse_req_init_context(req);
 125        req->waiting = 1;
 126        return req;
 127
 128 out:
 129        atomic_dec(&fc->num_waiting);
 130        return ERR_PTR(err);
 131}
 132EXPORT_SYMBOL_GPL(fuse_get_req);
 133
 134/*
  135 * Return the request stored in fuse_file->reserved_req.  However, that may
 136 * currently be in use.  If that is the case, wait for it to become
 137 * available.
 138 */
 139static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 140                                         struct file *file)
 141{
 142        struct fuse_req *req = NULL;
 143        struct fuse_file *ff = file->private_data;
 144
 145        do {
 146                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 147                spin_lock(&fc->lock);
 148                if (ff->reserved_req) {
 149                        req = ff->reserved_req;
 150                        ff->reserved_req = NULL;
 151                        get_file(file);
 152                        req->stolen_file = file;
 153                }
 154                spin_unlock(&fc->lock);
 155        } while (!req);
 156
 157        return req;
 158}
 159
 160/*
 161 * Put stolen request back into fuse_file->reserved_req
 162 */
 163static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 164{
 165        struct file *file = req->stolen_file;
 166        struct fuse_file *ff = file->private_data;
 167
 168        spin_lock(&fc->lock);
 169        fuse_request_init(req);
 170        BUG_ON(ff->reserved_req);
 171        ff->reserved_req = req;
 172        wake_up_all(&fc->reserved_req_waitq);
 173        spin_unlock(&fc->lock);
 174        fput(file);
 175}
 176
 177/*
  178 * Gets a request for a file operation, always succeeds
 179 *
 180 * This is used for sending the FLUSH request, which must get to
 181 * userspace, due to POSIX locks which may need to be unlocked.
 182 *
 183 * If allocation fails due to OOM, use the reserved request in
 184 * fuse_file.
 185 *
 186 * This is very unlikely to deadlock accidentally, since the
  187 * filesystem should not have its own file open.  If deadlock is
 188 * intentional, it can still be broken by "aborting" the filesystem.
 189 */
 190struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
 191{
 192        struct fuse_req *req;
 193
 194        atomic_inc(&fc->num_waiting);
 195        wait_event(fc->blocked_waitq, !fc->blocked);
 196        req = fuse_request_alloc();
 197        if (!req)
 198                req = get_reserved_req(fc, file);
 199
 200        fuse_req_init_context(req);
 201        req->waiting = 1;
 202        return req;
 203}
 204
 205void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 206{
 207        if (atomic_dec_and_test(&req->count)) {
 208                if (req->waiting)
 209                        atomic_dec(&fc->num_waiting);
 210
 211                if (req->stolen_file)
 212                        put_reserved_req(fc, req);
 213                else
 214                        fuse_request_free(req);
 215        }
 216}
 217EXPORT_SYMBOL_GPL(fuse_put_request);
 218
 219static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 220{
 221        unsigned nbytes = 0;
 222        unsigned i;
 223
 224        for (i = 0; i < numargs; i++)
 225                nbytes += args[i].size;
 226
 227        return nbytes;
 228}
 229
 230static u64 fuse_get_unique(struct fuse_conn *fc)
 231{
 232        fc->reqctr++;
 233        /* zero is special */
 234        if (fc->reqctr == 0)
 235                fc->reqctr = 1;
 236
 237        return fc->reqctr;
 238}
 239
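/*
 * Fill in the total length of the request, add it to the pending list
 * and wake up readers of the device file.  Called with fc->lock held.
 */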
 240static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 241{
 242        req->in.h.len = sizeof(struct fuse_in_header) +
 243                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 244        list_add_tail(&req->list, &fc->pending);
 245        req->state = FUSE_REQ_PENDING;
 246        if (!req->waiting) {
 247                req->waiting = 1;
 248                atomic_inc(&fc->num_waiting);
 249        }
 250        wake_up(&fc->waitq);
 251        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 252}
 253
 254void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 255                       u64 nodeid, u64 nlookup)
 256{
 257        forget->forget_one.nodeid = nodeid;
 258        forget->forget_one.nlookup = nlookup;
 259
 260        spin_lock(&fc->lock);
 261        if (fc->connected) {
 262                fc->forget_list_tail->next = forget;
 263                fc->forget_list_tail = forget;
 264                wake_up(&fc->waitq);
 265                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 266        } else {
 267                kfree(forget);
 268        }
 269        spin_unlock(&fc->lock);
 270}
 271
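/*
 * Move queued background requests onto the pending list as long as the
 * number of active background requests stays below max_background.
 * Called with fc->lock held.
 */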
 272static void flush_bg_queue(struct fuse_conn *fc)
 273{
 274        while (fc->active_background < fc->max_background &&
 275               !list_empty(&fc->bg_queue)) {
 276                struct fuse_req *req;
 277
 278                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 279                list_del(&req->list);
 280                fc->active_background++;
 281                req->in.h.unique = fuse_get_unique(fc);
 282                queue_request(fc, req);
 283        }
 284}
 285
 286/*
 287 * This function is called when a request is finished.  Either a reply
 288 * has arrived or it was aborted (and not yet sent) or some error
 289 * occurred during communication with userspace, or the device file
 290 * was closed.  The requester thread is woken up (if still waiting),
 291 * the 'end' callback is called if given, else the reference to the
  292 * request is released.
 293 *
  294 * Called with fc->lock held, releases it
 295 */
 296static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 297__releases(fc->lock)
 298{
 299        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 300        req->end = NULL;
 301        list_del(&req->list);
 302        list_del(&req->intr_entry);
 303        req->state = FUSE_REQ_FINISHED;
 304        if (req->background) {
 305                if (fc->num_background == fc->max_background) {
 306                        fc->blocked = 0;
 307                        wake_up_all(&fc->blocked_waitq);
 308                }
 309                if (fc->num_background == fc->congestion_threshold &&
 310                    fc->connected && fc->bdi_initialized) {
 311                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 312                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 313                }
 314                fc->num_background--;
 315                fc->active_background--;
 316                flush_bg_queue(fc);
 317        }
 318        spin_unlock(&fc->lock);
 319        wake_up(&req->waitq);
 320        if (end)
 321                end(fc, req);
 322        fuse_put_request(fc, req);
 323}
 324
 325static void wait_answer_interruptible(struct fuse_conn *fc,
 326                                      struct fuse_req *req)
 327__releases(fc->lock)
 328__acquires(fc->lock)
 329{
 330        if (signal_pending(current))
 331                return;
 332
 333        spin_unlock(&fc->lock);
 334        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
 335        spin_lock(&fc->lock);
 336}
 337
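/*
 * Queue an INTERRUPT for a request that has already been sent to
 * userspace and wake up readers.  Called with fc->lock held.
 */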
 338static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 339{
 340        list_add_tail(&req->intr_entry, &fc->interrupts);
 341        wake_up(&fc->waitq);
 342        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 343}
 344
 345static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 346__releases(fc->lock)
 347__acquires(fc->lock)
 348{
 349        if (!fc->no_interrupt) {
 350                /* Any signal may interrupt this */
 351                wait_answer_interruptible(fc, req);
 352
 353                if (req->aborted)
 354                        goto aborted;
 355                if (req->state == FUSE_REQ_FINISHED)
 356                        return;
 357
 358                req->interrupted = 1;
 359                if (req->state == FUSE_REQ_SENT)
 360                        queue_interrupt(fc, req);
 361        }
 362
 363        if (!req->force) {
 364                sigset_t oldset;
 365
 366                /* Only fatal signals may interrupt this */
 367                block_sigs(&oldset);
 368                wait_answer_interruptible(fc, req);
 369                restore_sigs(&oldset);
 370
 371                if (req->aborted)
 372                        goto aborted;
 373                if (req->state == FUSE_REQ_FINISHED)
 374                        return;
 375
 376                /* Request is not yet in userspace, bail out */
 377                if (req->state == FUSE_REQ_PENDING) {
 378                        list_del(&req->list);
 379                        __fuse_put_request(req);
 380                        req->out.h.error = -EINTR;
 381                        return;
 382                }
 383        }
 384
 385        /*
 386         * Either request is already in userspace, or it was forced.
 387         * Wait it out.
 388         */
 389        spin_unlock(&fc->lock);
 390        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 391        spin_lock(&fc->lock);
 392
 393        if (!req->aborted)
 394                return;
 395
 396 aborted:
 397        BUG_ON(req->state != FUSE_REQ_FINISHED);
 398        if (req->locked) {
  399                /* This is an uninterruptible sleep, because data is
 400                   being copied to/from the buffers of req.  During
 401                   locked state, there mustn't be any filesystem
 402                   operation (e.g. page fault), since that could lead
 403                   to deadlock */
 404                spin_unlock(&fc->lock);
 405                wait_event(req->waitq, !req->locked);
 406                spin_lock(&fc->lock);
 407        }
 408}
 409
 410void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 411{
 412        req->isreply = 1;
 413        spin_lock(&fc->lock);
 414        if (!fc->connected)
 415                req->out.h.error = -ENOTCONN;
 416        else if (fc->conn_error)
 417                req->out.h.error = -ECONNREFUSED;
 418        else {
 419                req->in.h.unique = fuse_get_unique(fc);
 420                queue_request(fc, req);
 421                /* acquire extra reference, since request is still needed
 422                   after request_end() */
 423                __fuse_get_request(req);
 424
 425                request_wait_answer(fc, req);
 426        }
 427        spin_unlock(&fc->lock);
 428}
 429EXPORT_SYMBOL_GPL(fuse_request_send);
 430
 431static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 432                                            struct fuse_req *req)
 433{
 434        req->background = 1;
 435        fc->num_background++;
 436        if (fc->num_background == fc->max_background)
 437                fc->blocked = 1;
 438        if (fc->num_background == fc->congestion_threshold &&
 439            fc->bdi_initialized) {
 440                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 441                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 442        }
 443        list_add_tail(&req->list, &fc->bg_queue);
 444        flush_bg_queue(fc);
 445}
 446
 447static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 448{
 449        spin_lock(&fc->lock);
 450        if (fc->connected) {
 451                fuse_request_send_nowait_locked(fc, req);
 452                spin_unlock(&fc->lock);
 453        } else {
 454                req->out.h.error = -ENOTCONN;
 455                request_end(fc, req);
 456        }
 457}
 458
 459void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 460{
 461        req->isreply = 1;
 462        fuse_request_send_nowait(fc, req);
 463}
 464EXPORT_SYMBOL_GPL(fuse_request_send_background);
 465
 466static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 467                                          struct fuse_req *req, u64 unique)
 468{
 469        int err = -ENODEV;
 470
 471        req->isreply = 0;
 472        req->in.h.unique = unique;
 473        spin_lock(&fc->lock);
 474        if (fc->connected) {
 475                queue_request(fc, req);
 476                err = 0;
 477        }
 478        spin_unlock(&fc->lock);
 479
 480        return err;
 481}
 482
 483/*
 484 * Called under fc->lock
 485 *
 486 * fc->connected must have been checked previously
 487 */
 488void fuse_request_send_background_locked(struct fuse_conn *fc,
 489                                         struct fuse_req *req)
 490{
 491        req->isreply = 1;
 492        fuse_request_send_nowait_locked(fc, req);
 493}
 494
 495/*
 496 * Lock the request.  Up to the next unlock_request() there mustn't be
 497 * anything that could cause a page-fault.  If the request was already
  498 * aborted, bail out.
 499 */
 500static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
 501{
 502        int err = 0;
 503        if (req) {
 504                spin_lock(&fc->lock);
 505                if (req->aborted)
 506                        err = -ENOENT;
 507                else
 508                        req->locked = 1;
 509                spin_unlock(&fc->lock);
 510        }
 511        return err;
 512}
 513
 514/*
  515 * Unlock request.  If it was aborted while it was locked, the
 516 * requester thread is currently waiting for it to be unlocked, so
 517 * wake it up.
 518 */
 519static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
 520{
 521        if (req) {
 522                spin_lock(&fc->lock);
 523                req->locked = 0;
 524                if (req->aborted)
 525                        wake_up(&req->waitq);
 526                spin_unlock(&fc->lock);
 527        }
 528}
 529
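/*
 * State for copying request data to or from userspace.  The copy can be
 * backed either by an iovec (regular read/write on the device) or by
 * pipe buffers (splice), and proceeds one page at a time.
 */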
 530struct fuse_copy_state {
 531        struct fuse_conn *fc;
 532        int write;
 533        struct fuse_req *req;
 534        const struct iovec *iov;
 535        struct pipe_buffer *pipebufs;
 536        struct pipe_buffer *currbuf;
 537        struct pipe_inode_info *pipe;
 538        unsigned long nr_segs;
 539        unsigned long seglen;
 540        unsigned long addr;
 541        struct page *pg;
 542        void *mapaddr;
 543        void *buf;
 544        unsigned len;
 545        unsigned move_pages:1;
 546};
 547
 548static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
 549                           int write,
 550                           const struct iovec *iov, unsigned long nr_segs)
 551{
 552        memset(cs, 0, sizeof(*cs));
 553        cs->fc = fc;
 554        cs->write = write;
 555        cs->iov = iov;
 556        cs->nr_segs = nr_segs;
 557}
 558
 559/* Unmap and put previous page of userspace buffer */
 560static void fuse_copy_finish(struct fuse_copy_state *cs)
 561{
 562        if (cs->currbuf) {
 563                struct pipe_buffer *buf = cs->currbuf;
 564
 565                if (!cs->write) {
 566                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
 567                } else {
 568                        kunmap(buf->page);
 569                        buf->len = PAGE_SIZE - cs->len;
 570                }
 571                cs->currbuf = NULL;
 572                cs->mapaddr = NULL;
 573        } else if (cs->mapaddr) {
 574                kunmap(cs->pg);
 575                if (cs->write) {
 576                        flush_dcache_page(cs->pg);
 577                        set_page_dirty_lock(cs->pg);
 578                }
 579                put_page(cs->pg);
 580                cs->mapaddr = NULL;
 581        }
 582}
 583
 584/*
  585 * Get another pageful of the userspace buffer, map it to kernel
  586 * address space and lock the request
 587 */
 588static int fuse_copy_fill(struct fuse_copy_state *cs)
 589{
 590        unsigned long offset;
 591        int err;
 592
 593        unlock_request(cs->fc, cs->req);
 594        fuse_copy_finish(cs);
 595        if (cs->pipebufs) {
 596                struct pipe_buffer *buf = cs->pipebufs;
 597
 598                if (!cs->write) {
 599                        err = buf->ops->confirm(cs->pipe, buf);
 600                        if (err)
 601                                return err;
 602
 603                        BUG_ON(!cs->nr_segs);
 604                        cs->currbuf = buf;
 605                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
 606                        cs->len = buf->len;
 607                        cs->buf = cs->mapaddr + buf->offset;
 608                        cs->pipebufs++;
 609                        cs->nr_segs--;
 610                } else {
 611                        struct page *page;
 612
 613                        if (cs->nr_segs == cs->pipe->buffers)
 614                                return -EIO;
 615
 616                        page = alloc_page(GFP_HIGHUSER);
 617                        if (!page)
 618                                return -ENOMEM;
 619
 620                        buf->page = page;
 621                        buf->offset = 0;
 622                        buf->len = 0;
 623
 624                        cs->currbuf = buf;
 625                        cs->mapaddr = kmap(page);
 626                        cs->buf = cs->mapaddr;
 627                        cs->len = PAGE_SIZE;
 628                        cs->pipebufs++;
 629                        cs->nr_segs++;
 630                }
 631        } else {
 632                if (!cs->seglen) {
 633                        BUG_ON(!cs->nr_segs);
 634                        cs->seglen = cs->iov[0].iov_len;
 635                        cs->addr = (unsigned long) cs->iov[0].iov_base;
 636                        cs->iov++;
 637                        cs->nr_segs--;
 638                }
 639                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
 640                if (err < 0)
 641                        return err;
 642                BUG_ON(err != 1);
 643                offset = cs->addr % PAGE_SIZE;
 644                cs->mapaddr = kmap(cs->pg);
 645                cs->buf = cs->mapaddr + offset;
 646                cs->len = min(PAGE_SIZE - offset, cs->seglen);
 647                cs->seglen -= cs->len;
 648                cs->addr += cs->len;
 649        }
 650
 651        return lock_request(cs->fc, cs->req);
 652}
 653
  654/* Do as much copying to/from the userspace buffer as we can */
 655static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 656{
 657        unsigned ncpy = min(*size, cs->len);
 658        if (val) {
 659                if (cs->write)
 660                        memcpy(cs->buf, *val, ncpy);
 661                else
 662                        memcpy(*val, cs->buf, ncpy);
 663                *val += ncpy;
 664        }
 665        *size -= ncpy;
 666        cs->len -= ncpy;
 667        cs->buf += ncpy;
 668        return ncpy;
 669}
 670
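/*
 * Check that a page stolen from a pipe buffer is sane enough to insert
 * into the page cache: not mapped, not already in a mapping, exactly one
 * reference, and no unexpected page flags.
 */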
 671static int fuse_check_page(struct page *page)
 672{
 673        if (page_mapcount(page) ||
 674            page->mapping != NULL ||
 675            page_count(page) != 1 ||
 676            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 677             ~(1 << PG_locked |
 678               1 << PG_referenced |
 679               1 << PG_uptodate |
 680               1 << PG_lru |
 681               1 << PG_active |
 682               1 << PG_reclaim))) {
 683                printk(KERN_WARNING "fuse: trying to steal weird page\n");
 684                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 685                return 1;
 686        }
 687        return 0;
 688}
 689
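/*
 * Try to steal the page backing the current pipe buffer and install it
 * in the page cache in place of *pagep, avoiding a data copy.  Returns
 * 0 on success, a negative error, or 1 if stealing wasn't possible and
 * the caller should fall back to an ordinary copy.
 */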
 690static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 691{
 692        int err;
 693        struct page *oldpage = *pagep;
 694        struct page *newpage;
 695        struct pipe_buffer *buf = cs->pipebufs;
 696        struct address_space *mapping;
 697        pgoff_t index;
 698
 699        unlock_request(cs->fc, cs->req);
 700        fuse_copy_finish(cs);
 701
 702        err = buf->ops->confirm(cs->pipe, buf);
 703        if (err)
 704                return err;
 705
 706        BUG_ON(!cs->nr_segs);
 707        cs->currbuf = buf;
 708        cs->len = buf->len;
 709        cs->pipebufs++;
 710        cs->nr_segs--;
 711
 712        if (cs->len != PAGE_SIZE)
 713                goto out_fallback;
 714
 715        if (buf->ops->steal(cs->pipe, buf) != 0)
 716                goto out_fallback;
 717
 718        newpage = buf->page;
 719
 720        if (WARN_ON(!PageUptodate(newpage)))
 721                return -EIO;
 722
 723        ClearPageMappedToDisk(newpage);
 724
 725        if (fuse_check_page(newpage) != 0)
 726                goto out_fallback_unlock;
 727
 728        mapping = oldpage->mapping;
 729        index = oldpage->index;
 730
 731        /*
  732         * This is a new and locked page; it shouldn't be mapped or
 733         * have any special flags on it
 734         */
 735        if (WARN_ON(page_mapped(oldpage)))
 736                goto out_fallback_unlock;
 737        if (WARN_ON(page_has_private(oldpage)))
 738                goto out_fallback_unlock;
 739        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 740                goto out_fallback_unlock;
 741        if (WARN_ON(PageMlocked(oldpage)))
 742                goto out_fallback_unlock;
 743
 744        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 745        if (err) {
 746                unlock_page(newpage);
 747                return err;
 748        }
 749
 750        page_cache_get(newpage);
 751
 752        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 753                lru_cache_add_file(newpage);
 754
 755        err = 0;
 756        spin_lock(&cs->fc->lock);
 757        if (cs->req->aborted)
 758                err = -ENOENT;
 759        else
 760                *pagep = newpage;
 761        spin_unlock(&cs->fc->lock);
 762
 763        if (err) {
 764                unlock_page(newpage);
 765                page_cache_release(newpage);
 766                return err;
 767        }
 768
 769        unlock_page(oldpage);
 770        page_cache_release(oldpage);
 771        cs->len = 0;
 772
 773        return 0;
 774
 775out_fallback_unlock:
 776        unlock_page(newpage);
 777out_fallback:
 778        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
 779        cs->buf = cs->mapaddr + buf->offset;
 780
 781        err = lock_request(cs->fc, cs->req);
 782        if (err)
 783                return err;
 784
 785        return 1;
 786}
 787
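/*
 * Instead of copying the data, take a reference on the request's page
 * and hand it to the next pipe buffer.  Used on the splice read path.
 */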
 788static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 789                         unsigned offset, unsigned count)
 790{
 791        struct pipe_buffer *buf;
 792
 793        if (cs->nr_segs == cs->pipe->buffers)
 794                return -EIO;
 795
 796        unlock_request(cs->fc, cs->req);
 797        fuse_copy_finish(cs);
 798
 799        buf = cs->pipebufs;
 800        page_cache_get(page);
 801        buf->page = page;
 802        buf->offset = offset;
 803        buf->len = count;
 804
 805        cs->pipebufs++;
 806        cs->nr_segs++;
 807        cs->len = 0;
 808
 809        return 0;
 810}
 811
 812/*
 813 * Copy a page in the request to/from the userspace buffer.  Must be
  814 * done atomically.
 815 */
 816static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 817                          unsigned offset, unsigned count, int zeroing)
 818{
 819        int err;
 820        struct page *page = *pagep;
 821
 822        if (page && zeroing && count < PAGE_SIZE)
 823                clear_highpage(page);
 824
 825        while (count) {
 826                if (cs->write && cs->pipebufs && page) {
 827                        return fuse_ref_page(cs, page, offset, count);
 828                } else if (!cs->len) {
 829                        if (cs->move_pages && page &&
 830                            offset == 0 && count == PAGE_SIZE) {
 831                                err = fuse_try_move_page(cs, pagep);
 832                                if (err <= 0)
 833                                        return err;
 834                        } else {
 835                                err = fuse_copy_fill(cs);
 836                                if (err)
 837                                        return err;
 838                        }
 839                }
 840                if (page) {
 841                        void *mapaddr = kmap_atomic(page);
 842                        void *buf = mapaddr + offset;
 843                        offset += fuse_copy_do(cs, &buf, &count);
 844                        kunmap_atomic(mapaddr);
 845                } else
 846                        offset += fuse_copy_do(cs, NULL, &count);
 847        }
 848        if (page && !cs->write)
 849                flush_dcache_page(page);
 850        return 0;
 851}
 852
 853/* Copy pages in the request to/from userspace buffer */
 854static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 855                           int zeroing)
 856{
 857        unsigned i;
 858        struct fuse_req *req = cs->req;
 859        unsigned offset = req->page_offset;
 860        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
 861
 862        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
 863                int err;
 864
 865                err = fuse_copy_page(cs, &req->pages[i], offset, count,
 866                                     zeroing);
 867                if (err)
 868                        return err;
 869
 870                nbytes -= count;
 871                count = min(nbytes, (unsigned) PAGE_SIZE);
 872                offset = 0;
 873        }
 874        return 0;
 875}
 876
 877/* Copy a single argument in the request to/from userspace buffer */
 878static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 879{
 880        while (size) {
 881                if (!cs->len) {
 882                        int err = fuse_copy_fill(cs);
 883                        if (err)
 884                                return err;
 885                }
 886                fuse_copy_do(cs, &val, &size);
 887        }
 888        return 0;
 889}
 890
 891/* Copy request arguments to/from userspace buffer */
 892static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 893                          unsigned argpages, struct fuse_arg *args,
 894                          int zeroing)
 895{
 896        int err = 0;
 897        unsigned i;
 898
 899        for (i = 0; !err && i < numargs; i++)  {
 900                struct fuse_arg *arg = &args[i];
 901                if (i == numargs - 1 && argpages)
 902                        err = fuse_copy_pages(cs, arg->size, zeroing);
 903                else
 904                        err = fuse_copy_one(cs, arg->value, arg->size);
 905        }
 906        return err;
 907}
 908
 909static int forget_pending(struct fuse_conn *fc)
 910{
 911        return fc->forget_list_head.next != NULL;
 912}
 913
 914static int request_pending(struct fuse_conn *fc)
 915{
 916        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
 917                forget_pending(fc);
 918}
 919
 920/* Wait until a request is available on the pending list */
 921static void request_wait(struct fuse_conn *fc)
 922__releases(fc->lock)
 923__acquires(fc->lock)
 924{
 925        DECLARE_WAITQUEUE(wait, current);
 926
 927        add_wait_queue_exclusive(&fc->waitq, &wait);
 928        while (fc->connected && !request_pending(fc)) {
 929                set_current_state(TASK_INTERRUPTIBLE);
 930                if (signal_pending(current))
 931                        break;
 932
 933                spin_unlock(&fc->lock);
 934                schedule();
 935                spin_lock(&fc->lock);
 936        }
 937        set_current_state(TASK_RUNNING);
 938        remove_wait_queue(&fc->waitq, &wait);
 939}
 940
 941/*
 942 * Transfer an interrupt request to userspace
 943 *
 944 * Unlike other requests this is assembled on demand, without a need
 945 * to allocate a separate fuse_req structure.
 946 *
 947 * Called with fc->lock held, releases it
 948 */
 949static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
 950                               size_t nbytes, struct fuse_req *req)
 951__releases(fc->lock)
 952{
 953        struct fuse_in_header ih;
 954        struct fuse_interrupt_in arg;
 955        unsigned reqsize = sizeof(ih) + sizeof(arg);
 956        int err;
 957
 958        list_del_init(&req->intr_entry);
 959        req->intr_unique = fuse_get_unique(fc);
 960        memset(&ih, 0, sizeof(ih));
 961        memset(&arg, 0, sizeof(arg));
 962        ih.len = reqsize;
 963        ih.opcode = FUSE_INTERRUPT;
 964        ih.unique = req->intr_unique;
 965        arg.unique = req->in.h.unique;
 966
 967        spin_unlock(&fc->lock);
 968        if (nbytes < reqsize)
 969                return -EINVAL;
 970
 971        err = fuse_copy_one(cs, &ih, sizeof(ih));
 972        if (!err)
 973                err = fuse_copy_one(cs, &arg, sizeof(arg));
 974        fuse_copy_finish(cs);
 975
 976        return err ? err : reqsize;
 977}
 978
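/*
 * Detach up to 'max' entries from the forget list and return the head
 * of the detached chain.  The number actually dequeued is stored in
 * *countp if it is non-NULL.  Called with fc->lock held.
 */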
 979static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
 980                                               unsigned max,
 981                                               unsigned *countp)
 982{
 983        struct fuse_forget_link *head = fc->forget_list_head.next;
 984        struct fuse_forget_link **newhead = &head;
 985        unsigned count;
 986
 987        for (count = 0; *newhead != NULL && count < max; count++)
 988                newhead = &(*newhead)->next;
 989
 990        fc->forget_list_head.next = *newhead;
 991        *newhead = NULL;
 992        if (fc->forget_list_head.next == NULL)
 993                fc->forget_list_tail = &fc->forget_list_head;
 994
 995        if (countp != NULL)
 996                *countp = count;
 997
 998        return head;
 999}
1000
1001static int fuse_read_single_forget(struct fuse_conn *fc,
1002                                   struct fuse_copy_state *cs,
1003                                   size_t nbytes)
1004__releases(fc->lock)
1005{
1006        int err;
1007        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1008        struct fuse_forget_in arg = {
1009                .nlookup = forget->forget_one.nlookup,
1010        };
1011        struct fuse_in_header ih = {
1012                .opcode = FUSE_FORGET,
1013                .nodeid = forget->forget_one.nodeid,
1014                .unique = fuse_get_unique(fc),
1015                .len = sizeof(ih) + sizeof(arg),
1016        };
1017
1018        spin_unlock(&fc->lock);
1019        kfree(forget);
1020        if (nbytes < ih.len)
1021                return -EINVAL;
1022
1023        err = fuse_copy_one(cs, &ih, sizeof(ih));
1024        if (!err)
1025                err = fuse_copy_one(cs, &arg, sizeof(arg));
1026        fuse_copy_finish(cs);
1027
1028        if (err)
1029                return err;
1030
1031        return ih.len;
1032}
1033
1034static int fuse_read_batch_forget(struct fuse_conn *fc,
1035                                   struct fuse_copy_state *cs, size_t nbytes)
1036__releases(fc->lock)
1037{
1038        int err;
1039        unsigned max_forgets;
1040        unsigned count;
1041        struct fuse_forget_link *head;
1042        struct fuse_batch_forget_in arg = { .count = 0 };
1043        struct fuse_in_header ih = {
1044                .opcode = FUSE_BATCH_FORGET,
1045                .unique = fuse_get_unique(fc),
1046                .len = sizeof(ih) + sizeof(arg),
1047        };
1048
1049        if (nbytes < ih.len) {
1050                spin_unlock(&fc->lock);
1051                return -EINVAL;
1052        }
1053
1054        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1055        head = dequeue_forget(fc, max_forgets, &count);
1056        spin_unlock(&fc->lock);
1057
1058        arg.count = count;
1059        ih.len += count * sizeof(struct fuse_forget_one);
1060        err = fuse_copy_one(cs, &ih, sizeof(ih));
1061        if (!err)
1062                err = fuse_copy_one(cs, &arg, sizeof(arg));
1063
1064        while (head) {
1065                struct fuse_forget_link *forget = head;
1066
1067                if (!err) {
1068                        err = fuse_copy_one(cs, &forget->forget_one,
1069                                            sizeof(forget->forget_one));
1070                }
1071                head = forget->next;
1072                kfree(forget);
1073        }
1074
1075        fuse_copy_finish(cs);
1076
1077        if (err)
1078                return err;
1079
1080        return ih.len;
1081}
1082
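/*
 * Transfer queued forgets to userspace.  Uses the batched
 * FUSE_BATCH_FORGET message when the protocol minor version is at least
 * 16 and more than one forget is queued, otherwise falls back to a
 * single FUSE_FORGET message.
 */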
1083static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1084                            size_t nbytes)
1085__releases(fc->lock)
1086{
1087        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1088                return fuse_read_single_forget(fc, cs, nbytes);
1089        else
1090                return fuse_read_batch_forget(fc, cs, nbytes);
1091}
1092
1093/*
1094 * Read a single request into the userspace filesystem's buffer.  This
1095 * function waits until a request is available, then removes it from
 1096 * the pending list and copies request data to the userspace buffer.  If
 1097 * no reply is needed (FORGET), the request has been aborted, or there
 1098 * was an error during the copying, then it is finished by calling
1099 * request_end().  Otherwise add it to the processing list, and set
1100 * the 'sent' flag.
1101 */
1102static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1103                                struct fuse_copy_state *cs, size_t nbytes)
1104{
1105        int err;
1106        struct fuse_req *req;
1107        struct fuse_in *in;
1108        unsigned reqsize;
1109
1110 restart:
1111        spin_lock(&fc->lock);
1112        err = -EAGAIN;
1113        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1114            !request_pending(fc))
1115                goto err_unlock;
1116
1117        request_wait(fc);
1118        err = -ENODEV;
1119        if (!fc->connected)
1120                goto err_unlock;
1121        err = -ERESTARTSYS;
1122        if (!request_pending(fc))
1123                goto err_unlock;
1124
1125        if (!list_empty(&fc->interrupts)) {
1126                req = list_entry(fc->interrupts.next, struct fuse_req,
1127                                 intr_entry);
1128                return fuse_read_interrupt(fc, cs, nbytes, req);
1129        }
1130
1131        if (forget_pending(fc)) {
1132                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1133                        return fuse_read_forget(fc, cs, nbytes);
1134
1135                if (fc->forget_batch <= -8)
1136                        fc->forget_batch = 16;
1137        }
1138
1139        req = list_entry(fc->pending.next, struct fuse_req, list);
1140        req->state = FUSE_REQ_READING;
1141        list_move(&req->list, &fc->io);
1142
1143        in = &req->in;
1144        reqsize = in->h.len;
1145        /* If request is too large, reply with an error and restart the read */
1146        if (nbytes < reqsize) {
1147                req->out.h.error = -EIO;
 1148                /* SETXATTR is special, since its data may be too large */
1149                if (in->h.opcode == FUSE_SETXATTR)
1150                        req->out.h.error = -E2BIG;
1151                request_end(fc, req);
1152                goto restart;
1153        }
1154        spin_unlock(&fc->lock);
1155        cs->req = req;
1156        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1157        if (!err)
1158                err = fuse_copy_args(cs, in->numargs, in->argpages,
1159                                     (struct fuse_arg *) in->args, 0);
1160        fuse_copy_finish(cs);
1161        spin_lock(&fc->lock);
1162        req->locked = 0;
1163        if (req->aborted) {
1164                request_end(fc, req);
1165                return -ENODEV;
1166        }
1167        if (err) {
1168                req->out.h.error = -EIO;
1169                request_end(fc, req);
1170                return err;
1171        }
1172        if (!req->isreply)
1173                request_end(fc, req);
1174        else {
1175                req->state = FUSE_REQ_SENT;
1176                list_move_tail(&req->list, &fc->processing);
1177                if (req->interrupted)
1178                        queue_interrupt(fc, req);
1179                spin_unlock(&fc->lock);
1180        }
1181        return reqsize;
1182
1183 err_unlock:
1184        spin_unlock(&fc->lock);
1185        return err;
1186}
1187
1188static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1189                              unsigned long nr_segs, loff_t pos)
1190{
1191        struct fuse_copy_state cs;
1192        struct file *file = iocb->ki_filp;
1193        struct fuse_conn *fc = fuse_get_conn(file);
1194        if (!fc)
1195                return -EPERM;
1196
1197        fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1198
1199        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1200}
1201
1202static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1203                                   struct pipe_buffer *buf)
1204{
1205        return 1;
1206}
1207
1208static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1209        .can_merge = 0,
1210        .map = generic_pipe_buf_map,
1211        .unmap = generic_pipe_buf_unmap,
1212        .confirm = generic_pipe_buf_confirm,
1213        .release = generic_pipe_buf_release,
1214        .steal = fuse_dev_pipe_buf_steal,
1215        .get = generic_pipe_buf_get,
1216};
1217
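/*
 * splice() support for reading a request from the device: the request is
 * copied (or its page data referenced) into a temporary array of pipe
 * buffers by fuse_dev_do_read(), and those buffers are then linked into
 * the destination pipe.
 */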
1218static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1219                                    struct pipe_inode_info *pipe,
1220                                    size_t len, unsigned int flags)
1221{
1222        int ret;
1223        int page_nr = 0;
1224        int do_wakeup = 0;
1225        struct pipe_buffer *bufs;
1226        struct fuse_copy_state cs;
1227        struct fuse_conn *fc = fuse_get_conn(in);
1228        if (!fc)
1229                return -EPERM;
1230
1231        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1232        if (!bufs)
1233                return -ENOMEM;
1234
1235        fuse_copy_init(&cs, fc, 1, NULL, 0);
1236        cs.pipebufs = bufs;
1237        cs.pipe = pipe;
1238        ret = fuse_dev_do_read(fc, in, &cs, len);
1239        if (ret < 0)
1240                goto out;
1241
1242        ret = 0;
1243        pipe_lock(pipe);
1244
1245        if (!pipe->readers) {
1246                send_sig(SIGPIPE, current, 0);
1247                if (!ret)
1248                        ret = -EPIPE;
1249                goto out_unlock;
1250        }
1251
1252        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1253                ret = -EIO;
1254                goto out_unlock;
1255        }
1256
1257        while (page_nr < cs.nr_segs) {
1258                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1259                struct pipe_buffer *buf = pipe->bufs + newbuf;
1260
1261                buf->page = bufs[page_nr].page;
1262                buf->offset = bufs[page_nr].offset;
1263                buf->len = bufs[page_nr].len;
1264                buf->ops = &fuse_dev_pipe_buf_ops;
1265
1266                pipe->nrbufs++;
1267                page_nr++;
1268                ret += buf->len;
1269
1270                if (pipe->inode)
1271                        do_wakeup = 1;
1272        }
1273
1274out_unlock:
1275        pipe_unlock(pipe);
1276
1277        if (do_wakeup) {
1278                smp_mb();
1279                if (waitqueue_active(&pipe->wait))
1280                        wake_up_interruptible(&pipe->wait);
1281                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1282        }
1283
1284out:
1285        for (; page_nr < cs.nr_segs; page_nr++)
1286                page_cache_release(bufs[page_nr].page);
1287
1288        kfree(bufs);
1289        return ret;
1290}
1291
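/*
 * Notification handlers below process unsolicited messages from the
 * userspace filesystem (writes with a zero 'unique' value), dispatched
 * by fuse_notify().
 */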
1292static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1293                            struct fuse_copy_state *cs)
1294{
1295        struct fuse_notify_poll_wakeup_out outarg;
1296        int err = -EINVAL;
1297
1298        if (size != sizeof(outarg))
1299                goto err;
1300
1301        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1302        if (err)
1303                goto err;
1304
1305        fuse_copy_finish(cs);
1306        return fuse_notify_poll_wakeup(fc, &outarg);
1307
1308err:
1309        fuse_copy_finish(cs);
1310        return err;
1311}
1312
1313static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1314                                   struct fuse_copy_state *cs)
1315{
1316        struct fuse_notify_inval_inode_out outarg;
1317        int err = -EINVAL;
1318
1319        if (size != sizeof(outarg))
1320                goto err;
1321
1322        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1323        if (err)
1324                goto err;
1325        fuse_copy_finish(cs);
1326
1327        down_read(&fc->killsb);
1328        err = -ENOENT;
1329        if (fc->sb) {
1330                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1331                                               outarg.off, outarg.len);
1332        }
1333        up_read(&fc->killsb);
1334        return err;
1335
1336err:
1337        fuse_copy_finish(cs);
1338        return err;
1339}
1340
1341static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1342                                   struct fuse_copy_state *cs)
1343{
1344        struct fuse_notify_inval_entry_out outarg;
1345        int err = -ENOMEM;
1346        char *buf;
1347        struct qstr name;
1348
1349        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1350        if (!buf)
1351                goto err;
1352
1353        err = -EINVAL;
1354        if (size < sizeof(outarg))
1355                goto err;
1356
1357        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1358        if (err)
1359                goto err;
1360
1361        err = -ENAMETOOLONG;
1362        if (outarg.namelen > FUSE_NAME_MAX)
1363                goto err;
1364
1365        err = -EINVAL;
1366        if (size != sizeof(outarg) + outarg.namelen + 1)
1367                goto err;
1368
1369        name.name = buf;
1370        name.len = outarg.namelen;
1371        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1372        if (err)
1373                goto err;
1374        fuse_copy_finish(cs);
1375        buf[outarg.namelen] = 0;
1376        name.hash = full_name_hash(name.name, name.len);
1377
1378        down_read(&fc->killsb);
1379        err = -ENOENT;
1380        if (fc->sb)
1381                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1382        up_read(&fc->killsb);
1383        kfree(buf);
1384        return err;
1385
1386err:
1387        kfree(buf);
1388        fuse_copy_finish(cs);
1389        return err;
1390}
1391
1392static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1393                              struct fuse_copy_state *cs)
1394{
1395        struct fuse_notify_delete_out outarg;
1396        int err = -ENOMEM;
1397        char *buf;
1398        struct qstr name;
1399
1400        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1401        if (!buf)
1402                goto err;
1403
1404        err = -EINVAL;
1405        if (size < sizeof(outarg))
1406                goto err;
1407
1408        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1409        if (err)
1410                goto err;
1411
1412        err = -ENAMETOOLONG;
1413        if (outarg.namelen > FUSE_NAME_MAX)
1414                goto err;
1415
1416        err = -EINVAL;
1417        if (size != sizeof(outarg) + outarg.namelen + 1)
1418                goto err;
1419
1420        name.name = buf;
1421        name.len = outarg.namelen;
1422        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1423        if (err)
1424                goto err;
1425        fuse_copy_finish(cs);
1426        buf[outarg.namelen] = 0;
1427        name.hash = full_name_hash(name.name, name.len);
1428
1429        down_read(&fc->killsb);
1430        err = -ENOENT;
1431        if (fc->sb)
1432                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1433                                               outarg.child, &name);
1434        up_read(&fc->killsb);
1435        kfree(buf);
1436        return err;
1437
1438err:
1439        kfree(buf);
1440        fuse_copy_finish(cs);
1441        return err;
1442}
1443
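/*
 * FUSE_NOTIFY_STORE: the userspace filesystem pushes data directly into
 * the page cache of the given inode at the given offset, growing the
 * recorded file size if the stored range extends past it.
 */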
1444static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1445                             struct fuse_copy_state *cs)
1446{
1447        struct fuse_notify_store_out outarg;
1448        struct inode *inode;
1449        struct address_space *mapping;
1450        u64 nodeid;
1451        int err;
1452        pgoff_t index;
1453        unsigned int offset;
1454        unsigned int num;
1455        loff_t file_size;
1456        loff_t end;
1457
1458        err = -EINVAL;
1459        if (size < sizeof(outarg))
1460                goto out_finish;
1461
1462        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1463        if (err)
1464                goto out_finish;
1465
1466        err = -EINVAL;
1467        if (size - sizeof(outarg) != outarg.size)
1468                goto out_finish;
1469
1470        nodeid = outarg.nodeid;
1471
1472        down_read(&fc->killsb);
1473
1474        err = -ENOENT;
1475        if (!fc->sb)
1476                goto out_up_killsb;
1477
1478        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1479        if (!inode)
1480                goto out_up_killsb;
1481
1482        mapping = inode->i_mapping;
1483        index = outarg.offset >> PAGE_CACHE_SHIFT;
1484        offset = outarg.offset & ~PAGE_CACHE_MASK;
1485        file_size = i_size_read(inode);
1486        end = outarg.offset + outarg.size;
1487        if (end > file_size) {
1488                file_size = end;
1489                fuse_write_update_size(inode, file_size);
1490        }
1491
1492        num = outarg.size;
1493        while (num) {
1494                struct page *page;
1495                unsigned int this_num;
1496
1497                err = -ENOMEM;
1498                page = find_or_create_page(mapping, index,
1499                                           mapping_gfp_mask(mapping));
1500                if (!page)
1501                        goto out_iput;
1502
1503                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1504                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1505                if (!err && offset == 0 && (num != 0 || file_size == end))
1506                        SetPageUptodate(page);
1507                unlock_page(page);
1508                page_cache_release(page);
1509
1510                if (err)
1511                        goto out_iput;
1512
1513                num -= this_num;
1514                offset = 0;
1515                index++;
1516        }
1517
1518        err = 0;
1519
1520out_iput:
1521        iput(inode);
1522out_up_killsb:
1523        up_read(&fc->killsb);
1524out_finish:
1525        fuse_copy_finish(cs);
1526        return err;
1527}
1528
1529static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1530{
1531        release_pages(req->pages, req->num_pages, 0);
1532}
1533
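/*
 * Collect the requested range of cached pages for the inode and send
 * them back to userspace in a FUSE_NOTIFY_REPLY request tagged with the
 * notify_unique value supplied by the filesystem.
 */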
1534static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1535                         struct fuse_notify_retrieve_out *outarg)
1536{
1537        int err;
1538        struct address_space *mapping = inode->i_mapping;
1539        struct fuse_req *req;
1540        pgoff_t index;
1541        loff_t file_size;
1542        unsigned int num;
1543        unsigned int offset;
1544        size_t total_len = 0;
1545
1546        req = fuse_get_req(fc);
1547        if (IS_ERR(req))
1548                return PTR_ERR(req);
1549
1550        offset = outarg->offset & ~PAGE_CACHE_MASK;
1551
1552        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1553        req->in.h.nodeid = outarg->nodeid;
1554        req->in.numargs = 2;
1555        req->in.argpages = 1;
1556        req->page_offset = offset;
1557        req->end = fuse_retrieve_end;
1558
1559        index = outarg->offset >> PAGE_CACHE_SHIFT;
1560        file_size = i_size_read(inode);
1561        num = outarg->size;
1562        if (outarg->offset > file_size)
1563                num = 0;
1564        else if (outarg->offset + num > file_size)
1565                num = file_size - outarg->offset;
1566
1567        while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1568                struct page *page;
1569                unsigned int this_num;
1570
1571                page = find_get_page(mapping, index);
1572                if (!page)
1573                        break;
1574
1575                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1576                req->pages[req->num_pages] = page;
1577                req->num_pages++;
1578
1579                offset = 0;
1580                num -= this_num;
1581                total_len += this_num;
1582                index++;
1583        }
1584        req->misc.retrieve_in.offset = outarg->offset;
1585        req->misc.retrieve_in.size = total_len;
1586        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1587        req->in.args[0].value = &req->misc.retrieve_in;
1588        req->in.args[1].size = total_len;
1589
1590        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1591        if (err)
1592                fuse_retrieve_end(fc, req);
1593
1594        return err;
1595}
1596
1597static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1598                                struct fuse_copy_state *cs)
1599{
1600        struct fuse_notify_retrieve_out outarg;
1601        struct inode *inode;
1602        int err;
1603
1604        err = -EINVAL;
1605        if (size != sizeof(outarg))
1606                goto copy_finish;
1607
1608        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1609        if (err)
1610                goto copy_finish;
1611
1612        fuse_copy_finish(cs);
1613
1614        down_read(&fc->killsb);
1615        err = -ENOENT;
1616        if (fc->sb) {
1617                u64 nodeid = outarg.nodeid;
1618
1619                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1620                if (inode) {
1621                        err = fuse_retrieve(fc, inode, &outarg);
1622                        iput(inode);
1623                }
1624        }
1625        up_read(&fc->killsb);
1626
1627        return err;
1628
1629copy_finish:
1630        fuse_copy_finish(cs);
1631        return err;
1632}
1633
1634static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1635                       unsigned int size, struct fuse_copy_state *cs)
1636{
1637        switch (code) {
1638        case FUSE_NOTIFY_POLL:
1639                return fuse_notify_poll(fc, size, cs);
1640
1641        case FUSE_NOTIFY_INVAL_INODE:
1642                return fuse_notify_inval_inode(fc, size, cs);
1643
1644        case FUSE_NOTIFY_INVAL_ENTRY:
1645                return fuse_notify_inval_entry(fc, size, cs);
1646
1647        case FUSE_NOTIFY_STORE:
1648                return fuse_notify_store(fc, size, cs);
1649
1650        case FUSE_NOTIFY_RETRIEVE:
1651                return fuse_notify_retrieve(fc, size, cs);
1652
1653        case FUSE_NOTIFY_DELETE:
1654                return fuse_notify_delete(fc, size, cs);
1655
1656        default:
1657                fuse_copy_finish(cs);
1658                return -EINVAL;
1659        }
1660}
1661
1662/* Look up request on processing list by unique ID */
1663static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1664{
1665        struct list_head *entry;
1666
1667        list_for_each(entry, &fc->processing) {
1668                struct fuse_req *req;
1669                req = list_entry(entry, struct fuse_req, list);
1670                if (req->in.h.unique == unique || req->intr_unique == unique)
1671                        return req;
1672        }
1673        return NULL;
1674}
1675
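/*
 * Copy the reply arguments from the userspace buffer into the request.
 * An error reply carries no arguments; a reply shorter than expected is
 * only accepted if the last argument is variable sized (out->argvar),
 * in which case it is truncated to fit.
 */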
1676static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1677                         unsigned nbytes)
1678{
1679        unsigned reqsize = sizeof(struct fuse_out_header);
1680
1681        if (out->h.error)
1682                return nbytes != reqsize ? -EINVAL : 0;
1683
1684        reqsize += len_args(out->numargs, out->args);
1685
1686        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1687                return -EINVAL;
1688        else if (reqsize > nbytes) {
1689                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1690                unsigned diffsize = reqsize - nbytes;
1691                if (diffsize > lastarg->size)
1692                        return -EINVAL;
1693                lastarg->size -= diffsize;
1694        }
1695        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1696                              out->page_zeroing);
1697}
1698
1699/*
1700 * Write a single reply to a request.  First the header is copied from
1701 * the write buffer.  The request is then searched on the processing
1702 * list by the unique ID found in the header.  If found, the request
1703 * is removed from the list and the rest of the buffer is copied into it.
1704 * The request is then finished by calling request_end().
1705 */
1706static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1707                                 struct fuse_copy_state *cs, size_t nbytes)
1708{
1709        int err;
1710        struct fuse_req *req;
1711        struct fuse_out_header oh;
1712
1713        if (nbytes < sizeof(struct fuse_out_header))
1714                return -EINVAL;
1715
1716        err = fuse_copy_one(cs, &oh, sizeof(oh));
1717        if (err)
1718                goto err_finish;
1719
1720        err = -EINVAL;
1721        if (oh.len != nbytes)
1722                goto err_finish;
1723
1724        /*
1725         * A zero oh.unique indicates an unsolicited notification message;
1726         * in that case the error field carries the notification code.
1727         */
1728        if (!oh.unique) {
1729                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1730                return err ? err : nbytes;
1731        }
1732
1733        err = -EINVAL;
1734        if (oh.error <= -1000 || oh.error > 0)
1735                goto err_finish;
1736
1737        spin_lock(&fc->lock);
1738        err = -ENOENT;
1739        if (!fc->connected)
1740                goto err_unlock;
1741
1742        req = request_find(fc, oh.unique);
1743        if (!req)
1744                goto err_unlock;
1745
1746        if (req->aborted) {
1747                spin_unlock(&fc->lock);
1748                fuse_copy_finish(cs);
1749                spin_lock(&fc->lock);
1750                request_end(fc, req);
1751                return -ENOENT;
1752        }
1753        /* Is it an interrupt reply? */
1754        if (req->intr_unique == oh.unique) {
1755                err = -EINVAL;
1756                if (nbytes != sizeof(struct fuse_out_header))
1757                        goto err_unlock;
1758
1759                if (oh.error == -ENOSYS)
1760                        fc->no_interrupt = 1;
1761                else if (oh.error == -EAGAIN)
1762                        queue_interrupt(fc, req);
1763
1764                spin_unlock(&fc->lock);
1765                fuse_copy_finish(cs);
1766                return nbytes;
1767        }
1768
1769        req->state = FUSE_REQ_WRITING;
1770        list_move(&req->list, &fc->io);
1771        req->out.h = oh;
1772        req->locked = 1;
1773        cs->req = req;
1774        if (!req->out.page_replace)
1775                cs->move_pages = 0;
1776        spin_unlock(&fc->lock);
1777
1778        err = copy_out_args(cs, &req->out, nbytes);
1779        fuse_copy_finish(cs);
1780
1781        spin_lock(&fc->lock);
1782        req->locked = 0;
1783        if (!err) {
1784                if (req->aborted)
1785                        err = -ENOENT;
1786        } else if (!req->aborted)
1787                req->out.h.error = -EIO;
1788        request_end(fc, req);
1789
1790        return err ? err : nbytes;
1791
1792 err_unlock:
1793        spin_unlock(&fc->lock);
1794 err_finish:
1795        fuse_copy_finish(cs);
1796        return err;
1797}
1798
1799static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1800                              unsigned long nr_segs, loff_t pos)
1801{
1802        struct fuse_copy_state cs;
1803        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1804        if (!fc)
1805                return -EPERM;
1806
1807        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1808
1809        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1810}
1811
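/*
 * Daemon-side sketch (illustrative only) of the reply format consumed
 * above: the answer to a request is one gathered write whose total
 * length equals oh.len, e.g.:
 *
 *	struct fuse_out_header oh;
 *	struct fuse_attr_out arg;		(payload type depends on the opcode)
 *
 *	oh.unique = <unique value copied from the request's in header>;
 *	oh.error  = 0;		(or a negative errno; an error reply is the header alone)
 *	oh.len    = sizeof(oh) + sizeof(arg);
 *	... write header and payload back to back, e.g. with writev()
 *
 * fuse_dev_do_write() rejects the write with -EINVAL if oh.len does not
 * match the number of bytes actually written.
 */
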
1812static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1813                                     struct file *out, loff_t *ppos,
1814                                     size_t len, unsigned int flags)
1815{
1816        unsigned nbuf;
1817        unsigned idx;
1818        struct pipe_buffer *bufs;
1819        struct fuse_copy_state cs;
1820        struct fuse_conn *fc;
1821        size_t rem;
1822        ssize_t ret;
1823
1824        fc = fuse_get_conn(out);
1825        if (!fc)
1826                return -EPERM;
1827
1828        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1829        if (!bufs)
1830                return -ENOMEM;
1831
1832        pipe_lock(pipe);
1833        nbuf = 0;
1834        rem = 0;
1835        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1836                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1837
1838        ret = -EINVAL;
1839        if (rem < len) {
1840                pipe_unlock(pipe);
1841                goto out;
1842        }
1843
1844        rem = len;
1845        while (rem) {
1846                struct pipe_buffer *ibuf;
1847                struct pipe_buffer *obuf;
1848
1849                BUG_ON(nbuf >= pipe->buffers);
1850                BUG_ON(!pipe->nrbufs);
1851                ibuf = &pipe->bufs[pipe->curbuf];
1852                obuf = &bufs[nbuf];
1853
1854                if (rem >= ibuf->len) {
1855                        *obuf = *ibuf;
1856                        ibuf->ops = NULL;
1857                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1858                        pipe->nrbufs--;
1859                } else {
1860                        ibuf->ops->get(pipe, ibuf);
1861                        *obuf = *ibuf;
1862                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1863                        obuf->len = rem;
1864                        ibuf->offset += obuf->len;
1865                        ibuf->len -= obuf->len;
1866                }
1867                nbuf++;
1868                rem -= obuf->len;
1869        }
1870        pipe_unlock(pipe);
1871
1872        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1873        cs.pipebufs = bufs;
1874        cs.pipe = pipe;
1875
1876        if (flags & SPLICE_F_MOVE)
1877                cs.move_pages = 1;
1878
1879        ret = fuse_dev_do_write(fc, &cs, len);
1880
1881        for (idx = 0; idx < nbuf; idx++) {
1882                struct pipe_buffer *buf = &bufs[idx];
1883                buf->ops->release(pipe, buf);
1884        }
1885out:
1886        kfree(bufs);
1887        return ret;
1888}
1889
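/*
 * When the writer passes SPLICE_F_MOVE, cs.move_pages is set above so
 * that pages sitting in the pipe may be moved into the page cache
 * instead of copied, provided the request allows it (the
 * req->out.page_replace check in fuse_dev_do_write()).
 */
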
1890static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1891{
1892        unsigned mask = POLLOUT | POLLWRNORM;
1893        struct fuse_conn *fc = fuse_get_conn(file);
1894        if (!fc)
1895                return POLLERR;
1896
1897        poll_wait(file, &fc->waitq, wait);
1898
1899        spin_lock(&fc->lock);
1900        if (!fc->connected)
1901                mask = POLLERR;
1902        else if (request_pending(fc))
1903                mask |= POLLIN | POLLRDNORM;
1904        spin_unlock(&fc->lock);
1905
1906        return mask;
1907}
1908
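/*
 * The device is always writable, since replies and notifications may
 * be written at any time.  It becomes readable when a request is
 * queued for the daemon, and reports POLLERR once the connection has
 * been released or aborted.
 */
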
1909/*
1910 * Abort all requests on the given list (pending or processing)
1911 *
1912 * This function releases and reacquires fc->lock
1913 */
1914static void end_requests(struct fuse_conn *fc, struct list_head *head)
1915__releases(fc->lock)
1916__acquires(fc->lock)
1917{
1918        while (!list_empty(head)) {
1919                struct fuse_req *req;
1920                req = list_entry(head->next, struct fuse_req, list);
1921                req->out.h.error = -ECONNABORTED;
1922                request_end(fc, req);
1923                spin_lock(&fc->lock);
1924        }
1925}
1926
1927/*
1928 * Abort requests under I/O
1929 *
1930 * The requests are set to aborted and finished, and the request
1931 * waiter is woken up.  This will make request_wait_answer() wait
1932 * until the request is unlocked and then return.
1933 *
1934 * If the request is asynchronous, then the end function needs to be
1935 * called after waiting for the request to be unlocked (if it was
1936 * locked).
1937 */
1938static void end_io_requests(struct fuse_conn *fc)
1939__releases(fc->lock)
1940__acquires(fc->lock)
1941{
1942        while (!list_empty(&fc->io)) {
1943                struct fuse_req *req =
1944                        list_entry(fc->io.next, struct fuse_req, list);
1945                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1946
1947                req->aborted = 1;
1948                req->out.h.error = -ECONNABORTED;
1949                req->state = FUSE_REQ_FINISHED;
1950                list_del_init(&req->list);
1951                wake_up(&req->waitq);
1952                if (end) {
1953                        req->end = NULL;
1954                        __fuse_get_request(req);
1955                        spin_unlock(&fc->lock);
1956                        wait_event(req->waitq, !req->locked);
1957                        end(fc, req);
1958                        fuse_put_request(fc, req);
1959                        spin_lock(&fc->lock);
1960                }
1961        }
1962}
1963
1964static void end_queued_requests(struct fuse_conn *fc)
1965__releases(fc->lock)
1966__acquires(fc->lock)
1967{
1968        fc->max_background = UINT_MAX;
1969        flush_bg_queue(fc);
1970        end_requests(fc, &fc->pending);
1971        end_requests(fc, &fc->processing);
1972        while (forget_pending(fc))
1973                kfree(dequeue_forget(fc, 1, NULL));
1974}
1975
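/*
 * end_queued_requests() first lifts the background limit and flushes
 * the background queue, so that backgrounded requests reach fc->pending
 * and are ended together with everything already on the pending and
 * processing lists; queued forget messages are simply freed.
 */
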
1976static void end_polls(struct fuse_conn *fc)
1977{
1978        struct rb_node *p;
1979
1980        p = rb_first(&fc->polled_files);
1981
1982        while (p) {
1983                struct fuse_file *ff;
1984                ff = rb_entry(p, struct fuse_file, polled_node);
1985                wake_up_interruptible_all(&ff->poll_wait);
1986
1987                p = rb_next(p);
1988        }
1989}
1990
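/*
 * end_polls() wakes every file that registered for poll notification,
 * so that processes sleeping in poll() or select() on files of this
 * filesystem wake up and can observe that the connection is gone.
 */
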
1991/*
1992 * Abort all requests.
1993 *
1994 * Emergency exit in case of a malicious or accidental deadlock, or
1995 * just a hung filesystem.
1996 *
1997 * The same effect is usually achievable through killing the
1998 * filesystem daemon and all users of the filesystem.  The exception
1999 * is the combination of an asynchronous request and the tricky
2000 * deadlock (see Documentation/filesystems/fuse.txt).
2001 *
2002 * During the aborting, progression of requests from the pending and
2003 * processing lists onto the io list, and progression of new requests
2004 * onto the pending list is prevented by fc->connected being false.
2005 *
2006 * Progression of requests under I/O to the processing list is
2007 * prevented by the req->aborted flag being true for these requests.
2008 * For this reason requests on the io list must be aborted first.
2009 */
2010void fuse_abort_conn(struct fuse_conn *fc)
2011{
2012        spin_lock(&fc->lock);
2013        if (fc->connected) {
2014                fc->connected = 0;
2015                fc->blocked = 0;
2016                end_io_requests(fc);
2017                end_queued_requests(fc);
2018                end_polls(fc);
2019                wake_up_all(&fc->waitq);
2020                wake_up_all(&fc->blocked_waitq);
2021                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2022        }
2023        spin_unlock(&fc->lock);
2024}
2025EXPORT_SYMBOL_GPL(fuse_abort_conn);
2026
2027int fuse_dev_release(struct inode *inode, struct file *file)
2028{
2029        struct fuse_conn *fc = fuse_get_conn(file);
2030        if (fc) {
2031                spin_lock(&fc->lock);
2032                fc->connected = 0;
2033                fc->blocked = 0;
2034                end_queued_requests(fc);
2035                end_polls(fc);
2036                wake_up_all(&fc->blocked_waitq);
2037                spin_unlock(&fc->lock);
2038                fuse_conn_put(fc);
2039        }
2040
2041        return 0;
2042}
2043EXPORT_SYMBOL_GPL(fuse_dev_release);
2044
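/*
 * Unlike fuse_abort_conn(), fuse_dev_release() above does not walk the
 * io list: it runs only once the last reference to the device file is
 * dropped, so no read or write on the device can still be in progress
 * and no request should remain on fc->io.
 */
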
2045static int fuse_dev_fasync(int fd, struct file *file, int on)
2046{
2047        struct fuse_conn *fc = fuse_get_conn(file);
2048        if (!fc)
2049                return -EPERM;
2050
2051        /* No locking - fasync_helper does its own locking */
2052        return fasync_helper(fd, file, on, &fc->fasync);
2053}
2054
2055const struct file_operations fuse_dev_operations = {
2056        .owner          = THIS_MODULE,
2057        .llseek         = no_llseek,
2058        .read           = do_sync_read,
2059        .aio_read       = fuse_dev_read,
2060        .splice_read    = fuse_dev_splice_read,
2061        .write          = do_sync_write,
2062        .aio_write      = fuse_dev_write,
2063        .splice_write   = fuse_dev_splice_write,
2064        .poll           = fuse_dev_poll,
2065        .release        = fuse_dev_release,
2066        .fasync         = fuse_dev_fasync,
2067};
2068EXPORT_SYMBOL_GPL(fuse_dev_operations);
2069
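/*
 * .read and .write use the generic do_sync_read/do_sync_write wrappers,
 * so plain read(2) and write(2) on the device funnel into the same
 * fuse_dev_read/fuse_dev_write paths as aio, while the splice hooks
 * provide the zero-copy variants.
 */
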
2070static struct miscdevice fuse_miscdevice = {
2071        .minor = FUSE_MINOR,
2072        .name  = "fuse",
2073        .fops = &fuse_dev_operations,
2074};
2075
2076int __init fuse_dev_init(void)
2077{
2078        int err = -ENOMEM;
2079        fuse_req_cachep = kmem_cache_create("fuse_request",
2080                                            sizeof(struct fuse_req),
2081                                            0, 0, NULL);
2082        if (!fuse_req_cachep)
2083                goto out;
2084
2085        err = misc_register(&fuse_miscdevice);
2086        if (err)
2087                goto out_cache_clean;
2088
2089        return 0;
2090
2091 out_cache_clean:
2092        kmem_cache_destroy(fuse_req_cachep);
2093 out:
2094        return err;
2095}
2096
2097void fuse_dev_cleanup(void)
2098{
2099        misc_deregister(&fuse_miscdevice);
2100        kmem_cache_destroy(fuse_req_cachep);
2101}
2102