linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/uio.h>
  15#include <linux/miscdevice.h>
  16#include <linux/pagemap.h>
  17#include <linux/file.h>
  18#include <linux/slab.h>
  19#include <linux/pipe_fs_i.h>
  20#include <linux/swap.h>
  21#include <linux/splice.h>
  22
  23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  24MODULE_ALIAS("devname:fuse");
  25
  26static struct kmem_cache *fuse_req_cachep;
  27
  28static struct fuse_conn *fuse_get_conn(struct file *file)
  29{
  30        /*
   31         * Lockless access is OK, because file->private_data is set
  32         * once during mount and is valid until the file is released.
  33         */
  34        return file->private_data;
  35}
  36
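/* Reset a request to a pristine state holding a single reference */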
  37static void fuse_request_init(struct fuse_req *req)
  38{
  39        memset(req, 0, sizeof(*req));
  40        INIT_LIST_HEAD(&req->list);
  41        INIT_LIST_HEAD(&req->intr_entry);
  42        init_waitqueue_head(&req->waitq);
  43        atomic_set(&req->count, 1);
  44}
  45
  46struct fuse_req *fuse_request_alloc(void)
  47{
  48        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
  49        if (req)
  50                fuse_request_init(req);
  51        return req;
  52}
  53EXPORT_SYMBOL_GPL(fuse_request_alloc);
  54
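/*
 * Same as fuse_request_alloc(), but uses GFP_NOFS so the allocation
 * cannot recurse back into filesystem code during memory reclaim.
 */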
  55struct fuse_req *fuse_request_alloc_nofs(void)
  56{
  57        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
  58        if (req)
  59                fuse_request_init(req);
  60        return req;
  61}
  62
  63void fuse_request_free(struct fuse_req *req)
  64{
  65        kmem_cache_free(fuse_req_cachep, req);
  66}
  67
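/* Block all signals except SIGKILL, saving the old mask in *oldset */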
  68static void block_sigs(sigset_t *oldset)
  69{
  70        sigset_t mask;
  71
  72        siginitsetinv(&mask, sigmask(SIGKILL));
  73        sigprocmask(SIG_BLOCK, &mask, oldset);
  74}
  75
  76static void restore_sigs(sigset_t *oldset)
  77{
  78        sigprocmask(SIG_SETMASK, oldset, NULL);
  79}
  80
  81static void __fuse_get_request(struct fuse_req *req)
  82{
  83        atomic_inc(&req->count);
  84}
  85
  86/* Must be called with > 1 refcount */
  87static void __fuse_put_request(struct fuse_req *req)
  88{
  89        BUG_ON(atomic_read(&req->count) < 2);
  90        atomic_dec(&req->count);
  91}
  92
  93static void fuse_req_init_context(struct fuse_req *req)
  94{
  95        req->in.h.uid = current_fsuid();
  96        req->in.h.gid = current_fsgid();
  97        req->in.h.pid = current->pid;
  98}
  99
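/*
 * Allocate a request for the current task.  Waits until the connection
 * is no longer blocked; only a fatal signal (SIGKILL) can interrupt the
 * wait.  Returns an ERR_PTR on failure.
 */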
 100struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 101{
 102        struct fuse_req *req;
 103        sigset_t oldset;
 104        int intr;
 105        int err;
 106
 107        atomic_inc(&fc->num_waiting);
 108        block_sigs(&oldset);
 109        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
 110        restore_sigs(&oldset);
 111        err = -EINTR;
 112        if (intr)
 113                goto out;
 114
 115        err = -ENOTCONN;
 116        if (!fc->connected)
 117                goto out;
 118
 119        req = fuse_request_alloc();
 120        err = -ENOMEM;
 121        if (!req)
 122                goto out;
 123
 124        fuse_req_init_context(req);
 125        req->waiting = 1;
 126        return req;
 127
 128 out:
 129        atomic_dec(&fc->num_waiting);
 130        return ERR_PTR(err);
 131}
 132EXPORT_SYMBOL_GPL(fuse_get_req);
 133
 134/*
  135 * Return request in fuse_file->reserved_req.  However, that may
 136 * currently be in use.  If that is the case, wait for it to become
 137 * available.
 138 */
 139static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 140                                         struct file *file)
 141{
 142        struct fuse_req *req = NULL;
 143        struct fuse_file *ff = file->private_data;
 144
 145        do {
 146                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 147                spin_lock(&fc->lock);
 148                if (ff->reserved_req) {
 149                        req = ff->reserved_req;
 150                        ff->reserved_req = NULL;
 151                        req->stolen_file = get_file(file);
 152                }
 153                spin_unlock(&fc->lock);
 154        } while (!req);
 155
 156        return req;
 157}
 158
 159/*
 160 * Put stolen request back into fuse_file->reserved_req
 161 */
 162static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 163{
 164        struct file *file = req->stolen_file;
 165        struct fuse_file *ff = file->private_data;
 166
 167        spin_lock(&fc->lock);
 168        fuse_request_init(req);
 169        BUG_ON(ff->reserved_req);
 170        ff->reserved_req = req;
 171        wake_up_all(&fc->reserved_req_waitq);
 172        spin_unlock(&fc->lock);
 173        fput(file);
 174}
 175
 176/*
  177 * Gets a request for a file operation; always succeeds
 178 *
 179 * This is used for sending the FLUSH request, which must get to
 180 * userspace, due to POSIX locks which may need to be unlocked.
 181 *
 182 * If allocation fails due to OOM, use the reserved request in
 183 * fuse_file.
 184 *
 185 * This is very unlikely to deadlock accidentally, since the
  186 * filesystem should not have its own file open.  If deadlock is
 187 * intentional, it can still be broken by "aborting" the filesystem.
 188 */
 189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
 190{
 191        struct fuse_req *req;
 192
 193        atomic_inc(&fc->num_waiting);
 194        wait_event(fc->blocked_waitq, !fc->blocked);
 195        req = fuse_request_alloc();
 196        if (!req)
 197                req = get_reserved_req(fc, file);
 198
 199        fuse_req_init_context(req);
 200        req->waiting = 1;
 201        return req;
 202}
 203
 204void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 205{
 206        if (atomic_dec_and_test(&req->count)) {
 207                if (req->waiting)
 208                        atomic_dec(&fc->num_waiting);
 209
 210                if (req->stolen_file)
 211                        put_reserved_req(fc, req);
 212                else
 213                        fuse_request_free(req);
 214        }
 215}
 216EXPORT_SYMBOL_GPL(fuse_put_request);
 217
 218static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 219{
 220        unsigned nbytes = 0;
 221        unsigned i;
 222
 223        for (i = 0; i < numargs; i++)
 224                nbytes += args[i].size;
 225
 226        return nbytes;
 227}
 228
 229static u64 fuse_get_unique(struct fuse_conn *fc)
 230{
 231        fc->reqctr++;
 232        /* zero is special */
 233        if (fc->reqctr == 0)
 234                fc->reqctr = 1;
 235
 236        return fc->reqctr;
 237}
 238
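/*
 * Fill in the total message length, add the request to the pending
 * list and wake up any readers of the device.
 */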
 239static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 240{
 241        req->in.h.len = sizeof(struct fuse_in_header) +
 242                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 243        list_add_tail(&req->list, &fc->pending);
 244        req->state = FUSE_REQ_PENDING;
 245        if (!req->waiting) {
 246                req->waiting = 1;
 247                atomic_inc(&fc->num_waiting);
 248        }
 249        wake_up(&fc->waitq);
 250        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 251}
 252
 253void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 254                       u64 nodeid, u64 nlookup)
 255{
 256        forget->forget_one.nodeid = nodeid;
 257        forget->forget_one.nlookup = nlookup;
 258
 259        spin_lock(&fc->lock);
 260        if (fc->connected) {
 261                fc->forget_list_tail->next = forget;
 262                fc->forget_list_tail = forget;
 263                wake_up(&fc->waitq);
 264                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 265        } else {
 266                kfree(forget);
 267        }
 268        spin_unlock(&fc->lock);
 269}
 270
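/*
 * Move queued background requests onto the pending list as long as
 * fewer than max_background of them are active.
 */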
 271static void flush_bg_queue(struct fuse_conn *fc)
 272{
 273        while (fc->active_background < fc->max_background &&
 274               !list_empty(&fc->bg_queue)) {
 275                struct fuse_req *req;
 276
 277                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 278                list_del(&req->list);
 279                fc->active_background++;
 280                req->in.h.unique = fuse_get_unique(fc);
 281                queue_request(fc, req);
 282        }
 283}
 284
 285/*
 286 * This function is called when a request is finished.  Either a reply
 287 * has arrived or it was aborted (and not yet sent) or some error
 288 * occurred during communication with userspace, or the device file
 289 * was closed.  The requester thread is woken up (if still waiting),
  290 * the 'end' callback is called if given, otherwise the reference to
  291 * the request is released.
 292 *
 293 * Called with fc->lock, unlocks it
 294 */
 295static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 296__releases(fc->lock)
 297{
 298        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 299        req->end = NULL;
 300        list_del(&req->list);
 301        list_del(&req->intr_entry);
 302        req->state = FUSE_REQ_FINISHED;
 303        if (req->background) {
 304                if (fc->num_background == fc->max_background) {
 305                        fc->blocked = 0;
 306                        wake_up_all(&fc->blocked_waitq);
 307                }
 308                if (fc->num_background == fc->congestion_threshold &&
 309                    fc->connected && fc->bdi_initialized) {
 310                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 311                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 312                }
 313                fc->num_background--;
 314                fc->active_background--;
 315                flush_bg_queue(fc);
 316        }
 317        spin_unlock(&fc->lock);
 318        wake_up(&req->waitq);
 319        if (end)
 320                end(fc, req);
 321        fuse_put_request(fc, req);
 322}
 323
 324static void wait_answer_interruptible(struct fuse_conn *fc,
 325                                      struct fuse_req *req)
 326__releases(fc->lock)
 327__acquires(fc->lock)
 328{
 329        if (signal_pending(current))
 330                return;
 331
 332        spin_unlock(&fc->lock);
 333        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
 334        spin_lock(&fc->lock);
 335}
 336
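/* Put the request on the interrupt list and notify the device readers */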
 337static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 338{
 339        list_add_tail(&req->intr_entry, &fc->interrupts);
 340        wake_up(&fc->waitq);
 341        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 342}
 343
 344static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 345__releases(fc->lock)
 346__acquires(fc->lock)
 347{
 348        if (!fc->no_interrupt) {
 349                /* Any signal may interrupt this */
 350                wait_answer_interruptible(fc, req);
 351
 352                if (req->aborted)
 353                        goto aborted;
 354                if (req->state == FUSE_REQ_FINISHED)
 355                        return;
 356
 357                req->interrupted = 1;
 358                if (req->state == FUSE_REQ_SENT)
 359                        queue_interrupt(fc, req);
 360        }
 361
 362        if (!req->force) {
 363                sigset_t oldset;
 364
 365                /* Only fatal signals may interrupt this */
 366                block_sigs(&oldset);
 367                wait_answer_interruptible(fc, req);
 368                restore_sigs(&oldset);
 369
 370                if (req->aborted)
 371                        goto aborted;
 372                if (req->state == FUSE_REQ_FINISHED)
 373                        return;
 374
 375                /* Request is not yet in userspace, bail out */
 376                if (req->state == FUSE_REQ_PENDING) {
 377                        list_del(&req->list);
 378                        __fuse_put_request(req);
 379                        req->out.h.error = -EINTR;
 380                        return;
 381                }
 382        }
 383
 384        /*
 385         * Either request is already in userspace, or it was forced.
 386         * Wait it out.
 387         */
 388        spin_unlock(&fc->lock);
 389        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 390        spin_lock(&fc->lock);
 391
 392        if (!req->aborted)
 393                return;
 394
 395 aborted:
 396        BUG_ON(req->state != FUSE_REQ_FINISHED);
 397        if (req->locked) {
 398                /* This is uninterruptible sleep, because data is
 399                   being copied to/from the buffers of req.  During
 400                   locked state, there mustn't be any filesystem
 401                   operation (e.g. page fault), since that could lead
 402                   to deadlock */
 403                spin_unlock(&fc->lock);
 404                wait_event(req->waitq, !req->locked);
 405                spin_lock(&fc->lock);
 406        }
 407}
 408
 409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 410{
 411        req->isreply = 1;
 412        spin_lock(&fc->lock);
 413        if (!fc->connected)
 414                req->out.h.error = -ENOTCONN;
 415        else if (fc->conn_error)
 416                req->out.h.error = -ECONNREFUSED;
 417        else {
 418                req->in.h.unique = fuse_get_unique(fc);
 419                queue_request(fc, req);
 420                /* acquire extra reference, since request is still needed
 421                   after request_end() */
 422                __fuse_get_request(req);
 423
 424                request_wait_answer(fc, req);
 425        }
 426        spin_unlock(&fc->lock);
 427}
 428EXPORT_SYMBOL_GPL(fuse_request_send);
 429
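/*
 * Queue a background request.  Called with fc->lock held.  Marks the
 * connection blocked and the bdi congested when the respective limits
 * are reached.
 */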
 430static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 431                                            struct fuse_req *req)
 432{
 433        req->background = 1;
 434        fc->num_background++;
 435        if (fc->num_background == fc->max_background)
 436                fc->blocked = 1;
 437        if (fc->num_background == fc->congestion_threshold &&
 438            fc->bdi_initialized) {
 439                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 440                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 441        }
 442        list_add_tail(&req->list, &fc->bg_queue);
 443        flush_bg_queue(fc);
 444}
 445
 446static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 447{
 448        spin_lock(&fc->lock);
 449        if (fc->connected) {
 450                fuse_request_send_nowait_locked(fc, req);
 451                spin_unlock(&fc->lock);
 452        } else {
 453                req->out.h.error = -ENOTCONN;
 454                request_end(fc, req);
 455        }
 456}
 457
 458void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 459{
 460        req->isreply = 1;
 461        fuse_request_send_nowait(fc, req);
 462}
 463EXPORT_SYMBOL_GPL(fuse_request_send_background);
 464
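/*
 * Queue a reply to a kernel notification (FUSE_NOTIFY_REPLY) without
 * waiting for an answer.  Fails with -ENODEV if the connection is down.
 */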
 465static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 466                                          struct fuse_req *req, u64 unique)
 467{
 468        int err = -ENODEV;
 469
 470        req->isreply = 0;
 471        req->in.h.unique = unique;
 472        spin_lock(&fc->lock);
 473        if (fc->connected) {
 474                queue_request(fc, req);
 475                err = 0;
 476        }
 477        spin_unlock(&fc->lock);
 478
 479        return err;
 480}
 481
 482/*
 483 * Called under fc->lock
 484 *
 485 * fc->connected must have been checked previously
 486 */
 487void fuse_request_send_background_locked(struct fuse_conn *fc,
 488                                         struct fuse_req *req)
 489{
 490        req->isreply = 1;
 491        fuse_request_send_nowait_locked(fc, req);
 492}
 493
 494/*
 495 * Lock the request.  Up to the next unlock_request() there mustn't be
 496 * anything that could cause a page-fault.  If the request was already
  497 * aborted, bail out.
 498 */
 499static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
 500{
 501        int err = 0;
 502        if (req) {
 503                spin_lock(&fc->lock);
 504                if (req->aborted)
 505                        err = -ENOENT;
 506                else
 507                        req->locked = 1;
 508                spin_unlock(&fc->lock);
 509        }
 510        return err;
 511}
 512
 513/*
 514 * Unlock request.  If it was aborted during being locked, the
  515 * Unlock request.  If it was aborted while locked, the
 516 * wake it up.
 517 */
 518static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
 519{
 520        if (req) {
 521                spin_lock(&fc->lock);
 522                req->locked = 0;
 523                if (req->aborted)
 524                        wake_up(&req->waitq);
 525                spin_unlock(&fc->lock);
 526        }
 527}
 528
 529struct fuse_copy_state {
 530        struct fuse_conn *fc;
 531        int write;
 532        struct fuse_req *req;
 533        const struct iovec *iov;
 534        struct pipe_buffer *pipebufs;
 535        struct pipe_buffer *currbuf;
 536        struct pipe_inode_info *pipe;
 537        unsigned long nr_segs;
 538        unsigned long seglen;
 539        unsigned long addr;
 540        struct page *pg;
 541        void *mapaddr;
 542        void *buf;
 543        unsigned len;
 544        unsigned move_pages:1;
 545};
 546
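/*
 * Initialize the state for copying between a request and a userspace
 * buffer.  'write' is nonzero when data flows from the request to the
 * userspace buffer (i.e. a read on the device).
 */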
 547static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
 548                           int write,
 549                           const struct iovec *iov, unsigned long nr_segs)
 550{
 551        memset(cs, 0, sizeof(*cs));
 552        cs->fc = fc;
 553        cs->write = write;
 554        cs->iov = iov;
 555        cs->nr_segs = nr_segs;
 556}
 557
 558/* Unmap and put previous page of userspace buffer */
 559static void fuse_copy_finish(struct fuse_copy_state *cs)
 560{
 561        if (cs->currbuf) {
 562                struct pipe_buffer *buf = cs->currbuf;
 563
 564                if (!cs->write) {
 565                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
 566                } else {
 567                        kunmap(buf->page);
 568                        buf->len = PAGE_SIZE - cs->len;
 569                }
 570                cs->currbuf = NULL;
 571                cs->mapaddr = NULL;
 572        } else if (cs->mapaddr) {
 573                kunmap(cs->pg);
 574                if (cs->write) {
 575                        flush_dcache_page(cs->pg);
 576                        set_page_dirty_lock(cs->pg);
 577                }
 578                put_page(cs->pg);
 579                cs->mapaddr = NULL;
 580        }
 581}
 582
 583/*
  584 * Get another pageful of the userspace buffer, map it into kernel
  585 * address space, and lock the request.
 586 */
 587static int fuse_copy_fill(struct fuse_copy_state *cs)
 588{
 589        unsigned long offset;
 590        int err;
 591
 592        unlock_request(cs->fc, cs->req);
 593        fuse_copy_finish(cs);
 594        if (cs->pipebufs) {
 595                struct pipe_buffer *buf = cs->pipebufs;
 596
 597                if (!cs->write) {
 598                        err = buf->ops->confirm(cs->pipe, buf);
 599                        if (err)
 600                                return err;
 601
 602                        BUG_ON(!cs->nr_segs);
 603                        cs->currbuf = buf;
 604                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
 605                        cs->len = buf->len;
 606                        cs->buf = cs->mapaddr + buf->offset;
 607                        cs->pipebufs++;
 608                        cs->nr_segs--;
 609                } else {
 610                        struct page *page;
 611
 612                        if (cs->nr_segs == cs->pipe->buffers)
 613                                return -EIO;
 614
 615                        page = alloc_page(GFP_HIGHUSER);
 616                        if (!page)
 617                                return -ENOMEM;
 618
 619                        buf->page = page;
 620                        buf->offset = 0;
 621                        buf->len = 0;
 622
 623                        cs->currbuf = buf;
 624                        cs->mapaddr = kmap(page);
 625                        cs->buf = cs->mapaddr;
 626                        cs->len = PAGE_SIZE;
 627                        cs->pipebufs++;
 628                        cs->nr_segs++;
 629                }
 630        } else {
 631                if (!cs->seglen) {
 632                        BUG_ON(!cs->nr_segs);
 633                        cs->seglen = cs->iov[0].iov_len;
 634                        cs->addr = (unsigned long) cs->iov[0].iov_base;
 635                        cs->iov++;
 636                        cs->nr_segs--;
 637                }
 638                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
 639                if (err < 0)
 640                        return err;
 641                BUG_ON(err != 1);
 642                offset = cs->addr % PAGE_SIZE;
 643                cs->mapaddr = kmap(cs->pg);
 644                cs->buf = cs->mapaddr + offset;
 645                cs->len = min(PAGE_SIZE - offset, cs->seglen);
 646                cs->seglen -= cs->len;
 647                cs->addr += cs->len;
 648        }
 649
 650        return lock_request(cs->fc, cs->req);
 651}
 652
 653/* Do as much copy to/from userspace buffer as we can */
 654static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 655{
 656        unsigned ncpy = min(*size, cs->len);
 657        if (val) {
 658                if (cs->write)
 659                        memcpy(cs->buf, *val, ncpy);
 660                else
 661                        memcpy(*val, cs->buf, ncpy);
 662                *val += ncpy;
 663        }
 664        *size -= ncpy;
 665        cs->len -= ncpy;
 666        cs->buf += ncpy;
 667        return ncpy;
 668}
 669
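/*
 * Check that a page obtained from the pipe buffer is unmapped, has a
 * single reference and carries no unexpected flags, so that it is safe
 * to install in the page cache.
 */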
 670static int fuse_check_page(struct page *page)
 671{
 672        if (page_mapcount(page) ||
 673            page->mapping != NULL ||
 674            page_count(page) != 1 ||
 675            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 676             ~(1 << PG_locked |
 677               1 << PG_referenced |
 678               1 << PG_uptodate |
 679               1 << PG_lru |
 680               1 << PG_active |
 681               1 << PG_reclaim))) {
 682                printk(KERN_WARNING "fuse: trying to steal weird page\n");
 683                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 684                return 1;
 685        }
 686        return 0;
 687}
 688
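/*
 * Try to steal the page in the current pipe buffer and install it in the
 * page cache in place of *pagep, avoiding a copy.  Returns 0 on success,
 * 1 if the caller should fall back to copying, or a negative error.
 */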
 689static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 690{
 691        int err;
 692        struct page *oldpage = *pagep;
 693        struct page *newpage;
 694        struct pipe_buffer *buf = cs->pipebufs;
 695        struct address_space *mapping;
 696        pgoff_t index;
 697
 698        unlock_request(cs->fc, cs->req);
 699        fuse_copy_finish(cs);
 700
 701        err = buf->ops->confirm(cs->pipe, buf);
 702        if (err)
 703                return err;
 704
 705        BUG_ON(!cs->nr_segs);
 706        cs->currbuf = buf;
 707        cs->len = buf->len;
 708        cs->pipebufs++;
 709        cs->nr_segs--;
 710
 711        if (cs->len != PAGE_SIZE)
 712                goto out_fallback;
 713
 714        if (buf->ops->steal(cs->pipe, buf) != 0)
 715                goto out_fallback;
 716
 717        newpage = buf->page;
 718
 719        if (WARN_ON(!PageUptodate(newpage)))
 720                return -EIO;
 721
 722        ClearPageMappedToDisk(newpage);
 723
 724        if (fuse_check_page(newpage) != 0)
 725                goto out_fallback_unlock;
 726
 727        mapping = oldpage->mapping;
 728        index = oldpage->index;
 729
 730        /*
  731         * This is a new and locked page; it shouldn't be mapped or
  732         * have any special flags on it.
 733         */
 734        if (WARN_ON(page_mapped(oldpage)))
 735                goto out_fallback_unlock;
 736        if (WARN_ON(page_has_private(oldpage)))
 737                goto out_fallback_unlock;
 738        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 739                goto out_fallback_unlock;
 740        if (WARN_ON(PageMlocked(oldpage)))
 741                goto out_fallback_unlock;
 742
 743        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 744        if (err) {
 745                unlock_page(newpage);
 746                return err;
 747        }
 748
 749        page_cache_get(newpage);
 750
 751        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 752                lru_cache_add_file(newpage);
 753
 754        err = 0;
 755        spin_lock(&cs->fc->lock);
 756        if (cs->req->aborted)
 757                err = -ENOENT;
 758        else
 759                *pagep = newpage;
 760        spin_unlock(&cs->fc->lock);
 761
 762        if (err) {
 763                unlock_page(newpage);
 764                page_cache_release(newpage);
 765                return err;
 766        }
 767
 768        unlock_page(oldpage);
 769        page_cache_release(oldpage);
 770        cs->len = 0;
 771
 772        return 0;
 773
 774out_fallback_unlock:
 775        unlock_page(newpage);
 776out_fallback:
 777        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
 778        cs->buf = cs->mapaddr + buf->offset;
 779
 780        err = lock_request(cs->fc, cs->req);
 781        if (err)
 782                return err;
 783
 784        return 1;
 785}
 786
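/*
 * Reference a request page from the next pipe buffer, so it can be
 * spliced to userspace without copying.
 */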
 787static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 788                         unsigned offset, unsigned count)
 789{
 790        struct pipe_buffer *buf;
 791
 792        if (cs->nr_segs == cs->pipe->buffers)
 793                return -EIO;
 794
 795        unlock_request(cs->fc, cs->req);
 796        fuse_copy_finish(cs);
 797
 798        buf = cs->pipebufs;
 799        page_cache_get(page);
 800        buf->page = page;
 801        buf->offset = offset;
 802        buf->len = count;
 803
 804        cs->pipebufs++;
 805        cs->nr_segs++;
 806        cs->len = 0;
 807
 808        return 0;
 809}
 810
 811/*
 812 * Copy a page in the request to/from the userspace buffer.  Must be
 813 * done atomically
 814 */
 815static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 816                          unsigned offset, unsigned count, int zeroing)
 817{
 818        int err;
 819        struct page *page = *pagep;
 820
 821        if (page && zeroing && count < PAGE_SIZE)
 822                clear_highpage(page);
 823
 824        while (count) {
 825                if (cs->write && cs->pipebufs && page) {
 826                        return fuse_ref_page(cs, page, offset, count);
 827                } else if (!cs->len) {
 828                        if (cs->move_pages && page &&
 829                            offset == 0 && count == PAGE_SIZE) {
 830                                err = fuse_try_move_page(cs, pagep);
 831                                if (err <= 0)
 832                                        return err;
 833                        } else {
 834                                err = fuse_copy_fill(cs);
 835                                if (err)
 836                                        return err;
 837                        }
 838                }
 839                if (page) {
 840                        void *mapaddr = kmap_atomic(page);
 841                        void *buf = mapaddr + offset;
 842                        offset += fuse_copy_do(cs, &buf, &count);
 843                        kunmap_atomic(mapaddr);
 844                } else
 845                        offset += fuse_copy_do(cs, NULL, &count);
 846        }
 847        if (page && !cs->write)
 848                flush_dcache_page(page);
 849        return 0;
 850}
 851
 852/* Copy pages in the request to/from userspace buffer */
 853static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 854                           int zeroing)
 855{
 856        unsigned i;
 857        struct fuse_req *req = cs->req;
 858        unsigned offset = req->page_offset;
 859        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
 860
 861        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
 862                int err;
 863
 864                err = fuse_copy_page(cs, &req->pages[i], offset, count,
 865                                     zeroing);
 866                if (err)
 867                        return err;
 868
 869                nbytes -= count;
 870                count = min(nbytes, (unsigned) PAGE_SIZE);
 871                offset = 0;
 872        }
 873        return 0;
 874}
 875
 876/* Copy a single argument in the request to/from userspace buffer */
 877static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 878{
 879        while (size) {
 880                if (!cs->len) {
 881                        int err = fuse_copy_fill(cs);
 882                        if (err)
 883                                return err;
 884                }
 885                fuse_copy_do(cs, &val, &size);
 886        }
 887        return 0;
 888}
 889
 890/* Copy request arguments to/from userspace buffer */
 891static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 892                          unsigned argpages, struct fuse_arg *args,
 893                          int zeroing)
 894{
 895        int err = 0;
 896        unsigned i;
 897
 898        for (i = 0; !err && i < numargs; i++)  {
 899                struct fuse_arg *arg = &args[i];
 900                if (i == numargs - 1 && argpages)
 901                        err = fuse_copy_pages(cs, arg->size, zeroing);
 902                else
 903                        err = fuse_copy_one(cs, arg->value, arg->size);
 904        }
 905        return err;
 906}
 907
 908static int forget_pending(struct fuse_conn *fc)
 909{
 910        return fc->forget_list_head.next != NULL;
 911}
 912
 913static int request_pending(struct fuse_conn *fc)
 914{
 915        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
 916                forget_pending(fc);
 917}
 918
 919/* Wait until a request is available on the pending list */
 920static void request_wait(struct fuse_conn *fc)
 921__releases(fc->lock)
 922__acquires(fc->lock)
 923{
 924        DECLARE_WAITQUEUE(wait, current);
 925
 926        add_wait_queue_exclusive(&fc->waitq, &wait);
 927        while (fc->connected && !request_pending(fc)) {
 928                set_current_state(TASK_INTERRUPTIBLE);
 929                if (signal_pending(current))
 930                        break;
 931
 932                spin_unlock(&fc->lock);
 933                schedule();
 934                spin_lock(&fc->lock);
 935        }
 936        set_current_state(TASK_RUNNING);
 937        remove_wait_queue(&fc->waitq, &wait);
 938}
 939
 940/*
 941 * Transfer an interrupt request to userspace
 942 *
  943 * Unlike other requests, this is assembled on demand, without a need
 944 * to allocate a separate fuse_req structure.
 945 *
 946 * Called with fc->lock held, releases it
 947 */
 948static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
 949                               size_t nbytes, struct fuse_req *req)
 950__releases(fc->lock)
 951{
 952        struct fuse_in_header ih;
 953        struct fuse_interrupt_in arg;
 954        unsigned reqsize = sizeof(ih) + sizeof(arg);
 955        int err;
 956
 957        list_del_init(&req->intr_entry);
 958        req->intr_unique = fuse_get_unique(fc);
 959        memset(&ih, 0, sizeof(ih));
 960        memset(&arg, 0, sizeof(arg));
 961        ih.len = reqsize;
 962        ih.opcode = FUSE_INTERRUPT;
 963        ih.unique = req->intr_unique;
 964        arg.unique = req->in.h.unique;
 965
 966        spin_unlock(&fc->lock);
 967        if (nbytes < reqsize)
 968                return -EINVAL;
 969
 970        err = fuse_copy_one(cs, &ih, sizeof(ih));
 971        if (!err)
 972                err = fuse_copy_one(cs, &arg, sizeof(arg));
 973        fuse_copy_finish(cs);
 974
 975        return err ? err : reqsize;
 976}
 977
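/*
 * Detach up to 'max' entries from the head of the forget list.  The
 * number actually dequeued is stored in *countp if it is non-NULL.
 */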
 978static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
 979                                               unsigned max,
 980                                               unsigned *countp)
 981{
 982        struct fuse_forget_link *head = fc->forget_list_head.next;
 983        struct fuse_forget_link **newhead = &head;
 984        unsigned count;
 985
 986        for (count = 0; *newhead != NULL && count < max; count++)
 987                newhead = &(*newhead)->next;
 988
 989        fc->forget_list_head.next = *newhead;
 990        *newhead = NULL;
 991        if (fc->forget_list_head.next == NULL)
 992                fc->forget_list_tail = &fc->forget_list_head;
 993
 994        if (countp != NULL)
 995                *countp = count;
 996
 997        return head;
 998}
 999
1000static int fuse_read_single_forget(struct fuse_conn *fc,
1001                                   struct fuse_copy_state *cs,
1002                                   size_t nbytes)
1003__releases(fc->lock)
1004{
1005        int err;
1006        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1007        struct fuse_forget_in arg = {
1008                .nlookup = forget->forget_one.nlookup,
1009        };
1010        struct fuse_in_header ih = {
1011                .opcode = FUSE_FORGET,
1012                .nodeid = forget->forget_one.nodeid,
1013                .unique = fuse_get_unique(fc),
1014                .len = sizeof(ih) + sizeof(arg),
1015        };
1016
1017        spin_unlock(&fc->lock);
1018        kfree(forget);
1019        if (nbytes < ih.len)
1020                return -EINVAL;
1021
1022        err = fuse_copy_one(cs, &ih, sizeof(ih));
1023        if (!err)
1024                err = fuse_copy_one(cs, &arg, sizeof(arg));
1025        fuse_copy_finish(cs);
1026
1027        if (err)
1028                return err;
1029
1030        return ih.len;
1031}
1032
1033static int fuse_read_batch_forget(struct fuse_conn *fc,
1034                                   struct fuse_copy_state *cs, size_t nbytes)
1035__releases(fc->lock)
1036{
1037        int err;
1038        unsigned max_forgets;
1039        unsigned count;
1040        struct fuse_forget_link *head;
1041        struct fuse_batch_forget_in arg = { .count = 0 };
1042        struct fuse_in_header ih = {
1043                .opcode = FUSE_BATCH_FORGET,
1044                .unique = fuse_get_unique(fc),
1045                .len = sizeof(ih) + sizeof(arg),
1046        };
1047
1048        if (nbytes < ih.len) {
1049                spin_unlock(&fc->lock);
1050                return -EINVAL;
1051        }
1052
1053        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1054        head = dequeue_forget(fc, max_forgets, &count);
1055        spin_unlock(&fc->lock);
1056
1057        arg.count = count;
1058        ih.len += count * sizeof(struct fuse_forget_one);
1059        err = fuse_copy_one(cs, &ih, sizeof(ih));
1060        if (!err)
1061                err = fuse_copy_one(cs, &arg, sizeof(arg));
1062
1063        while (head) {
1064                struct fuse_forget_link *forget = head;
1065
1066                if (!err) {
1067                        err = fuse_copy_one(cs, &forget->forget_one,
1068                                            sizeof(forget->forget_one));
1069                }
1070                head = forget->next;
1071                kfree(forget);
1072        }
1073
1074        fuse_copy_finish(cs);
1075
1076        if (err)
1077                return err;
1078
1079        return ih.len;
1080}
1081
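/*
 * Send queued forgets: one at a time for clients older than protocol
 * version 7.16 or when only a single entry is queued, otherwise as a
 * FUSE_BATCH_FORGET message.
 */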
1082static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1083                            size_t nbytes)
1084__releases(fc->lock)
1085{
1086        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1087                return fuse_read_single_forget(fc, cs, nbytes);
1088        else
1089                return fuse_read_batch_forget(fc, cs, nbytes);
1090}
1091
1092/*
1093 * Read a single request into the userspace filesystem's buffer.  This
1094 * function waits until a request is available, then removes it from
 1095 * the pending list and copies the request data to the userspace buffer.
 1096 * If no reply is needed (FORGET), the request has been aborted, or there
 1097 * was an error during the copy, the request is finished by calling
 1098 * request_end().  Otherwise it is added to the processing list and the
 1099 * 'sent' flag is set.
1100 */
1101static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1102                                struct fuse_copy_state *cs, size_t nbytes)
1103{
1104        int err;
1105        struct fuse_req *req;
1106        struct fuse_in *in;
1107        unsigned reqsize;
1108
1109 restart:
1110        spin_lock(&fc->lock);
1111        err = -EAGAIN;
1112        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1113            !request_pending(fc))
1114                goto err_unlock;
1115
1116        request_wait(fc);
1117        err = -ENODEV;
1118        if (!fc->connected)
1119                goto err_unlock;
1120        err = -ERESTARTSYS;
1121        if (!request_pending(fc))
1122                goto err_unlock;
1123
1124        if (!list_empty(&fc->interrupts)) {
1125                req = list_entry(fc->interrupts.next, struct fuse_req,
1126                                 intr_entry);
1127                return fuse_read_interrupt(fc, cs, nbytes, req);
1128        }
1129
1130        if (forget_pending(fc)) {
1131                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1132                        return fuse_read_forget(fc, cs, nbytes);
1133
1134                if (fc->forget_batch <= -8)
1135                        fc->forget_batch = 16;
1136        }
1137
1138        req = list_entry(fc->pending.next, struct fuse_req, list);
1139        req->state = FUSE_REQ_READING;
1140        list_move(&req->list, &fc->io);
1141
1142        in = &req->in;
1143        reqsize = in->h.len;
1144        /* If request is too large, reply with an error and restart the read */
1145        if (nbytes < reqsize) {
1146                req->out.h.error = -EIO;
 1147                /* SETXATTR is special, since its data may be too large */
1148                if (in->h.opcode == FUSE_SETXATTR)
1149                        req->out.h.error = -E2BIG;
1150                request_end(fc, req);
1151                goto restart;
1152        }
1153        spin_unlock(&fc->lock);
1154        cs->req = req;
1155        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1156        if (!err)
1157                err = fuse_copy_args(cs, in->numargs, in->argpages,
1158                                     (struct fuse_arg *) in->args, 0);
1159        fuse_copy_finish(cs);
1160        spin_lock(&fc->lock);
1161        req->locked = 0;
1162        if (req->aborted) {
1163                request_end(fc, req);
1164                return -ENODEV;
1165        }
1166        if (err) {
1167                req->out.h.error = -EIO;
1168                request_end(fc, req);
1169                return err;
1170        }
1171        if (!req->isreply)
1172                request_end(fc, req);
1173        else {
1174                req->state = FUSE_REQ_SENT;
1175                list_move_tail(&req->list, &fc->processing);
1176                if (req->interrupted)
1177                        queue_interrupt(fc, req);
1178                spin_unlock(&fc->lock);
1179        }
1180        return reqsize;
1181
1182 err_unlock:
1183        spin_unlock(&fc->lock);
1184        return err;
1185}
1186
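/* read() on the device file: copy the next request into the user's iovec */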
1187static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1188                              unsigned long nr_segs, loff_t pos)
1189{
1190        struct fuse_copy_state cs;
1191        struct file *file = iocb->ki_filp;
1192        struct fuse_conn *fc = fuse_get_conn(file);
1193        if (!fc)
1194                return -EPERM;
1195
1196        fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1197
1198        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1199}
1200
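/* Pages spliced out of the device must never be stolen by the pipe reader */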
1201static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1202                                   struct pipe_buffer *buf)
1203{
1204        return 1;
1205}
1206
1207static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1208        .can_merge = 0,
1209        .map = generic_pipe_buf_map,
1210        .unmap = generic_pipe_buf_unmap,
1211        .confirm = generic_pipe_buf_confirm,
1212        .release = generic_pipe_buf_release,
1213        .steal = fuse_dev_pipe_buf_steal,
1214        .get = generic_pipe_buf_get,
1215};
1216
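/*
 * splice() from the device file: read the next request into pipe
 * buffers, referencing request pages directly where possible instead
 * of copying them.
 */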
1217static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1218                                    struct pipe_inode_info *pipe,
1219                                    size_t len, unsigned int flags)
1220{
1221        int ret;
1222        int page_nr = 0;
1223        int do_wakeup = 0;
1224        struct pipe_buffer *bufs;
1225        struct fuse_copy_state cs;
1226        struct fuse_conn *fc = fuse_get_conn(in);
1227        if (!fc)
1228                return -EPERM;
1229
1230        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1231        if (!bufs)
1232                return -ENOMEM;
1233
1234        fuse_copy_init(&cs, fc, 1, NULL, 0);
1235        cs.pipebufs = bufs;
1236        cs.pipe = pipe;
1237        ret = fuse_dev_do_read(fc, in, &cs, len);
1238        if (ret < 0)
1239                goto out;
1240
1241        ret = 0;
1242        pipe_lock(pipe);
1243
1244        if (!pipe->readers) {
1245                send_sig(SIGPIPE, current, 0);
1246                if (!ret)
1247                        ret = -EPIPE;
1248                goto out_unlock;
1249        }
1250
1251        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1252                ret = -EIO;
1253                goto out_unlock;
1254        }
1255
1256        while (page_nr < cs.nr_segs) {
1257                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1258                struct pipe_buffer *buf = pipe->bufs + newbuf;
1259
1260                buf->page = bufs[page_nr].page;
1261                buf->offset = bufs[page_nr].offset;
1262                buf->len = bufs[page_nr].len;
1263                buf->ops = &fuse_dev_pipe_buf_ops;
1264
1265                pipe->nrbufs++;
1266                page_nr++;
1267                ret += buf->len;
1268
1269                if (pipe->inode)
1270                        do_wakeup = 1;
1271        }
1272
1273out_unlock:
1274        pipe_unlock(pipe);
1275
1276        if (do_wakeup) {
1277                smp_mb();
1278                if (waitqueue_active(&pipe->wait))
1279                        wake_up_interruptible(&pipe->wait);
1280                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1281        }
1282
1283out:
1284        for (; page_nr < cs.nr_segs; page_nr++)
1285                page_cache_release(bufs[page_nr].page);
1286
1287        kfree(bufs);
1288        return ret;
1289}
1290
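/* FUSE_NOTIFY_POLL: wake up processes polling on a file */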
1291static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1292                            struct fuse_copy_state *cs)
1293{
1294        struct fuse_notify_poll_wakeup_out outarg;
1295        int err = -EINVAL;
1296
1297        if (size != sizeof(outarg))
1298                goto err;
1299
1300        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1301        if (err)
1302                goto err;
1303
1304        fuse_copy_finish(cs);
1305        return fuse_notify_poll_wakeup(fc, &outarg);
1306
1307err:
1308        fuse_copy_finish(cs);
1309        return err;
1310}
1311
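/* FUSE_NOTIFY_INVAL_INODE: invalidate a range of cached data for an inode */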
1312static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1313                                   struct fuse_copy_state *cs)
1314{
1315        struct fuse_notify_inval_inode_out outarg;
1316        int err = -EINVAL;
1317
1318        if (size != sizeof(outarg))
1319                goto err;
1320
1321        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1322        if (err)
1323                goto err;
1324        fuse_copy_finish(cs);
1325
1326        down_read(&fc->killsb);
1327        err = -ENOENT;
1328        if (fc->sb) {
1329                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1330                                               outarg.off, outarg.len);
1331        }
1332        up_read(&fc->killsb);
1333        return err;
1334
1335err:
1336        fuse_copy_finish(cs);
1337        return err;
1338}
1339
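/* FUSE_NOTIFY_INVAL_ENTRY: invalidate a cached dentry by parent nodeid and name */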
1340static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1341                                   struct fuse_copy_state *cs)
1342{
1343        struct fuse_notify_inval_entry_out outarg;
1344        int err = -ENOMEM;
1345        char *buf;
1346        struct qstr name;
1347
1348        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1349        if (!buf)
1350                goto err;
1351
1352        err = -EINVAL;
1353        if (size < sizeof(outarg))
1354                goto err;
1355
1356        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1357        if (err)
1358                goto err;
1359
1360        err = -ENAMETOOLONG;
1361        if (outarg.namelen > FUSE_NAME_MAX)
1362                goto err;
1363
1364        err = -EINVAL;
1365        if (size != sizeof(outarg) + outarg.namelen + 1)
1366                goto err;
1367
1368        name.name = buf;
1369        name.len = outarg.namelen;
1370        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1371        if (err)
1372                goto err;
1373        fuse_copy_finish(cs);
1374        buf[outarg.namelen] = 0;
1375        name.hash = full_name_hash(name.name, name.len);
1376
1377        down_read(&fc->killsb);
1378        err = -ENOENT;
1379        if (fc->sb)
1380                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1381        up_read(&fc->killsb);
1382        kfree(buf);
1383        return err;
1384
1385err:
1386        kfree(buf);
1387        fuse_copy_finish(cs);
1388        return err;
1389}
1390
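/*
 * FUSE_NOTIFY_DELETE: like INVAL_ENTRY, but also identifies the child
 * nodeid that was deleted.
 */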
1391static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1392                              struct fuse_copy_state *cs)
1393{
1394        struct fuse_notify_delete_out outarg;
1395        int err = -ENOMEM;
1396        char *buf;
1397        struct qstr name;
1398
1399        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1400        if (!buf)
1401                goto err;
1402
1403        err = -EINVAL;
1404        if (size < sizeof(outarg))
1405                goto err;
1406
1407        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1408        if (err)
1409                goto err;
1410
1411        err = -ENAMETOOLONG;
1412        if (outarg.namelen > FUSE_NAME_MAX)
1413                goto err;
1414
1415        err = -EINVAL;
1416        if (size != sizeof(outarg) + outarg.namelen + 1)
1417                goto err;
1418
1419        name.name = buf;
1420        name.len = outarg.namelen;
1421        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1422        if (err)
1423                goto err;
1424        fuse_copy_finish(cs);
1425        buf[outarg.namelen] = 0;
1426        name.hash = full_name_hash(name.name, name.len);
1427
1428        down_read(&fc->killsb);
1429        err = -ENOENT;
1430        if (fc->sb)
1431                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1432                                               outarg.child, &name);
1433        up_read(&fc->killsb);
1434        kfree(buf);
1435        return err;
1436
1437err:
1438        kfree(buf);
1439        fuse_copy_finish(cs);
1440        return err;
1441}
1442
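/*
 * FUSE_NOTIFY_STORE: copy data supplied by the filesystem daemon
 * directly into an inode's page cache, extending the file size if
 * necessary.
 */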
1443static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1444                             struct fuse_copy_state *cs)
1445{
1446        struct fuse_notify_store_out outarg;
1447        struct inode *inode;
1448        struct address_space *mapping;
1449        u64 nodeid;
1450        int err;
1451        pgoff_t index;
1452        unsigned int offset;
1453        unsigned int num;
1454        loff_t file_size;
1455        loff_t end;
1456
1457        err = -EINVAL;
1458        if (size < sizeof(outarg))
1459                goto out_finish;
1460
1461        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1462        if (err)
1463                goto out_finish;
1464
1465        err = -EINVAL;
1466        if (size - sizeof(outarg) != outarg.size)
1467                goto out_finish;
1468
1469        nodeid = outarg.nodeid;
1470
1471        down_read(&fc->killsb);
1472
1473        err = -ENOENT;
1474        if (!fc->sb)
1475                goto out_up_killsb;
1476
1477        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1478        if (!inode)
1479                goto out_up_killsb;
1480
1481        mapping = inode->i_mapping;
1482        index = outarg.offset >> PAGE_CACHE_SHIFT;
1483        offset = outarg.offset & ~PAGE_CACHE_MASK;
1484        file_size = i_size_read(inode);
1485        end = outarg.offset + outarg.size;
1486        if (end > file_size) {
1487                file_size = end;
1488                fuse_write_update_size(inode, file_size);
1489        }
1490
1491        num = outarg.size;
1492        while (num) {
1493                struct page *page;
1494                unsigned int this_num;
1495
1496                err = -ENOMEM;
1497                page = find_or_create_page(mapping, index,
1498                                           mapping_gfp_mask(mapping));
1499                if (!page)
1500                        goto out_iput;
1501
1502                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1503                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1504                if (!err && offset == 0 && (num != 0 || file_size == end))
1505                        SetPageUptodate(page);
1506                unlock_page(page);
1507                page_cache_release(page);
1508
1509                if (err)
1510                        goto out_iput;
1511
1512                num -= this_num;
1513                offset = 0;
1514                index++;
1515        }
1516
1517        err = 0;
1518
1519out_iput:
1520        iput(inode);
1521out_up_killsb:
1522        up_read(&fc->killsb);
1523out_finish:
1524        fuse_copy_finish(cs);
1525        return err;
1526}
1527
1528static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1529{
1530        release_pages(req->pages, req->num_pages, 0);
1531}
1532
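/*
 * Collect the requested range of pages from the inode's page cache and
 * send them back to the daemon in a FUSE_NOTIFY_REPLY request.
 */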
1533static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1534                         struct fuse_notify_retrieve_out *outarg)
1535{
1536        int err;
1537        struct address_space *mapping = inode->i_mapping;
1538        struct fuse_req *req;
1539        pgoff_t index;
1540        loff_t file_size;
1541        unsigned int num;
1542        unsigned int offset;
1543        size_t total_len = 0;
1544
1545        req = fuse_get_req(fc);
1546        if (IS_ERR(req))
1547                return PTR_ERR(req);
1548
1549        offset = outarg->offset & ~PAGE_CACHE_MASK;
1550
1551        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552        req->in.h.nodeid = outarg->nodeid;
1553        req->in.numargs = 2;
1554        req->in.argpages = 1;
1555        req->page_offset = offset;
1556        req->end = fuse_retrieve_end;
1557
1558        index = outarg->offset >> PAGE_CACHE_SHIFT;
1559        file_size = i_size_read(inode);
1560        num = outarg->size;
1561        if (outarg->offset > file_size)
1562                num = 0;
1563        else if (outarg->offset + num > file_size)
1564                num = file_size - outarg->offset;
1565
1566        while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1567                struct page *page;
1568                unsigned int this_num;
1569
1570                page = find_get_page(mapping, index);
1571                if (!page)
1572                        break;
1573
1574                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575                req->pages[req->num_pages] = page;
1576                req->num_pages++;
1577
1578                offset = 0;
1579                num -= this_num;
1580                total_len += this_num;
1581                index++;
1582        }
1583        req->misc.retrieve_in.offset = outarg->offset;
1584        req->misc.retrieve_in.size = total_len;
1585        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1586        req->in.args[0].value = &req->misc.retrieve_in;
1587        req->in.args[1].size = total_len;
1588
1589        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1590        if (err)
1591                fuse_retrieve_end(fc, req);
1592
1593        return err;
1594}
1595
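/* FUSE_NOTIFY_RETRIEVE: the daemon asks to read back data from an inode's page cache */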
1596static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1597                                struct fuse_copy_state *cs)
1598{
1599        struct fuse_notify_retrieve_out outarg;
1600        struct inode *inode;
1601        int err;
1602
1603        err = -EINVAL;
1604        if (size != sizeof(outarg))
1605                goto copy_finish;
1606
1607        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1608        if (err)
1609                goto copy_finish;
1610
1611        fuse_copy_finish(cs);
1612
1613        down_read(&fc->killsb);
1614        err = -ENOENT;
1615        if (fc->sb) {
1616                u64 nodeid = outarg.nodeid;
1617
1618                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1619                if (inode) {
1620                        err = fuse_retrieve(fc, inode, &outarg);
1621                        iput(inode);
1622                }
1623        }
1624        up_read(&fc->killsb);
1625
1626        return err;
1627
1628copy_finish:
1629        fuse_copy_finish(cs);
1630        return err;
1631}
1632
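/* Dispatch an unsolicited notification from the daemon to its handler */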
1633static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1634                       unsigned int size, struct fuse_copy_state *cs)
1635{
1636        switch (code) {
1637        case FUSE_NOTIFY_POLL:
1638                return fuse_notify_poll(fc, size, cs);
1639
1640        case FUSE_NOTIFY_INVAL_INODE:
1641                return fuse_notify_inval_inode(fc, size, cs);
1642
1643        case FUSE_NOTIFY_INVAL_ENTRY:
1644                return fuse_notify_inval_entry(fc, size, cs);
1645
1646        case FUSE_NOTIFY_STORE:
1647                return fuse_notify_store(fc, size, cs);
1648
1649        case FUSE_NOTIFY_RETRIEVE:
1650                return fuse_notify_retrieve(fc, size, cs);
1651
1652        case FUSE_NOTIFY_DELETE:
1653                return fuse_notify_delete(fc, size, cs);
1654
1655        default:
1656                fuse_copy_finish(cs);
1657                return -EINVAL;
1658        }
1659}
1660
1661/* Look up request on processing list by unique ID */
1662static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1663{
1664        struct list_head *entry;
1665
1666        list_for_each(entry, &fc->processing) {
1667                struct fuse_req *req;
1668                req = list_entry(entry, struct fuse_req, list);
1669                if (req->in.h.unique == unique || req->intr_unique == unique)
1670                        return req;
1671        }
1672        return NULL;
1673}
1674
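/*
 * Copy the reply arguments from the userspace buffer into the request.
 * The last argument may be shorter than expected if the request allows
 * a variable-size reply (argvar).
 */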
1675static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1676                         unsigned nbytes)
1677{
1678        unsigned reqsize = sizeof(struct fuse_out_header);
1679
1680        if (out->h.error)
1681                return nbytes != reqsize ? -EINVAL : 0;
1682
1683        reqsize += len_args(out->numargs, out->args);
1684
1685        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1686                return -EINVAL;
1687        else if (reqsize > nbytes) {
1688                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1689                unsigned diffsize = reqsize - nbytes;
1690                if (diffsize > lastarg->size)
1691                        return -EINVAL;
1692                lastarg->size -= diffsize;
1693        }
1694        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1695                              out->page_zeroing);
1696}
1697
1698/*
1699 * Write a single reply to a request.  First the header is copied from
1700 * the write buffer.  The request is then looked up on the processing
1701 * list by the unique ID found in the header.  If found, the request
1702 * is removed from the list and the rest of the buffer is copied into
1703 * it.  The request is then finished by calling request_end().
1704 */
1705static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1706                                 struct fuse_copy_state *cs, size_t nbytes)
1707{
1708        int err;
1709        struct fuse_req *req;
1710        struct fuse_out_header oh;
1711
1712        if (nbytes < sizeof(struct fuse_out_header))
1713                return -EINVAL;
1714
1715        err = fuse_copy_one(cs, &oh, sizeof(oh));
1716        if (err)
1717                goto err_finish;
1718
1719        err = -EINVAL;
1720        if (oh.len != nbytes)
1721                goto err_finish;
1722
1723        /*
1724         * A zero oh.unique indicates an unsolicited notification
1725         * message; oh.error then carries the notification code.
1726         */
1727        if (!oh.unique) {
1728                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1729                return err ? err : nbytes;
1730        }
1731
1732        err = -EINVAL;
1733        if (oh.error <= -1000 || oh.error > 0)
1734                goto err_finish;
1735
1736        spin_lock(&fc->lock);
1737        err = -ENOENT;
1738        if (!fc->connected)
1739                goto err_unlock;
1740
1741        req = request_find(fc, oh.unique);
1742        if (!req)
1743                goto err_unlock;
1744
1745        if (req->aborted) {
1746                spin_unlock(&fc->lock);
1747                fuse_copy_finish(cs);
1748                spin_lock(&fc->lock);
1749                request_end(fc, req);
1750                return -ENOENT;
1751        }
1752        /* Is it an interrupt reply? */
1753        if (req->intr_unique == oh.unique) {
1754                err = -EINVAL;
1755                if (nbytes != sizeof(struct fuse_out_header))
1756                        goto err_unlock;
1757
1758                if (oh.error == -ENOSYS)
1759                        fc->no_interrupt = 1;
1760                else if (oh.error == -EAGAIN)
1761                        queue_interrupt(fc, req);
1762
1763                spin_unlock(&fc->lock);
1764                fuse_copy_finish(cs);
1765                return nbytes;
1766        }
1767
1768        req->state = FUSE_REQ_WRITING;
1769        list_move(&req->list, &fc->io);
1770        req->out.h = oh;
1771        req->locked = 1;
1772        cs->req = req;
1773        if (!req->out.page_replace)
1774                cs->move_pages = 0;
1775        spin_unlock(&fc->lock);
1776
1777        err = copy_out_args(cs, &req->out, nbytes);
1778        fuse_copy_finish(cs);
1779
1780        spin_lock(&fc->lock);
1781        req->locked = 0;
1782        if (!err) {
1783                if (req->aborted)
1784                        err = -ENOENT;
1785        } else if (!req->aborted)
1786                req->out.h.error = -EIO;
1787        request_end(fc, req);
1788
1789        return err ? err : nbytes;
1790
1791 err_unlock:
1792        spin_unlock(&fc->lock);
1793 err_finish:
1794        fuse_copy_finish(cs);
1795        return err;
1796}
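/*
 * Illustrative userspace sketch (editor's addition, not part of the
 * original dev.c): the reply format that fuse_dev_do_write() parses.
 * The daemon echoes the request's unique ID back in a fuse_out_header
 * and appends the opcode-specific output arguments; header and payload
 * must arrive in a single write, and oh.len must match its length.
 * "fuse_fd" is the daemon's /dev/fuse descriptor and "payload" the
 * already-packed out arguments; the helper name is hypothetical.
 */
#include <linux/fuse.h>
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>

static int send_reply(int fuse_fd, uint64_t unique, int error,
		      const void *payload, size_t payload_len)
{
	struct fuse_out_header oh;
	struct iovec iov[2];
	int count = 1;

	memset(&oh, 0, sizeof(oh));
	oh.unique = unique;		/* must match the request header */
	oh.error  = error;		/* 0 or a negated errno above -1000 */
	oh.len    = sizeof(oh) + (error ? 0 : payload_len);

	iov[0].iov_base = &oh;
	iov[0].iov_len  = sizeof(oh);
	if (!error && payload_len) {
		iov[1].iov_base = (void *)payload;
		iov[1].iov_len  = payload_len;
		count = 2;
	}
	/* An error reply carries no payload; oh.len must equal the bytes written */
	return writev(fuse_fd, iov, count) == (ssize_t)oh.len ? 0 : -1;
}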
1797
1798static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1799                              unsigned long nr_segs, loff_t pos)
1800{
1801        struct fuse_copy_state cs;
1802        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1803        if (!fc)
1804                return -EPERM;
1805
1806        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1807
1808        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1809}
1810
1811static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1812                                     struct file *out, loff_t *ppos,
1813                                     size_t len, unsigned int flags)
1814{
1815        unsigned nbuf;
1816        unsigned idx;
1817        struct pipe_buffer *bufs;
1818        struct fuse_copy_state cs;
1819        struct fuse_conn *fc;
1820        size_t rem;
1821        ssize_t ret;
1822
1823        fc = fuse_get_conn(out);
1824        if (!fc)
1825                return -EPERM;
1826
1827        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1828        if (!bufs)
1829                return -ENOMEM;
1830
1831        pipe_lock(pipe);
1832        nbuf = 0;
1833        rem = 0;
1834        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1835                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1836
1837        ret = -EINVAL;
1838        if (rem < len) {
1839                pipe_unlock(pipe);
1840                goto out;
1841        }
1842
1843        rem = len;
1844        while (rem) {
1845                struct pipe_buffer *ibuf;
1846                struct pipe_buffer *obuf;
1847
1848                BUG_ON(nbuf >= pipe->buffers);
1849                BUG_ON(!pipe->nrbufs);
1850                ibuf = &pipe->bufs[pipe->curbuf];
1851                obuf = &bufs[nbuf];
1852
1853                if (rem >= ibuf->len) {
1854                        *obuf = *ibuf;
1855                        ibuf->ops = NULL;
1856                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1857                        pipe->nrbufs--;
1858                } else {
1859                        ibuf->ops->get(pipe, ibuf);
1860                        *obuf = *ibuf;
1861                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1862                        obuf->len = rem;
1863                        ibuf->offset += obuf->len;
1864                        ibuf->len -= obuf->len;
1865                }
1866                nbuf++;
1867                rem -= obuf->len;
1868        }
1869        pipe_unlock(pipe);
1870
1871        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1872        cs.pipebufs = bufs;
1873        cs.pipe = pipe;
1874
1875        if (flags & SPLICE_F_MOVE)
1876                cs.move_pages = 1;
1877
1878        ret = fuse_dev_do_write(fc, &cs, len);
1879
1880        for (idx = 0; idx < nbuf; idx++) {
1881                struct pipe_buffer *buf = &bufs[idx];
1882                buf->ops->release(pipe, buf);
1883        }
1884out:
1885        kfree(bufs);
1886        return ret;
1887}
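/*
 * Illustrative userspace sketch (editor's addition): the splice path
 * handled by fuse_dev_splice_write() above.  The daemon first fills a
 * pipe with the complete reply (fuse_out_header followed by the data),
 * here by gifting its own page-aligned buffers with vmsplice(), then
 * splices the pipe into /dev/fuse.  SPLICE_F_MOVE asks the kernel to
 * move the pipe pages instead of copying them, which is what sets
 * cs.move_pages above.  The sketch assumes the reply fits in the
 * default pipe capacity and omits error handling; "iov" and "total"
 * describe the already-built reply, and the helper name is hypothetical.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stddef.h>
#include <sys/uio.h>
#include <unistd.h>

static int splice_reply(int fuse_fd, const struct iovec *iov, int iovcnt,
			size_t total)
{
	int p[2];

	if (pipe(p) == -1)
		return -1;
	/* Gift the daemon's buffers to the pipe ... */
	if (vmsplice(p[1], iov, iovcnt, SPLICE_F_GIFT) != (ssize_t)total)
		goto fail;
	/* ... and move them on into the kernel in one go. */
	if (splice(p[0], NULL, fuse_fd, NULL, total, SPLICE_F_MOVE) !=
	    (ssize_t)total)
		goto fail;
	close(p[0]);
	close(p[1]);
	return 0;
fail:
	close(p[0]);
	close(p[1]);
	return -1;
}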
1888
1889static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1890{
1891        unsigned mask = POLLOUT | POLLWRNORM;
1892        struct fuse_conn *fc = fuse_get_conn(file);
1893        if (!fc)
1894                return POLLERR;
1895
1896        poll_wait(file, &fc->waitq, wait);
1897
1898        spin_lock(&fc->lock);
1899        if (!fc->connected)
1900                mask = POLLERR;
1901        else if (request_pending(fc))
1902                mask |= POLLIN | POLLRDNORM;
1903        spin_unlock(&fc->lock);
1904
1905        return mask;
1906}
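/*
 * Illustrative userspace sketch (editor's addition): what the poll
 * support above gives the daemon.  A single-threaded daemon can wait
 * for POLLIN on /dev/fuse before issuing a blocking read for the next
 * request; POLLOUT is always reported, and POLLERR means the
 * connection has been aborted or released.  The buffer passed to
 * read() must be large enough for a maximum-sized request.  The
 * helper name is hypothetical.
 */
#include <poll.h>
#include <unistd.h>

static ssize_t wait_and_read_request(int fuse_fd, void *buf, size_t bufsize)
{
	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) != 1)
		return -1;
	if (pfd.revents & POLLERR)
		return -1;		/* connection aborted or released */
	return read(fuse_fd, buf, bufsize);
}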
1907
1908/*
1909 * Abort all requests on the given list (pending or processing)
1910 *
1911 * This function releases and reacquires fc->lock
1912 */
1913static void end_requests(struct fuse_conn *fc, struct list_head *head)
1914__releases(fc->lock)
1915__acquires(fc->lock)
1916{
1917        while (!list_empty(head)) {
1918                struct fuse_req *req;
1919                req = list_entry(head->next, struct fuse_req, list);
1920                req->out.h.error = -ECONNABORTED;
1921                request_end(fc, req);
1922                spin_lock(&fc->lock);
1923        }
1924}
1925
1926/*
1927 * Abort requests under I/O
1928 *
1929 * The requests are marked as aborted and finished, and the request
1930 * waiter is woken up.  This will make request_wait_answer() wait
1931 * until the request is unlocked and then return.
1932 *
1933 * If the request is asynchronous, then the end function needs to be
1934 * called after waiting for the request to be unlocked (if it was
1935 * locked).
1936 */
1937static void end_io_requests(struct fuse_conn *fc)
1938__releases(fc->lock)
1939__acquires(fc->lock)
1940{
1941        while (!list_empty(&fc->io)) {
1942                struct fuse_req *req =
1943                        list_entry(fc->io.next, struct fuse_req, list);
1944                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1945
1946                req->aborted = 1;
1947                req->out.h.error = -ECONNABORTED;
1948                req->state = FUSE_REQ_FINISHED;
1949                list_del_init(&req->list);
1950                wake_up(&req->waitq);
1951                if (end) {
1952                        req->end = NULL;
1953                        __fuse_get_request(req);
1954                        spin_unlock(&fc->lock);
1955                        wait_event(req->waitq, !req->locked);
1956                        end(fc, req);
1957                        fuse_put_request(fc, req);
1958                        spin_lock(&fc->lock);
1959                }
1960        }
1961}
1962
1963static void end_queued_requests(struct fuse_conn *fc)
1964__releases(fc->lock)
1965__acquires(fc->lock)
1966{
1967        fc->max_background = UINT_MAX;
1968        flush_bg_queue(fc);
1969        end_requests(fc, &fc->pending);
1970        end_requests(fc, &fc->processing);
1971        while (forget_pending(fc))
1972                kfree(dequeue_forget(fc, 1, NULL));
1973}
1974
1975static void end_polls(struct fuse_conn *fc)
1976{
1977        struct rb_node *p;
1978
1979        p = rb_first(&fc->polled_files);
1980
1981        while (p) {
1982                struct fuse_file *ff;
1983                ff = rb_entry(p, struct fuse_file, polled_node);
1984                wake_up_interruptible_all(&ff->poll_wait);
1985
1986                p = rb_next(p);
1987        }
1988}
1989
1990/*
1991 * Abort all requests.
1992 *
1993 * Emergency exit in case of a malicious or accidental deadlock, or
1994 * just a hung filesystem.
1995 *
1996 * The same effect is usually achievable through killing the
1997 * filesystem daemon and all users of the filesystem.  The exception
1998 * is the combination of an asynchronous request and the tricky
1999 * deadlock (see Documentation/filesystems/fuse.txt).
2000 *
2001 * During the aborting, progression of requests from the pending and
2002 * processing lists onto the io list, and progression of new requests
2003 * onto the pending list is prevented by fc->connected being false.
2004 *
2005 * Progression of requests under I/O to the processing list is
2006 * prevented by the req->aborted flag being true for these requests.
2007 * For this reason requests on the io list must be aborted first.
2008 */
2009void fuse_abort_conn(struct fuse_conn *fc)
2010{
2011        spin_lock(&fc->lock);
2012        if (fc->connected) {
2013                fc->connected = 0;
2014                fc->blocked = 0;
2015                end_io_requests(fc);
2016                end_queued_requests(fc);
2017                end_polls(fc);
2018                wake_up_all(&fc->waitq);
2019                wake_up_all(&fc->blocked_waitq);
2020                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2021        }
2022        spin_unlock(&fc->lock);
2023}
2024EXPORT_SYMBOL_GPL(fuse_abort_conn);
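/*
 * Illustrative userspace sketch (editor's addition): fuse_abort_conn()
 * is reachable from userspace through the fusectl filesystem,
 * conventionally mounted at /sys/fs/fuse/connections, where each
 * connection has a directory named after its device number containing
 * an "abort" file.  Writing to that file forces the emergency exit
 * described in the comment above when killing the daemon is not
 * enough.  The path and helper name below are assumptions based on
 * that conventional mount point.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int abort_fuse_conn(unsigned int conn_dev)
{
	char path[128];
	int fd;

	/* e.g. /sys/fs/fuse/connections/20/abort */
	snprintf(path, sizeof(path),
		 "/sys/fs/fuse/connections/%u/abort", conn_dev);
	fd = open(path, O_WRONLY);
	if (fd == -1)
		return -1;
	(void)write(fd, "1", 1);	/* any write triggers the abort */
	close(fd);
	return 0;
}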
2025
2026int fuse_dev_release(struct inode *inode, struct file *file)
2027{
2028        struct fuse_conn *fc = fuse_get_conn(file);
2029        if (fc) {
2030                spin_lock(&fc->lock);
2031                fc->connected = 0;
2032                fc->blocked = 0;
2033                end_queued_requests(fc);
2034                end_polls(fc);
2035                wake_up_all(&fc->blocked_waitq);
2036                spin_unlock(&fc->lock);
2037                fuse_conn_put(fc);
2038        }
2039
2040        return 0;
2041}
2042EXPORT_SYMBOL_GPL(fuse_dev_release);
2043
2044static int fuse_dev_fasync(int fd, struct file *file, int on)
2045{
2046        struct fuse_conn *fc = fuse_get_conn(file);
2047        if (!fc)
2048                return -EPERM;
2049
2050        /* No locking - fasync_helper does its own locking */
2051        return fasync_helper(fd, file, on, &fc->fasync);
2052}
2053
2054const struct file_operations fuse_dev_operations = {
2055        .owner          = THIS_MODULE,
2056        .llseek         = no_llseek,
2057        .read           = do_sync_read,
2058        .aio_read       = fuse_dev_read,
2059        .splice_read    = fuse_dev_splice_read,
2060        .write          = do_sync_write,
2061        .aio_write      = fuse_dev_write,
2062        .splice_write   = fuse_dev_splice_write,
2063        .poll           = fuse_dev_poll,
2064        .release        = fuse_dev_release,
2065        .fasync         = fuse_dev_fasync,
2066};
2067EXPORT_SYMBOL_GPL(fuse_dev_operations);
2068
2069static struct miscdevice fuse_miscdevice = {
2070        .minor = FUSE_MINOR,
2071        .name  = "fuse",
2072        .fops = &fuse_dev_operations,
2073};
2074
2075int __init fuse_dev_init(void)
2076{
2077        int err = -ENOMEM;
2078        fuse_req_cachep = kmem_cache_create("fuse_request",
2079                                            sizeof(struct fuse_req),
2080                                            0, 0, NULL);
2081        if (!fuse_req_cachep)
2082                goto out;
2083
2084        err = misc_register(&fuse_miscdevice);
2085        if (err)
2086                goto out_cache_clean;
2087
2088        return 0;
2089
2090 out_cache_clean:
2091        kmem_cache_destroy(fuse_req_cachep);
2092 out:
2093        return err;
2094}
2095
2096void fuse_dev_cleanup(void)
2097{
2098        misc_deregister(&fuse_miscdevice);
2099        kmem_cache_destroy(fuse_req_cachep);
2100}
2101