linux-old/fs/pipe.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/pipe.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   5 */
   6
   7#include <linux/mm.h>
   8#include <linux/file.h>
   9#include <linux/poll.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/init.h>
  13
  14#include <asm/uaccess.h>
  15#include <asm/ioctls.h>
  16
  17/*
  18 * We use a start+len construction, which provides full use of the 
  19 * allocated memory.
  20 * -- Florian Coosmann (FGC)
  21 * 
  22 * Reads with count = 0 should always return 0.
  23 * -- Julian Bradfield 1999-06-07.
  24 */
  25
  26/* Drop the inode semaphore and wait for a pipe event, atomically */
  27void pipe_wait(struct inode * inode)
  28{
  29        DECLARE_WAITQUEUE(wait, current);
  30        current->state = TASK_INTERRUPTIBLE;
  31        add_wait_queue(PIPE_WAIT(*inode), &wait);
  32        up(PIPE_SEM(*inode));
  33        schedule();
  34        remove_wait_queue(PIPE_WAIT(*inode), &wait);
  35        current->state = TASK_RUNNING;
  36        down(PIPE_SEM(*inode));
  37}
  38
  39static ssize_t
  40pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
  41{
  42        struct inode *inode = filp->f_dentry->d_inode;
  43        ssize_t size, read, ret;
  44
  45        /* Seeks are not allowed on pipes.  */
  46        ret = -ESPIPE;
  47        read = 0;
  48        if (ppos != &filp->f_pos)
  49                goto out_nolock;
  50
  51        /* Always return 0 on null read.  */
  52        ret = 0;
  53        if (count == 0)
  54                goto out_nolock;
  55
  56        /* Get the pipe semaphore */
  57        ret = -ERESTARTSYS;
  58        if (down_interruptible(PIPE_SEM(*inode)))
  59                goto out_nolock;
  60
  61        if (PIPE_EMPTY(*inode)) {
  62do_more_read:
  63                ret = 0;
  64                if (!PIPE_WRITERS(*inode))
  65                        goto out;
  66
  67                ret = -EAGAIN;
  68                if (filp->f_flags & O_NONBLOCK)
  69                        goto out;
  70
  71                for (;;) {
  72                        PIPE_WAITING_READERS(*inode)++;
  73                        pipe_wait(inode);
  74                        PIPE_WAITING_READERS(*inode)--;
  75                        ret = -ERESTARTSYS;
  76                        if (signal_pending(current))
  77                                goto out;
  78                        ret = 0;
  79                        if (!PIPE_EMPTY(*inode))
  80                                break;
  81                        if (!PIPE_WRITERS(*inode))
  82                                goto out;
  83                }
  84        }
  85
  86        /* Read what data is available.  */
  87        ret = -EFAULT;
  88        while (count > 0 && (size = PIPE_LEN(*inode))) {
  89                char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
  90                ssize_t chars = PIPE_MAX_RCHUNK(*inode);
  91
  92                if (chars > count)
  93                        chars = count;
  94                if (chars > size)
  95                        chars = size;
  96
  97                if (copy_to_user(buf, pipebuf, chars))
  98                        goto out;
  99
 100                read += chars;
 101                PIPE_START(*inode) += chars;
 102                PIPE_START(*inode) &= (PIPE_SIZE - 1);
 103                PIPE_LEN(*inode) -= chars;
 104                count -= chars;
 105                buf += chars;
 106        }
 107
 108        /* Cache behaviour optimization */
 109        if (!PIPE_LEN(*inode))
 110                PIPE_START(*inode) = 0;
 111
 112        if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
 113                /*
 114                 * We know that we are going to sleep: signal
 115                 * writers synchronously that there is more
 116                 * room.
 117                 */
 118                wake_up_interruptible_sync(PIPE_WAIT(*inode));
 119                if (!PIPE_EMPTY(*inode))
 120                        BUG();
 121                goto do_more_read;
 122        }
 123        /* Signal writers asynchronously that there is more room.  */
 124        wake_up_interruptible(PIPE_WAIT(*inode));
 125
 126        ret = read;
 127out:
 128        up(PIPE_SEM(*inode));
 129out_nolock:
 130        if (read)
 131                ret = read;
 132
 133        UPDATE_ATIME(inode);
 134        return ret;
 135}
 136
 137static ssize_t
 138pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 139{
 140        struct inode *inode = filp->f_dentry->d_inode;
 141        ssize_t free, written, ret;
 142
 143        /* Seeks are not allowed on pipes.  */
 144        ret = -ESPIPE;
 145        written = 0;
 146        if (ppos != &filp->f_pos)
 147                goto out_nolock;
 148
 149        /* Null write succeeds.  */
 150        ret = 0;
 151        if (count == 0)
 152                goto out_nolock;
 153
 154        ret = -ERESTARTSYS;
 155        if (down_interruptible(PIPE_SEM(*inode)))
 156                goto out_nolock;
 157
 158        /* No readers yields SIGPIPE.  */
 159        if (!PIPE_READERS(*inode))
 160                goto sigpipe;
 161
 162        /* If count <= PIPE_BUF, we have to make it atomic.  */
 163        free = (count <= PIPE_BUF ? count : 1);
 164
 165        /* Wait, or check for, available space.  */
 166        if (filp->f_flags & O_NONBLOCK) {
 167                ret = -EAGAIN;
 168                if (PIPE_FREE(*inode) < free)
 169                        goto out;
 170        } else {
 171                while (PIPE_FREE(*inode) < free) {
 172                        PIPE_WAITING_WRITERS(*inode)++;
 173                        pipe_wait(inode);
 174                        PIPE_WAITING_WRITERS(*inode)--;
 175                        ret = -ERESTARTSYS;
 176                        if (signal_pending(current))
 177                                goto out;
 178
 179                        if (!PIPE_READERS(*inode))
 180                                goto sigpipe;
 181                }
 182        }
 183
 184        /* Copy into available space.  */
 185        ret = -EFAULT;
 186        while (count > 0) {
 187                int space;
 188                char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
 189                ssize_t chars = PIPE_MAX_WCHUNK(*inode);
 190
 191                if ((space = PIPE_FREE(*inode)) != 0) {
 192                        if (chars > count)
 193                                chars = count;
 194                        if (chars > space)
 195                                chars = space;
 196
 197                        if (copy_from_user(pipebuf, buf, chars))
 198                                goto out;
 199
 200                        written += chars;
 201                        PIPE_LEN(*inode) += chars;
 202                        count -= chars;
 203                        buf += chars;
 204                        space = PIPE_FREE(*inode);
 205                        continue;
 206                }
 207
 208                ret = written;
 209                if (filp->f_flags & O_NONBLOCK)
 210                        break;
 211
 212                do {
 213                        /*
 214                         * Synchronous wake-up: it knows that this process
 215                         * is going to give up this CPU, so it doesn't have
 216                         * to do idle reschedules.
 217                         */
 218                        wake_up_interruptible_sync(PIPE_WAIT(*inode));
 219                        PIPE_WAITING_WRITERS(*inode)++;
 220                        pipe_wait(inode);
 221                        PIPE_WAITING_WRITERS(*inode)--;
 222                        if (signal_pending(current))
 223                                goto out;
 224                        if (!PIPE_READERS(*inode))
 225                                goto sigpipe;
 226                } while (!PIPE_FREE(*inode));
 227                ret = -EFAULT;
 228        }
 229
 230        /* Signal readers asynchronously that there is more data.  */
 231        wake_up_interruptible(PIPE_WAIT(*inode));
 232
 233        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 234        mark_inode_dirty(inode);
 235
 236out:
 237        up(PIPE_SEM(*inode));
 238out_nolock:
 239        if (written)
 240                ret = written;
 241        return ret;
 242
 243sigpipe:
 244        if (written)
 245                goto out;
 246        up(PIPE_SEM(*inode));
 247        send_sig(SIGPIPE, current, 0);
 248        return -EPIPE;
 249}
 250
 251static ssize_t
 252bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
 253{
 254        return -EBADF;
 255}
 256
 257static ssize_t
 258bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 259{
 260        return -EBADF;
 261}
 262
 263static int
 264pipe_ioctl(struct inode *pino, struct file *filp,
 265           unsigned int cmd, unsigned long arg)
 266{
 267        switch (cmd) {
 268                case FIONREAD:
 269                        return put_user(PIPE_LEN(*pino), (int *)arg);
 270                default:
 271                        return -EINVAL;
 272        }
 273}
 274
 275/* No kernel lock held - fine */
 276static unsigned int
 277pipe_poll(struct file *filp, poll_table *wait)
 278{
 279        unsigned int mask;
 280        struct inode *inode = filp->f_dentry->d_inode;
 281
 282        poll_wait(filp, PIPE_WAIT(*inode), wait);
 283
 284        /* Reading only -- no need for acquiring the semaphore.  */
 285        mask = POLLIN | POLLRDNORM;
 286        if (PIPE_EMPTY(*inode))
 287                mask = POLLOUT | POLLWRNORM;
 288        if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
 289                mask |= POLLHUP;
 290        if (!PIPE_READERS(*inode))
 291                mask |= POLLERR;
 292
 293        return mask;
 294}
 295
 296/* FIXME: most Unices do not set POLLERR for fifos */
 297#define fifo_poll pipe_poll
 298
 299static int
 300pipe_release(struct inode *inode, int decr, int decw)
 301{
 302        down(PIPE_SEM(*inode));
 303        PIPE_READERS(*inode) -= decr;
 304        PIPE_WRITERS(*inode) -= decw;
 305        if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
 306                struct pipe_inode_info *info = inode->i_pipe;
 307                inode->i_pipe = NULL;
 308                free_page((unsigned long) info->base);
 309                kfree(info);
 310        } else {
 311                wake_up_interruptible(PIPE_WAIT(*inode));
 312        }
 313        up(PIPE_SEM(*inode));
 314
 315        return 0;
 316}
 317
 318static int
 319pipe_read_release(struct inode *inode, struct file *filp)
 320{
 321        return pipe_release(inode, 1, 0);
 322}
 323
 324static int
 325pipe_write_release(struct inode *inode, struct file *filp)
 326{
 327        return pipe_release(inode, 0, 1);
 328}
 329
 330static int
 331pipe_rdwr_release(struct inode *inode, struct file *filp)
 332{
 333        int decr, decw;
 334
 335        decr = (filp->f_mode & FMODE_READ) != 0;
 336        decw = (filp->f_mode & FMODE_WRITE) != 0;
 337        return pipe_release(inode, decr, decw);
 338}
 339
 340static int
 341pipe_read_open(struct inode *inode, struct file *filp)
 342{
 343        /* We could have perhaps used atomic_t, but this and friends
 344           below are the only places.  So it doesn't seem worthwhile.  */
 345        down(PIPE_SEM(*inode));
 346        PIPE_READERS(*inode)++;
 347        up(PIPE_SEM(*inode));
 348
 349        return 0;
 350}
 351
 352static int
 353pipe_write_open(struct inode *inode, struct file *filp)
 354{
 355        down(PIPE_SEM(*inode));
 356        PIPE_WRITERS(*inode)++;
 357        up(PIPE_SEM(*inode));
 358
 359        return 0;
 360}
 361
 362static int
 363pipe_rdwr_open(struct inode *inode, struct file *filp)
 364{
 365        down(PIPE_SEM(*inode));
 366        if (filp->f_mode & FMODE_READ)
 367                PIPE_READERS(*inode)++;
 368        if (filp->f_mode & FMODE_WRITE)
 369                PIPE_WRITERS(*inode)++;
 370        up(PIPE_SEM(*inode));
 371
 372        return 0;
 373}
 374
 375/*
 376 * The file_operations structs are not static because they
 377 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 378 */
 379struct file_operations read_fifo_fops = {
 380        llseek:         no_llseek,
 381        read:           pipe_read,
 382        write:          bad_pipe_w,
 383        poll:           fifo_poll,
 384        ioctl:          pipe_ioctl,
 385        open:           pipe_read_open,
 386        release:        pipe_read_release,
 387};
 388
 389struct file_operations write_fifo_fops = {
 390        llseek:         no_llseek,
 391        read:           bad_pipe_r,
 392        write:          pipe_write,
 393        poll:           fifo_poll,
 394        ioctl:          pipe_ioctl,
 395        open:           pipe_write_open,
 396        release:        pipe_write_release,
 397};
 398
 399struct file_operations rdwr_fifo_fops = {
 400        llseek:         no_llseek,
 401        read:           pipe_read,
 402        write:          pipe_write,
 403        poll:           fifo_poll,
 404        ioctl:          pipe_ioctl,
 405        open:           pipe_rdwr_open,
 406        release:        pipe_rdwr_release,
 407};
 408
 409struct file_operations read_pipe_fops = {
 410        llseek:         no_llseek,
 411        read:           pipe_read,
 412        write:          bad_pipe_w,
 413        poll:           pipe_poll,
 414        ioctl:          pipe_ioctl,
 415        open:           pipe_read_open,
 416        release:        pipe_read_release,
 417};
 418
 419struct file_operations write_pipe_fops = {
 420        llseek:         no_llseek,
 421        read:           bad_pipe_r,
 422        write:          pipe_write,
 423        poll:           pipe_poll,
 424        ioctl:          pipe_ioctl,
 425        open:           pipe_write_open,
 426        release:        pipe_write_release,
 427};
 428
 429struct file_operations rdwr_pipe_fops = {
 430        llseek:         no_llseek,
 431        read:           pipe_read,
 432        write:          pipe_write,
 433        poll:           pipe_poll,
 434        ioctl:          pipe_ioctl,
 435        open:           pipe_rdwr_open,
 436        release:        pipe_rdwr_release,
 437};
 438
 439struct inode* pipe_new(struct inode* inode)
 440{
 441        unsigned long page;
 442
 443        page = __get_free_page(GFP_USER);
 444        if (!page)
 445                return NULL;
 446
 447        inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 448        if (!inode->i_pipe)
 449                goto fail_page;
 450
 451        init_waitqueue_head(PIPE_WAIT(*inode));
 452        PIPE_BASE(*inode) = (char*) page;
 453        PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
 454        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
 455        PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
 456        PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
 457
 458        return inode;
 459fail_page:
 460        free_page(page);
 461        return NULL;
 462}
 463
 464static struct vfsmount *pipe_mnt;
 465static int pipefs_delete_dentry(struct dentry *dentry)
 466{
 467        return 1;
 468}
 469static struct dentry_operations pipefs_dentry_operations = {
 470        d_delete:       pipefs_delete_dentry,
 471};
 472
 473static struct inode * get_pipe_inode(void)
 474{
 475        struct inode *inode = new_inode(pipe_mnt->mnt_sb);
 476
 477        if (!inode)
 478                goto fail_inode;
 479
 480        if(!pipe_new(inode))
 481                goto fail_iput;
 482        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
 483        inode->i_fop = &rdwr_pipe_fops;
 484
 485        /*
 486         * Mark the inode dirty from the very beginning,
 487         * that way it will never be moved to the dirty
 488         * list because "mark_inode_dirty()" will think
 489         * that it already _is_ on the dirty list.
 490         */
 491        inode->i_state = I_DIRTY;
 492        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 493        inode->i_uid = current->fsuid;
 494        inode->i_gid = current->fsgid;
 495        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 496        inode->i_blksize = PAGE_SIZE;
 497        return inode;
 498
 499fail_iput:
 500        iput(inode);
 501fail_inode:
 502        return NULL;
 503}
 504
 505int do_pipe(int *fd)
 506{
 507        struct qstr this;
 508        char name[32];
 509        struct dentry *dentry;
 510        struct inode * inode;
 511        struct file *f1, *f2;
 512        int error;
 513        int i,j;
 514
 515        error = -ENFILE;
 516        f1 = get_empty_filp();
 517        if (!f1)
 518                goto no_files;
 519
 520        f2 = get_empty_filp();
 521        if (!f2)
 522                goto close_f1;
 523
 524        inode = get_pipe_inode();
 525        if (!inode)
 526                goto close_f12;
 527
 528        error = get_unused_fd();
 529        if (error < 0)
 530                goto close_f12_inode;
 531        i = error;
 532
 533        error = get_unused_fd();
 534        if (error < 0)
 535                goto close_f12_inode_i;
 536        j = error;
 537
 538        error = -ENOMEM;
 539        sprintf(name, "[%lu]", inode->i_ino);
 540        this.name = name;
 541        this.len = strlen(name);
 542        this.hash = inode->i_ino; /* will go */
 543        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 544        if (!dentry)
 545                goto close_f12_inode_i_j;
 546        dentry->d_op = &pipefs_dentry_operations;
 547        d_add(dentry, inode);
 548        f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
 549        f1->f_dentry = f2->f_dentry = dget(dentry);
 550
 551        /* read file */
 552        f1->f_pos = f2->f_pos = 0;
 553        f1->f_flags = O_RDONLY;
 554        f1->f_op = &read_pipe_fops;
 555        f1->f_mode = 1;
 556        f1->f_version = 0;
 557
 558        /* write file */
 559        f2->f_flags = O_WRONLY;
 560        f2->f_op = &write_pipe_fops;
 561        f2->f_mode = 2;
 562        f2->f_version = 0;
 563
 564        fd_install(i, f1);
 565        fd_install(j, f2);
 566        fd[0] = i;
 567        fd[1] = j;
 568        return 0;
 569
 570close_f12_inode_i_j:
 571        put_unused_fd(j);
 572close_f12_inode_i:
 573        put_unused_fd(i);
 574close_f12_inode:
 575        free_page((unsigned long) PIPE_BASE(*inode));
 576        kfree(inode->i_pipe);
 577        inode->i_pipe = NULL;
 578        iput(inode);
 579close_f12:
 580        put_filp(f2);
 581close_f1:
 582        put_filp(f1);
 583no_files:
 584        return error;   
 585}
 586
/*
 * pipefs should _never_ be mounted by userland - it would be too much of a
 * security hassle, with no real gain from having the whole thing mounted.
 * So we don't need any operations on the root directory. However, we need a
 * non-trivial d_name - "pipe:" will go nicely and kill the special-casing
 * in procfs.
 */
 593static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
 594{
 595        buf->f_type = PIPEFS_MAGIC;
 596        buf->f_bsize = 1024;
 597        buf->f_namelen = 255;
 598        return 0;
 599}
 600
 601static struct super_operations pipefs_ops = {
 602        statfs:         pipefs_statfs,
 603};
 604
 605static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
 606{
 607        struct inode *root = new_inode(sb);
 608        if (!root)
 609                return NULL;
 610        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 611        root->i_uid = root->i_gid = 0;
 612        root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 613        sb->s_blocksize = 1024;
 614        sb->s_blocksize_bits = 10;
 615        sb->s_magic = PIPEFS_MAGIC;
 616        sb->s_op        = &pipefs_ops;
 617        sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
 618        if (!sb->s_root) {
 619                iput(root);
 620                return NULL;
 621        }
 622        sb->s_root->d_sb = sb;
 623        sb->s_root->d_parent = sb->s_root;
 624        d_instantiate(sb->s_root, root);
 625        return sb;
 626}
 627
 628static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
 629
 630static int __init init_pipe_fs(void)
 631{
 632        int err = register_filesystem(&pipe_fs_type);
 633        if (!err) {
 634                pipe_mnt = kern_mount(&pipe_fs_type);
 635                err = PTR_ERR(pipe_mnt);
 636                if (IS_ERR(pipe_mnt))
 637                        unregister_filesystem(&pipe_fs_type);
 638                else
 639                        err = 0;
 640        }
 641        return err;
 642}
 643
 644static void __exit exit_pipe_fs(void)
 645{
 646        unregister_filesystem(&pipe_fs_type);
 647        mntput(pipe_mnt);
 648}
 649
 650module_init(init_pipe_fs)
 651module_exit(exit_pipe_fs)
 652
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.