linux-old/fs/pipe.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/pipe.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   5 */
   6
   7#include <linux/mm.h>
   8#include <linux/file.h>
   9#include <linux/poll.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/init.h>
  13
  14#include <asm/uaccess.h>
  15#include <asm/ioctls.h>
  16
  17/*
  18 * We use a start+len construction, which provides full use of the 
  19 * allocated memory.
  20 * -- Florian Coosmann (FGC)
  21 * 
  22 * Reads with count = 0 should always return 0.
  23 * -- Julian Bradfield 1999-06-07.
  24 */
  25
  26/* Drop the inode semaphore and wait for a pipe event, atomically */
  27void pipe_wait(struct inode * inode)
  28{
  29        DECLARE_WAITQUEUE(wait, current);
  30        current->state = TASK_INTERRUPTIBLE;
  31        add_wait_queue(PIPE_WAIT(*inode), &wait);
  32        up(PIPE_SEM(*inode));
  33        schedule();
  34        remove_wait_queue(PIPE_WAIT(*inode), &wait);
  35        current->state = TASK_RUNNING;
  36        down(PIPE_SEM(*inode));
  37}
  38
  /*
   * pipe_read - read() method for the reading side of a pipe/FIFO.
   *
   * Copies up to count bytes from the pipe buffer to the user buffer buf.
   *
   * Return value:
   *   - the number of bytes copied, whenever that number is non-zero (a
   *     later error or signal never hides data that was already copied);
   *   - 0 for a zero-length read, or when the pipe is empty and has no
   *     writers;
   *   - -ESPIPE if ppos is not the file's own f_pos;
   *   - -EAGAIN if the pipe is empty and the file is O_NONBLOCK;
   *   - -ERESTARTSYS if interrupted by a signal while taking the semaphore
   *     or while waiting for data;
   *   - -EFAULT if copy_to_user() fails before anything was copied.
   *
   * "ret" is pre-loaded with the error code for the next possible failure
   * before each test, so every "goto out" returns whatever was loaded last.
   */
  39static ssize_t
  40pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
  41{
  42        struct inode *inode = filp->f_dentry->d_inode;
  43        ssize_t size, read, ret;
  44
  45        /* Seeks are not allowed on pipes.  */
  46        ret = -ESPIPE;
  47        read = 0;
  48        if (ppos != &filp->f_pos)
  49                goto out_nolock;
  50
  51        /* Always return 0 on null read.  */
  52        ret = 0;
  53        if (count == 0)
  54                goto out_nolock;
  55
  56        /* Get the pipe semaphore */
  57        ret = -ERESTARTSYS;
  58        if (down_interruptible(PIPE_SEM(*inode)))
  59                goto out_nolock;
  60
  61        if (PIPE_EMPTY(*inode)) {
            /*
             * This label is also the target of the "goto do_more_read"
             * after the copy loop, which jumps into this block once the
             * pipe has been drained and more data is still wanted.
             */
  62do_more_read:
            /* Empty pipe with no writers left: end of file. */
  63                ret = 0;
  64                if (!PIPE_WRITERS(*inode))
  65                        goto out;
  66
  67                ret = -EAGAIN;
  68                if (filp->f_flags & O_NONBLOCK)
  69                        goto out;
  70
            /* Sleep until there is data, the writers are gone, or a signal arrives. */
  71                for (;;) {
  72                        PIPE_WAITING_READERS(*inode)++;
  73                        pipe_wait(inode);
  74                        PIPE_WAITING_READERS(*inode)--;
  75                        ret = -ERESTARTSYS;
  76                        if (signal_pending(current))
  77                                goto out;
  78                        ret = 0;
  79                        if (!PIPE_EMPTY(*inode))
  80                                break;
  81                        if (!PIPE_WRITERS(*inode))
  82                                goto out;
  83                }
  84        }
  85
  86        /* Read what data is available.  */
  87        ret = -EFAULT;
  88        while (count > 0 && (size = PIPE_LEN(*inode))) {
  89                char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
  90                ssize_t chars = PIPE_MAX_RCHUNK(*inode);
  91
            /* Clamp the chunk to both the request and the data present. */
  92                if (chars > count)
  93                        chars = count;
  94                if (chars > size)
  95                        chars = size;
  96
            /*
             * NOTE(review): leaving through "out" here skips both wake-up
             * calls below, even if earlier iterations already freed room
             * in the pipe - confirm waiting writers cannot be stranded.
             */
  97                if (copy_to_user(buf, pipebuf, chars))
  98                        goto out;
  99
 100                read += chars;
            /* Advance the start offset, wrapping at PIPE_SIZE (masking assumes a power of two). */
 101                PIPE_START(*inode) += chars;
 102                PIPE_START(*inode) &= (PIPE_SIZE - 1);
 103                PIPE_LEN(*inode) -= chars;
 104                count -= chars;
 105                buf += chars;
 106        }
 107
 108        /* Cache behaviour optimization: restart at the buffer base once drained. */
 109        if (!PIPE_LEN(*inode))
 110                PIPE_START(*inode) = 0;
 111
 112        if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
 113                /*
 114                 * We know that we are going to sleep: signal
 115                 * writers synchronously that there is more
 116                 * room.
 117                 */
 118                wake_up_interruptible_sync(PIPE_WAIT(*inode));
            /* count is still non-zero, so the copy loop can only have stopped on an empty pipe. */
 119                if (!PIPE_EMPTY(*inode))
 120                        BUG();
 121                goto do_more_read;
 122        }
 123        /* Signal writers asynchronously that there is more room.  */
 124        wake_up_interruptible(PIPE_WAIT(*inode));
 125
 126        ret = read;
 127out:
 128        up(PIPE_SEM(*inode));
 129out_nolock:
        /* Data already copied takes precedence over any pending error code. */
 130        if (read)
 131                ret = read;
 132
        /* Reached on every exit path, including the error returns. */
 133        UPDATE_ATIME(inode);
 134        return ret;
 135}
 136
 /*
  * pipe_write - write() method for the writing side of a pipe/FIFO.
  *
  * Copies count bytes from the user buffer buf into the pipe, sleeping for
  * room as needed unless the file is O_NONBLOCK.
  *
  * Return value:
  *   - the number of bytes written, whenever that number is non-zero (a
  *     later signal, fault or missing reader never hides a partial write);
  *   - 0 for a zero-length write;
  *   - -ESPIPE if ppos is not the file's own f_pos;
  *   - -EAGAIN if O_NONBLOCK is set and the required room is not available;
  *   - -ERESTARTSYS if interrupted by a signal before anything was written;
  *   - -EFAULT if copy_from_user() fails before anything was written;
  *   - -EPIPE, after sending SIGPIPE to the caller, if there are no readers
  *     and nothing was written.
  *
  * "ret" is pre-loaded with the error code for the next possible failure
  * before each test, so every "goto out" returns whatever was loaded last.
  */
 137static ssize_t
 138pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 139{
 140        struct inode *inode = filp->f_dentry->d_inode;
 141        ssize_t free, written, ret;
 142
 143        /* Seeks are not allowed on pipes.  */
 144        ret = -ESPIPE;
 145        written = 0;
 146        if (ppos != &filp->f_pos)
 147                goto out_nolock;
 148
 149        /* Null write succeeds.  */
 150        ret = 0;
 151        if (count == 0)
 152                goto out_nolock;
 153
 154        ret = -ERESTARTSYS;
 155        if (down_interruptible(PIPE_SEM(*inode)))
 156                goto out_nolock;
 157
 158        /* No readers yields SIGPIPE.  */
 159        if (!PIPE_READERS(*inode))
 160                goto sigpipe;
 161
 162        /*
         * If count <= PIPE_BUF, we have to make it atomic: "free" is the
         * room required before copying starts - the whole request for
         * small writes, a single byte otherwise.
         */
 163        free = (count <= PIPE_BUF ? count : 1);
 164
 165        /* Wait, or check for, available space.  */
 166        if (filp->f_flags & O_NONBLOCK) {
 167                ret = -EAGAIN;
 168                if (PIPE_FREE(*inode) < free)
 169                        goto out;
 170        } else {
 171                while (PIPE_FREE(*inode) < free) {
 172                        PIPE_WAITING_WRITERS(*inode)++;
 173                        pipe_wait(inode);
 174                        PIPE_WAITING_WRITERS(*inode)--;
 175                        ret = -ERESTARTSYS;
 176                        if (signal_pending(current))
 177                                goto out;
 178
 179                        if (!PIPE_READERS(*inode))
 180                                goto sigpipe;
 181                }
 182        }
 183
 184        /* Copy into available space.  */
 185        ret = -EFAULT;
 186        while (count > 0) {
 187                int space;
 188                char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
 189                ssize_t chars = PIPE_MAX_WCHUNK(*inode);
 190
 191                if ((space = PIPE_FREE(*inode)) != 0) {
                    /* Clamp the chunk to both the request and the free room. */
 192                        if (chars > count)
 193                                chars = count;
 194                        if (chars > space)
 195                                chars = space;
 196
                    /*
                     * NOTE(review): leaving through "out" here skips the
                     * reader wake-up and update_mctime() below, even if
                     * earlier iterations already added data - confirm
                     * sleeping readers cannot be stranded.
                     */
 197                        if (copy_from_user(pipebuf, buf, chars))
 198                                goto out;
 199
 200                        written += chars;
 201                        PIPE_LEN(*inode) += chars;
 202                        count -= chars;
 203                        buf += chars;
                    /*
                     * NOTE(review): this value looks unused - "space" is
                     * assigned again at the top of the next iteration.
                     */
 204                        space = PIPE_FREE(*inode);
 205                        continue;
 206                }
 207
                /* Pipe is full: a non-blocking writer stops with what it has written. */
 208                ret = written;
 209                if (filp->f_flags & O_NONBLOCK)
 210                        break;
 211
 212                do {
 213                        /*
 214                         * Synchronous wake-up: it knows that this process
 215                         * is going to give up this CPU, so it doesn't have
 216                         * to do idle reschedules.
 217                         */
 218                        wake_up_interruptible_sync(PIPE_WAIT(*inode));
 219                        PIPE_WAITING_WRITERS(*inode)++;
 220                        pipe_wait(inode);
 221                        PIPE_WAITING_WRITERS(*inode)--;
 222                        if (signal_pending(current))
 223                                goto out;
 224                        if (!PIPE_READERS(*inode))
 225                                goto sigpipe;
 226                } while (!PIPE_FREE(*inode));
                /* Back to copying: re-arm the error code for copy_from_user(). */
 227                ret = -EFAULT;
 228        }
 229
 230        /* Signal readers asynchronously that there is more data.  */
 231        wake_up_interruptible(PIPE_WAIT(*inode));
 232
 233        update_mctime(inode);
 234
 235out:
 236        up(PIPE_SEM(*inode));
 237out_nolock:
        /* A partial write takes precedence over any pending error code. */
 238        if (written)
 239                ret = written;
 240        return ret;
 241
 242sigpipe:
        /* Report a partial write normally; only a write of nothing raises SIGPIPE. */
 243        if (written)
 244                goto out;
 245        up(PIPE_SEM(*inode));
 246        send_sig(SIGPIPE, current, 0);
 247        return -EPIPE;
 248}
 249
 250static ssize_t
 251bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
 252{
 253        return -EBADF;
 254}
 255
 256static ssize_t
 257bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 258{
 259        return -EBADF;
 260}
 261
 262static int
 263pipe_ioctl(struct inode *pino, struct file *filp,
 264           unsigned int cmd, unsigned long arg)
 265{
 266        switch (cmd) {
 267                case FIONREAD:
 268                        return put_user(PIPE_LEN(*pino), (int *)arg);
 269                default:
 270                        return -EINVAL;
 271        }
 272}
 273
 274/* No kernel lock held - fine */
 275static unsigned int
 276pipe_poll(struct file *filp, poll_table *wait)
 277{
 278        unsigned int mask;
 279        struct inode *inode = filp->f_dentry->d_inode;
 280
 281        poll_wait(filp, PIPE_WAIT(*inode), wait);
 282
 283        /* Reading only -- no need for acquiring the semaphore.  */
 284        mask = POLLIN | POLLRDNORM;
 285        if (PIPE_EMPTY(*inode))
 286                mask = POLLOUT | POLLWRNORM;
 287        if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
 288                mask |= POLLHUP;
 289        if (!PIPE_READERS(*inode))
 290                mask |= POLLERR;
 291
 292        return mask;
 293}
 294
 295/*
     * FIFOs use the pipe poll routine unchanged, so they also report
     * POLLERR when there are no readers.
     * FIXME: most Unices do not set POLLERR for fifos
     */
 296#define fifo_poll pipe_poll
 297
 298static int
 299pipe_release(struct inode *inode, int decr, int decw)
 300{
 301        down(PIPE_SEM(*inode));
 302        PIPE_READERS(*inode) -= decr;
 303        PIPE_WRITERS(*inode) -= decw;
 304        if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
 305                struct pipe_inode_info *info = inode->i_pipe;
 306                inode->i_pipe = NULL;
 307                free_page((unsigned long) info->base);
 308                kfree(info);
 309        } else {
 310                wake_up_interruptible(PIPE_WAIT(*inode));
 311        }
 312        up(PIPE_SEM(*inode));
 313
 314        return 0;
 315}
 316
 /* Release method for read-only opens: drops one reader.  filp is unused. */
 static int pipe_read_release(struct inode *inode, struct file *filp)
 {
         const int readers = 1;
         const int writers = 0;

         return pipe_release(inode, readers, writers);
 }
 322
 /* Release method for write-only opens: drops one writer.  filp is unused. */
 static int pipe_write_release(struct inode *inode, struct file *filp)
 {
         const int readers = 0;
         const int writers = 1;

         return pipe_release(inode, readers, writers);
 }
 328
 329static int
 330pipe_rdwr_release(struct inode *inode, struct file *filp)
 331{
 332        int decr, decw;
 333
 334        decr = (filp->f_mode & FMODE_READ) != 0;
 335        decw = (filp->f_mode & FMODE_WRITE) != 0;
 336        return pipe_release(inode, decr, decw);
 337}
 338
 339static int
 340pipe_read_open(struct inode *inode, struct file *filp)
 341{
 342        /* We could have perhaps used atomic_t, but this and friends
 343           below are the only places.  So it doesn't seem worthwhile.  */
 344        down(PIPE_SEM(*inode));
 345        PIPE_READERS(*inode)++;
 346        up(PIPE_SEM(*inode));
 347
 348        return 0;
 349}
 350
 351static int
 352pipe_write_open(struct inode *inode, struct file *filp)
 353{
 354        down(PIPE_SEM(*inode));
 355        PIPE_WRITERS(*inode)++;
 356        up(PIPE_SEM(*inode));
 357
 358        return 0;
 359}
 360
 361static int
 362pipe_rdwr_open(struct inode *inode, struct file *filp)
 363{
 364        down(PIPE_SEM(*inode));
 365        if (filp->f_mode & FMODE_READ)
 366                PIPE_READERS(*inode)++;
 367        if (filp->f_mode & FMODE_WRITE)
 368                PIPE_WRITERS(*inode)++;
 369        up(PIPE_SEM(*inode));
 370
 371        return 0;
 372}
 373
 374/*
 375 * The file_operations structs are not static because they
 376 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 377 */
 378struct file_operations read_fifo_fops = {
 379        llseek:         no_llseek,
 380        read:           pipe_read,
 381        write:          bad_pipe_w,
 382        poll:           fifo_poll,
 383        ioctl:          pipe_ioctl,
 384        open:           pipe_read_open,
 385        release:        pipe_read_release,
 386};
 387
 388struct file_operations write_fifo_fops = {
 389        llseek:         no_llseek,
 390        read:           bad_pipe_r,
 391        write:          pipe_write,
 392        poll:           fifo_poll,
 393        ioctl:          pipe_ioctl,
 394        open:           pipe_write_open,
 395        release:        pipe_write_release,
 396};
 397
 398struct file_operations rdwr_fifo_fops = {
 399        llseek:         no_llseek,
 400        read:           pipe_read,
 401        write:          pipe_write,
 402        poll:           fifo_poll,
 403        ioctl:          pipe_ioctl,
 404        open:           pipe_rdwr_open,
 405        release:        pipe_rdwr_release,
 406};
 407
 408struct file_operations read_pipe_fops = {
 409        llseek:         no_llseek,
 410        read:           pipe_read,
 411        write:          bad_pipe_w,
 412        poll:           pipe_poll,
 413        ioctl:          pipe_ioctl,
 414        open:           pipe_read_open,
 415        release:        pipe_read_release,
 416};
 417
 418struct file_operations write_pipe_fops = {
 419        llseek:         no_llseek,
 420        read:           bad_pipe_r,
 421        write:          pipe_write,
 422        poll:           pipe_poll,
 423        ioctl:          pipe_ioctl,
 424        open:           pipe_write_open,
 425        release:        pipe_write_release,
 426};
 427
 428struct file_operations rdwr_pipe_fops = {
 429        llseek:         no_llseek,
 430        read:           pipe_read,
 431        write:          pipe_write,
 432        poll:           pipe_poll,
 433        ioctl:          pipe_ioctl,
 434        open:           pipe_rdwr_open,
 435        release:        pipe_rdwr_release,
 436};
 437
 438struct inode* pipe_new(struct inode* inode)
 439{
 440        unsigned long page;
 441
 442        page = __get_free_page(GFP_USER);
 443        if (!page)
 444                return NULL;
 445
 446        inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 447        if (!inode->i_pipe)
 448                goto fail_page;
 449
 450        init_waitqueue_head(PIPE_WAIT(*inode));
 451        PIPE_BASE(*inode) = (char*) page;
 452        PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
 453        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
 454        PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
 455        PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
 456
 457        return inode;
 458fail_page:
 459        free_page(page);
 460        return NULL;
 461}
 462
 /*
  * Internal mount of the pipe filesystem.  Assigned from kern_mount() in
  * init_pipe_fs(); its superblock supplies the inodes and the root
  * dentry used when pipes are created.
  */
 463static struct vfsmount *pipe_mnt;
 /*
  * d_delete hook for pipe dentries: unconditionally returns 1.
  * NOTE(review): presumably this asks the dcache not to keep unused pipe
  * dentries around - confirm against the d_delete contract.
  */
 static int pipefs_delete_dentry(struct dentry *dentry)
 {
         const int always = 1;

         return always;
 }
 468static struct dentry_operations pipefs_dentry_operations = {
 469        d_delete:       pipefs_delete_dentry,
 470};
 471
 472static struct inode * get_pipe_inode(void)
 473{
 474        struct inode *inode = new_inode(pipe_mnt->mnt_sb);
 475
 476        if (!inode)
 477                goto fail_inode;
 478
 479        if(!pipe_new(inode))
 480                goto fail_iput;
 481        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
 482        inode->i_fop = &rdwr_pipe_fops;
 483
 484        /*
 485         * Mark the inode dirty from the very beginning,
 486         * that way it will never be moved to the dirty
 487         * list because "mark_inode_dirty()" will think
 488         * that it already _is_ on the dirty list.
 489         */
 490        inode->i_state = I_DIRTY;
 491        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 492        inode->i_uid = current->fsuid;
 493        inode->i_gid = current->fsgid;
 494        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 495        inode->i_blksize = PAGE_SIZE;
 496        return inode;
 497
 498fail_iput:
 499        iput(inode);
 500fail_inode:
 501        return NULL;
 502}
 503
 504int do_pipe(int *fd)
 505{
 506        struct qstr this;
 507        char name[32];
 508        struct dentry *dentry;
 509        struct inode * inode;
 510        struct file *f1, *f2;
 511        int error;
 512        int i,j;
 513
 514        error = -ENFILE;
 515        f1 = get_empty_filp();
 516        if (!f1)
 517                goto no_files;
 518
 519        f2 = get_empty_filp();
 520        if (!f2)
 521                goto close_f1;
 522
 523        inode = get_pipe_inode();
 524        if (!inode)
 525                goto close_f12;
 526
 527        error = get_unused_fd();
 528        if (error < 0)
 529                goto close_f12_inode;
 530        i = error;
 531
 532        error = get_unused_fd();
 533        if (error < 0)
 534                goto close_f12_inode_i;
 535        j = error;
 536
 537        error = -ENOMEM;
 538        sprintf(name, "[%lu]", inode->i_ino);
 539        this.name = name;
 540        this.len = strlen(name);
 541        this.hash = inode->i_ino; /* will go */
 542        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 543        if (!dentry)
 544                goto close_f12_inode_i_j;
 545        dentry->d_op = &pipefs_dentry_operations;
 546        d_add(dentry, inode);
 547        f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
 548        f1->f_dentry = f2->f_dentry = dget(dentry);
 549
 550        /* read file */
 551        f1->f_pos = f2->f_pos = 0;
 552        f1->f_flags = O_RDONLY;
 553        f1->f_op = &read_pipe_fops;
 554        f1->f_mode = 1;
 555        f1->f_version = 0;
 556
 557        /* write file */
 558        f2->f_flags = O_WRONLY;
 559        f2->f_op = &write_pipe_fops;
 560        f2->f_mode = 2;
 561        f2->f_version = 0;
 562
 563        fd_install(i, f1);
 564        fd_install(j, f2);
 565        fd[0] = i;
 566        fd[1] = j;
 567        return 0;
 568
 569close_f12_inode_i_j:
 570        put_unused_fd(j);
 571close_f12_inode_i:
 572        put_unused_fd(i);
 573close_f12_inode:
 574        free_page((unsigned long) PIPE_BASE(*inode));
 575        kfree(inode->i_pipe);
 576        inode->i_pipe = NULL;
 577        iput(inode);
 578close_f12:
 579        put_filp(f2);
 580close_f1:
 581        put_filp(f1);
 582no_files:
 583        return error;   
 584}
 585
 586/*
 587 * pipefs should _never_ be mounted by userland - too much of security hassle,
 588 * no real gain from having the whole whorehouse mounted. So we don't need
 589 * any operations on the root directory. However, we need a non-trivial
 590 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 591 */
 592static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
 593{
 594        buf->f_type = PIPEFS_MAGIC;
 595        buf->f_bsize = 1024;
 596        buf->f_namelen = 255;
 597        return 0;
 598}
 599
 600static struct super_operations pipefs_ops = {
 601        statfs:         pipefs_statfs,
 602};
 603
 604static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
 605{
 606        struct inode *root = new_inode(sb);
 607        if (!root)
 608                return NULL;
 609        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 610        root->i_uid = root->i_gid = 0;
 611        root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 612        sb->s_blocksize = 1024;
 613        sb->s_blocksize_bits = 10;
 614        sb->s_magic = PIPEFS_MAGIC;
 615        sb->s_op        = &pipefs_ops;
 616        sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
 617        if (!sb->s_root) {
 618                iput(root);
 619                return NULL;
 620        }
 621        sb->s_root->d_sb = sb;
 622        sb->s_root->d_parent = sb->s_root;
 623        d_instantiate(sb->s_root, root);
 624        return sb;
 625}
 626
 /*
  * Filesystem type "pipefs", whose superblock is set up by
  * pipefs_read_super().  It is declared with FS_NOMOUNT; per the note
  * above pipefs_statfs(), pipefs is never meant to be mounted by userland.
  */
 627static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
 628
 629static int __init init_pipe_fs(void)
 630{
 631        int err = register_filesystem(&pipe_fs_type);
 632        if (!err) {
 633                pipe_mnt = kern_mount(&pipe_fs_type);
 634                err = PTR_ERR(pipe_mnt);
 635                if (IS_ERR(pipe_mnt))
 636                        unregister_filesystem(&pipe_fs_type);
 637                else
 638                        err = 0;
 639        }
 640        return err;
 641}
 642
 643static void __exit exit_pipe_fs(void)
 644{
 645        unregister_filesystem(&pipe_fs_type);
 646        mntput(pipe_mnt);
 647}
 648
 /* Hook pipefs set-up and tear-down into the init/exit machinery. */
 649module_init(init_pipe_fs)
 650module_exit(exit_pipe_fs)
 651
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.