/* linux-bk/fs/pipe.c */
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
   6
   7#include <linux/mm.h>
   8#include <linux/file.h>
   9#include <linux/poll.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/init.h>
  13#include <linux/fs.h>
  14
  15#include <asm/uaccess.h>
  16#include <asm/ioctls.h>
  17
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 */
  29
  30/* Drop the inode semaphore and wait for a pipe event, atomically */
  31void pipe_wait(struct inode * inode)
  32{
  33        DECLARE_WAITQUEUE(wait, current);
  34        current->state = TASK_INTERRUPTIBLE;
  35        add_wait_queue(PIPE_WAIT(*inode), &wait);
  36        up(PIPE_SEM(*inode));
  37        schedule();
  38        remove_wait_queue(PIPE_WAIT(*inode), &wait);
  39        current->state = TASK_RUNNING;
  40        down(PIPE_SEM(*inode));
  41}
  42
  43static ssize_t
  44pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
  45{
  46        struct inode *inode = filp->f_dentry->d_inode;
  47        ssize_t size, read, ret;
  48
  49        /* Seeks are not allowed on pipes.  */
  50        ret = -ESPIPE;
  51        read = 0;
  52        if (ppos != &filp->f_pos)
  53                goto out_nolock;
  54
  55        /* Always return 0 on null read.  */
  56        ret = 0;
  57        if (count == 0)
  58                goto out_nolock;
  59
  60        /* Get the pipe semaphore */
  61        ret = -ERESTARTSYS;
  62        if (down_interruptible(PIPE_SEM(*inode)))
  63                goto out_nolock;
  64
  65        if (PIPE_EMPTY(*inode)) {
  66do_more_read:
  67                ret = 0;
  68                if (!PIPE_WRITERS(*inode))
  69                        goto out;
  70
  71                ret = -EAGAIN;
  72                if (filp->f_flags & O_NONBLOCK)
  73                        goto out;
  74
  75                for (;;) {
  76                        PIPE_WAITING_READERS(*inode)++;
  77                        pipe_wait(inode);
  78                        PIPE_WAITING_READERS(*inode)--;
  79                        ret = -ERESTARTSYS;
  80                        if (signal_pending(current))
  81                                goto out;
  82                        ret = 0;
  83                        if (!PIPE_EMPTY(*inode))
  84                                break;
  85                        if (!PIPE_WRITERS(*inode))
  86                                goto out;
  87                }
  88        }
  89
  90        /* Read what data is available.  */
  91        ret = -EFAULT;
  92        while (count > 0 && (size = PIPE_LEN(*inode))) {
  93                char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
  94                ssize_t chars = PIPE_MAX_RCHUNK(*inode);
  95
  96                if (chars > count)
  97                        chars = count;
  98                if (chars > size)
  99                        chars = size;
 100
 101                if (copy_to_user(buf, pipebuf, chars))
 102                        goto out;
 103
 104                read += chars;
 105                PIPE_START(*inode) += chars;
 106                PIPE_START(*inode) &= (PIPE_SIZE - 1);
 107                PIPE_LEN(*inode) -= chars;
 108                count -= chars;
 109                buf += chars;
 110        }
 111
 112        /* Cache behaviour optimization */
 113        if (!PIPE_LEN(*inode))
 114                PIPE_START(*inode) = 0;
 115
 116        if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
 117                /*
 118                 * We know that we are going to sleep: signal
 119                 * writers synchronously that there is more
 120                 * room.
 121                 */
 122                wake_up_interruptible_sync(PIPE_WAIT(*inode));
 123                kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
 124                if (!PIPE_EMPTY(*inode))
 125                        BUG();
 126                goto do_more_read;
 127        }
 128        /* Signal writers asynchronously that there is more room.  */
 129        wake_up_interruptible(PIPE_WAIT(*inode));
 130        kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
 131
 132        ret = read;
 133out:
 134        up(PIPE_SEM(*inode));
 135out_nolock:
 136        if (read)
 137                ret = read;
 138        return ret;
 139}
 140
 141static ssize_t
 142pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 143{
 144        struct inode *inode = filp->f_dentry->d_inode;
 145        ssize_t free, written, ret;
 146
 147        /* Seeks are not allowed on pipes.  */
 148        ret = -ESPIPE;
 149        written = 0;
 150        if (ppos != &filp->f_pos)
 151                goto out_nolock;
 152
 153        /* Null write succeeds.  */
 154        ret = 0;
 155        if (count == 0)
 156                goto out_nolock;
 157
 158        ret = -ERESTARTSYS;
 159        if (down_interruptible(PIPE_SEM(*inode)))
 160                goto out_nolock;
 161
 162        /* No readers yields SIGPIPE.  */
 163        if (!PIPE_READERS(*inode))
 164                goto sigpipe;
 165
 166        /* If count <= PIPE_BUF, we have to make it atomic.  */
 167        free = (count <= PIPE_BUF ? count : 1);
 168
 169        /* Wait, or check for, available space.  */
 170        if (filp->f_flags & O_NONBLOCK) {
 171                ret = -EAGAIN;
 172                if (PIPE_FREE(*inode) < free)
 173                        goto out;
 174        } else {
 175                while (PIPE_FREE(*inode) < free) {
 176                        PIPE_WAITING_WRITERS(*inode)++;
 177                        pipe_wait(inode);
 178                        PIPE_WAITING_WRITERS(*inode)--;
 179                        ret = -ERESTARTSYS;
 180                        if (signal_pending(current))
 181                                goto out;
 182
 183                        if (!PIPE_READERS(*inode))
 184                                goto sigpipe;
 185                }
 186        }
 187
 188        /* Copy into available space.  */
 189        ret = -EFAULT;
 190        while (count > 0) {
 191                int space;
 192                char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
 193                ssize_t chars = PIPE_MAX_WCHUNK(*inode);
 194
 195                if ((space = PIPE_FREE(*inode)) != 0) {
 196                        if (chars > count)
 197                                chars = count;
 198                        if (chars > space)
 199                                chars = space;
 200
 201                        if (copy_from_user(pipebuf, buf, chars))
 202                                goto out;
 203
 204                        written += chars;
 205                        PIPE_LEN(*inode) += chars;
 206                        count -= chars;
 207                        buf += chars;
 208                        space = PIPE_FREE(*inode);
 209                        continue;
 210                }
 211
 212                ret = written;
 213                if (filp->f_flags & O_NONBLOCK)
 214                        break;
 215
 216                do {
 217                        /*
 218                         * Synchronous wake-up: it knows that this process
 219                         * is going to give up this CPU, so it doesn't have
 220                         * to do idle reschedules.
 221                         */
 222                        wake_up_interruptible_sync(PIPE_WAIT(*inode));
 223                        kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
 224                        PIPE_WAITING_WRITERS(*inode)++;
 225                        pipe_wait(inode);
 226                        PIPE_WAITING_WRITERS(*inode)--;
 227                        if (signal_pending(current))
 228                                goto out;
 229                        if (!PIPE_READERS(*inode))
 230                                goto sigpipe;
 231                } while (!PIPE_FREE(*inode));
 232                ret = -EFAULT;
 233        }
 234
 235        /* Signal readers asynchronously that there is more data.  */
 236        wake_up_interruptible(PIPE_WAIT(*inode));
 237        kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
 238
 239        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 240        mark_inode_dirty(inode);
 241
 242out:
 243        up(PIPE_SEM(*inode));
 244out_nolock:
 245        if (written)
 246                ret = written;
 247        return ret;
 248
 249sigpipe:
 250        if (written)
 251                goto out;
 252        up(PIPE_SEM(*inode));
 253        send_sig(SIGPIPE, current, 0);
 254        return -EPIPE;
 255}
 256
 257static ssize_t
 258bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
 259{
 260        return -EBADF;
 261}
 262
 263static ssize_t
 264bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
 265{
 266        return -EBADF;
 267}
 268
 269static int
 270pipe_ioctl(struct inode *pino, struct file *filp,
 271           unsigned int cmd, unsigned long arg)
 272{
 273        switch (cmd) {
 274                case FIONREAD:
 275                        return put_user(PIPE_LEN(*pino), (int *)arg);
 276                default:
 277                        return -EINVAL;
 278        }
 279}
 280
 281/* No kernel lock held - fine */
 282static unsigned int
 283pipe_poll(struct file *filp, poll_table *wait)
 284{
 285        unsigned int mask;
 286        struct inode *inode = filp->f_dentry->d_inode;
 287
 288        poll_wait(filp, PIPE_WAIT(*inode), wait);
 289
 290        /* Reading only -- no need for acquiring the semaphore.  */
 291        mask = POLLIN | POLLRDNORM;
 292        if (PIPE_EMPTY(*inode))
 293                mask = POLLOUT | POLLWRNORM;
 294        if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
 295                mask |= POLLHUP;
 296        if (!PIPE_READERS(*inode))
 297                mask |= POLLERR;
 298
 299        return mask;
 300}
 301
 302/* FIXME: most Unices do not set POLLERR for fifos */
 303#define fifo_poll pipe_poll
 304
 305static int
 306pipe_release(struct inode *inode, int decr, int decw)
 307{
 308        down(PIPE_SEM(*inode));
 309        PIPE_READERS(*inode) -= decr;
 310        PIPE_WRITERS(*inode) -= decw;
 311        if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
 312                struct pipe_inode_info *info = inode->i_pipe;
 313                inode->i_pipe = NULL;
 314                free_page((unsigned long) info->base);
 315                kfree(info);
 316        } else {
 317                wake_up_interruptible(PIPE_WAIT(*inode));
 318                kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
 319                kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
 320        }
 321        up(PIPE_SEM(*inode));
 322
 323        return 0;
 324}
 325
 326static int
 327pipe_read_fasync(int fd, struct file *filp, int on)
 328{
 329        struct inode *inode = filp->f_dentry->d_inode;
 330        int retval;
 331
 332        down(PIPE_SEM(*inode));
 333        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
 334        up(PIPE_SEM(*inode));
 335
 336        if (retval < 0)
 337                return retval;
 338
 339        return 0;
 340}
 341
 342
 343static int
 344pipe_write_fasync(int fd, struct file *filp, int on)
 345{
 346        struct inode *inode = filp->f_dentry->d_inode;
 347        int retval;
 348
 349        down(PIPE_SEM(*inode));
 350        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
 351        up(PIPE_SEM(*inode));
 352
 353        if (retval < 0)
 354                return retval;
 355
 356        return 0;
 357}
 358
 359
 360static int
 361pipe_rdwr_fasync(int fd, struct file *filp, int on)
 362{
 363        struct inode *inode = filp->f_dentry->d_inode;
 364        int retval;
 365
 366        down(PIPE_SEM(*inode));
 367
 368        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
 369
 370        if (retval >= 0)
 371                retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
 372
 373        up(PIPE_SEM(*inode));
 374
 375        if (retval < 0)
 376                return retval;
 377
 378        return 0;
 379}
 380
 381
 382static int
 383pipe_read_release(struct inode *inode, struct file *filp)
 384{
 385        pipe_read_fasync(-1, filp, 0);
 386        return pipe_release(inode, 1, 0);
 387}
 388
 389static int
 390pipe_write_release(struct inode *inode, struct file *filp)
 391{
 392        pipe_write_fasync(-1, filp, 0);
 393        return pipe_release(inode, 0, 1);
 394}
 395
 396static int
 397pipe_rdwr_release(struct inode *inode, struct file *filp)
 398{
 399        int decr, decw;
 400
 401        pipe_rdwr_fasync(-1, filp, 0);
 402        decr = (filp->f_mode & FMODE_READ) != 0;
 403        decw = (filp->f_mode & FMODE_WRITE) != 0;
 404        return pipe_release(inode, decr, decw);
 405}
 406
 407static int
 408pipe_read_open(struct inode *inode, struct file *filp)
 409{
 410        /* We could have perhaps used atomic_t, but this and friends
 411           below are the only places.  So it doesn't seem worthwhile.  */
 412        down(PIPE_SEM(*inode));
 413        PIPE_READERS(*inode)++;
 414        up(PIPE_SEM(*inode));
 415
 416        return 0;
 417}
 418
 419static int
 420pipe_write_open(struct inode *inode, struct file *filp)
 421{
 422        down(PIPE_SEM(*inode));
 423        PIPE_WRITERS(*inode)++;
 424        up(PIPE_SEM(*inode));
 425
 426        return 0;
 427}
 428
 429static int
 430pipe_rdwr_open(struct inode *inode, struct file *filp)
 431{
 432        down(PIPE_SEM(*inode));
 433        if (filp->f_mode & FMODE_READ)
 434                PIPE_READERS(*inode)++;
 435        if (filp->f_mode & FMODE_WRITE)
 436                PIPE_WRITERS(*inode)++;
 437        up(PIPE_SEM(*inode));
 438
 439        return 0;
 440}
 441
 442/*
 443 * The file_operations structs are not static because they
 444 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 445 */
 446struct file_operations read_fifo_fops = {
 447        .llseek         = no_llseek,
 448        .read           = pipe_read,
 449        .write          = bad_pipe_w,
 450        .poll           = fifo_poll,
 451        .ioctl          = pipe_ioctl,
 452        .open           = pipe_read_open,
 453        .release        = pipe_read_release,
 454        .fasync         = pipe_read_fasync,
 455};
 456
 457struct file_operations write_fifo_fops = {
 458        .llseek         = no_llseek,
 459        .read           = bad_pipe_r,
 460        .write          = pipe_write,
 461        .poll           = fifo_poll,
 462        .ioctl          = pipe_ioctl,
 463        .open           = pipe_write_open,
 464        .release        = pipe_write_release,
 465        .fasync         = pipe_write_fasync,
 466};
 467
 468struct file_operations rdwr_fifo_fops = {
 469        .llseek         = no_llseek,
 470        .read           = pipe_read,
 471        .write          = pipe_write,
 472        .poll           = fifo_poll,
 473        .ioctl          = pipe_ioctl,
 474        .open           = pipe_rdwr_open,
 475        .release        = pipe_rdwr_release,
 476        .fasync         = pipe_rdwr_fasync,
 477};
 478
 479struct file_operations read_pipe_fops = {
 480        .llseek         = no_llseek,
 481        .read           = pipe_read,
 482        .write          = bad_pipe_w,
 483        .poll           = pipe_poll,
 484        .ioctl          = pipe_ioctl,
 485        .open           = pipe_read_open,
 486        .release        = pipe_read_release,
 487        .fasync         = pipe_read_fasync,
 488};
 489
 490struct file_operations write_pipe_fops = {
 491        .llseek         = no_llseek,
 492        .read           = bad_pipe_r,
 493        .write          = pipe_write,
 494        .poll           = pipe_poll,
 495        .ioctl          = pipe_ioctl,
 496        .open           = pipe_write_open,
 497        .release        = pipe_write_release,
 498        .fasync         = pipe_write_fasync,
 499};
 500
 501struct file_operations rdwr_pipe_fops = {
 502        .llseek         = no_llseek,
 503        .read           = pipe_read,
 504        .write          = pipe_write,
 505        .poll           = pipe_poll,
 506        .ioctl          = pipe_ioctl,
 507        .open           = pipe_rdwr_open,
 508        .release        = pipe_rdwr_release,
 509        .fasync         = pipe_rdwr_fasync,
 510};
 511
 512struct inode* pipe_new(struct inode* inode)
 513{
 514        unsigned long page;
 515
 516        page = __get_free_page(GFP_USER);
 517        if (!page)
 518                return NULL;
 519
 520        inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 521        if (!inode->i_pipe)
 522                goto fail_page;
 523
 524        init_waitqueue_head(PIPE_WAIT(*inode));
 525        PIPE_BASE(*inode) = (char*) page;
 526        PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
 527        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
 528        PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
 529        PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
 530        *PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;
 531
 532        return inode;
 533fail_page:
 534        free_page(page);
 535        return NULL;
 536}
 537
 538static struct vfsmount *pipe_mnt;
 539static int pipefs_delete_dentry(struct dentry *dentry)
 540{
 541        return 1;
 542}
 543static struct dentry_operations pipefs_dentry_operations = {
 544        .d_delete       = pipefs_delete_dentry,
 545};
 546
 547static struct inode * get_pipe_inode(void)
 548{
 549        struct inode *inode = new_inode(pipe_mnt->mnt_sb);
 550
 551        if (!inode)
 552                goto fail_inode;
 553
 554        if(!pipe_new(inode))
 555                goto fail_iput;
 556        PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
 557        inode->i_fop = &rdwr_pipe_fops;
 558
 559        /*
 560         * Mark the inode dirty from the very beginning,
 561         * that way it will never be moved to the dirty
 562         * list because "mark_inode_dirty()" will think
 563         * that it already _is_ on the dirty list.
 564         */
 565        inode->i_state = I_DIRTY;
 566        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 567        inode->i_uid = current->fsuid;
 568        inode->i_gid = current->fsgid;
 569        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 570        inode->i_blksize = PAGE_SIZE;
 571        return inode;
 572
 573fail_iput:
 574        iput(inode);
 575fail_inode:
 576        return NULL;
 577}
 578
 579int do_pipe(int *fd)
 580{
 581        struct qstr this;
 582        char name[32];
 583        struct dentry *dentry;
 584        struct inode * inode;
 585        struct file *f1, *f2;
 586        int error;
 587        int i,j;
 588
 589        error = -ENFILE;
 590        f1 = get_empty_filp();
 591        if (!f1)
 592                goto no_files;
 593
 594        f2 = get_empty_filp();
 595        if (!f2)
 596                goto close_f1;
 597
 598        inode = get_pipe_inode();
 599        if (!inode)
 600                goto close_f12;
 601
 602        error = get_unused_fd();
 603        if (error < 0)
 604                goto close_f12_inode;
 605        i = error;
 606
 607        error = get_unused_fd();
 608        if (error < 0)
 609                goto close_f12_inode_i;
 610        j = error;
 611
 612        error = -ENOMEM;
 613        sprintf(name, "[%lu]", inode->i_ino);
 614        this.name = name;
 615        this.len = strlen(name);
 616        this.hash = inode->i_ino; /* will go */
 617        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 618        if (!dentry)
 619                goto close_f12_inode_i_j;
 620        dentry->d_op = &pipefs_dentry_operations;
 621        d_add(dentry, inode);
 622        f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
 623        f1->f_dentry = f2->f_dentry = dget(dentry);
 624
 625        /* read file */
 626        f1->f_pos = f2->f_pos = 0;
 627        f1->f_flags = O_RDONLY;
 628        f1->f_op = &read_pipe_fops;
 629        f1->f_mode = 1;
 630        f1->f_version = 0;
 631
 632        /* write file */
 633        f2->f_flags = O_WRONLY;
 634        f2->f_op = &write_pipe_fops;
 635        f2->f_mode = 2;
 636        f2->f_version = 0;
 637
 638        fd_install(i, f1);
 639        fd_install(j, f2);
 640        fd[0] = i;
 641        fd[1] = j;
 642        return 0;
 643
 644close_f12_inode_i_j:
 645        put_unused_fd(j);
 646close_f12_inode_i:
 647        put_unused_fd(i);
 648close_f12_inode:
 649        free_page((unsigned long) PIPE_BASE(*inode));
 650        kfree(inode->i_pipe);
 651        inode->i_pipe = NULL;
 652        iput(inode);
 653close_f12:
 654        put_filp(f2);
 655close_f1:
 656        put_filp(f1);
 657no_files:
 658        return error;   
 659}
 660
 661/*
 662 * pipefs should _never_ be mounted by userland - too much of security hassle,
 663 * no real gain from having the whole whorehouse mounted. So we don't need
 664 * any operations on the root directory. However, we need a non-trivial
 665 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 666 */
 667
 668static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
 669        int flags, char *dev_name, void *data)
 670{
 671        return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
 672}
 673
 674static struct file_system_type pipe_fs_type = {
 675        .name           = "pipefs",
 676        .get_sb         = pipefs_get_sb,
 677        .kill_sb        = kill_anon_super,
 678};
 679
 680static int __init init_pipe_fs(void)
 681{
 682        int err = register_filesystem(&pipe_fs_type);
 683        if (!err) {
 684                pipe_mnt = kern_mount(&pipe_fs_type);
 685                err = PTR_ERR(pipe_mnt);
 686                if (IS_ERR(pipe_mnt))
 687                        unregister_filesystem(&pipe_fs_type);
 688                else
 689                        err = 0;
 690        }
 691        return err;
 692}
 693
 694static void __exit exit_pipe_fs(void)
 695{
 696        unregister_filesystem(&pipe_fs_type);
 697        mntput(pipe_mnt);
 698}
 699
 700module_init(init_pipe_fs)
 701module_exit(exit_pipe_fs)
 702
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */