linux-bk/ipc/mqueue.c
<<
>>
Prefs
   1/*
   2 * POSIX message queues filesystem for Linux.
   3 *
   4 * Copyright (C) 2003,2004  Krzysztof Benedyczak    (golbi@mat.uni.torun.pl)
   5 *                          Michal Wronski          (wrona@mat.uni.torun.pl)
   6 *
   7 * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
   8 * Lockless receive & send, fd based notify:
   9 *                          Manfred Spraul          (manfred@colorfullife.com)
  10 *
  11 * This file is released under the GPL.
  12 */
  13
  14#include <linux/init.h>
  15#include <linux/pagemap.h>
  16#include <linux/file.h>
  17#include <linux/mount.h>
  18#include <linux/namei.h>
  19#include <linux/sysctl.h>
  20#include <linux/poll.h>
  21#include <linux/mqueue.h>
  22#include <linux/msg.h>
  23#include <linux/skbuff.h>
  24#include <linux/netlink.h>
  25#include <linux/syscalls.h>
  26#include <net/sock.h>
  27#include "util.h"
  28
  /* Superblock magic number of the mqueue filesystem. */
  #define MQUEUE_MAGIC            0x19800202
  /* Nominal size accounted in the root directory's i_size per entry. */
  #define DIRENT_SIZE             20
  /* i_size of a queue file and size of the buffer built when it is read. */
  #define FILENT_SIZE             80

  /* Indexes into mqueue_inode_info.e_wait_q[]: blocked senders / receivers. */
  #define SEND                    0
  #define RECV                    1

  /* Hand-over states kept in ext_wait_queue.state. */
  #define STATE_NONE              0
  #define STATE_PENDING           1
  #define STATE_READY             2

  /* used by sysctl */
  #define FS_MQUEUE               1
  #define CTL_QUEUESMAX           2
  #define CTL_MSGMAX              3
  #define CTL_MSGSIZEMAX          4

  /* default values */
  /* max number of message queues */
  #define DFLT_QUEUESMAX          256
  /* max number of messages in each queue */
  #define DFLT_MSGMAX             10
  /* ceiling on mq_maxmsg applied to CAP_SYS_RESOURCE callers in mq_attr_ok() */
  #define HARD_MSGMAX             (131072 / sizeof(void *))
  /* max message size */
  #define DFLT_MSGSIZEMAX         8192

  /* The notification code is stored in byte NOTIFY_COOKIE_LEN - 1 of the cookie. */
  #define NOTIFY_COOKIE_LEN       32
  53
  54struct ext_wait_queue {         /* queue of sleeping tasks */
  55        struct task_struct *task;
  56        struct list_head list;
  57        struct msg_msg *msg;    /* ptr of loaded message */
  58        int state;              /* one of STATE_* values */
  59};
  60
  61struct mqueue_inode_info {
  62        spinlock_t lock;
  63        struct inode vfs_inode;
  64        wait_queue_head_t wait_q;
  65
  66        struct msg_msg **messages;
  67        struct mq_attr attr;
  68
  69        struct sigevent notify;
  70        pid_t notify_owner;
  71        struct user_struct *user;       /* user who created, for accouting */
  72        struct sock *notify_sock;
  73        struct sk_buff *notify_cookie;
  74
  75        /* for tasks waiting for free space and messages, respectively */
  76        struct ext_wait_queue e_wait_q[2];
  77
  78        unsigned long qsize; /* size of queue in memory (sum of all msgs) */
  79};
  80
  81static struct inode_operations mqueue_dir_inode_operations;
  82static struct file_operations mqueue_file_operations;
  83static struct super_operations mqueue_super_ops;
  84static void remove_notification(struct mqueue_inode_info *info);
  85
  86static spinlock_t mq_lock;
  87static kmem_cache_t *mqueue_inode_cachep;
  88static struct vfsmount *mqueue_mnt;
  89
  90static unsigned int queues_count;
  91static unsigned int queues_max  = DFLT_QUEUESMAX;
  92static unsigned int msg_max     = DFLT_MSGMAX;
  93static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
  94
  95static struct ctl_table_header * mq_sysctl_table;
  96
  97static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
  98{
  99        return container_of(inode, struct mqueue_inode_info, vfs_inode);
 100}
 101
 102static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 103                                                        struct mq_attr *attr)
 104{
 105        struct inode *inode;
 106
 107        inode = new_inode(sb);
 108        if (inode) {
 109                inode->i_mode = mode;
 110                inode->i_uid = current->fsuid;
 111                inode->i_gid = current->fsgid;
 112                inode->i_blksize = PAGE_CACHE_SIZE;
 113                inode->i_blocks = 0;
 114                inode->i_mtime = inode->i_ctime = inode->i_atime =
 115                                CURRENT_TIME;
 116
 117                if (S_ISREG(mode)) {
 118                        struct mqueue_inode_info *info;
 119                        struct task_struct *p = current;
 120                        struct user_struct *u = p->user;
 121                        unsigned long mq_bytes, mq_msg_tblsz;
 122
 123                        inode->i_fop = &mqueue_file_operations;
 124                        inode->i_size = FILENT_SIZE;
 125                        /* mqueue specific info */
 126                        info = MQUEUE_I(inode);
 127                        spin_lock_init(&info->lock);
 128                        init_waitqueue_head(&info->wait_q);
 129                        INIT_LIST_HEAD(&info->e_wait_q[0].list);
 130                        INIT_LIST_HEAD(&info->e_wait_q[1].list);
 131                        info->messages = NULL;
 132                        info->notify_owner = 0;
 133                        info->qsize = 0;
 134                        info->user = NULL;      /* set when all is ok */
 135                        memset(&info->attr, 0, sizeof(info->attr));
 136                        info->attr.mq_maxmsg = DFLT_MSGMAX;
 137                        info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
 138                        if (attr) {
 139                                info->attr.mq_maxmsg = attr->mq_maxmsg;
 140                                info->attr.mq_msgsize = attr->mq_msgsize;
 141                        }
 142                        mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
 143                        mq_bytes = (mq_msg_tblsz +
 144                                (info->attr.mq_maxmsg * info->attr.mq_msgsize));
 145
 146                        spin_lock(&mq_lock);
 147                        if (u->mq_bytes + mq_bytes < u->mq_bytes ||
 148                            u->mq_bytes + mq_bytes >
 149                            p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
 150                                spin_unlock(&mq_lock);
 151                                goto out_inode;
 152                        }
 153                        u->mq_bytes += mq_bytes;
 154                        spin_unlock(&mq_lock);
 155
 156                        info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
 157                        if (!info->messages) {
 158                                spin_lock(&mq_lock);
 159                                u->mq_bytes -= mq_bytes;
 160                                spin_unlock(&mq_lock);
 161                                goto out_inode;
 162                        }
 163                        /* all is ok */
 164                        info->user = get_uid(u);
 165                } else if (S_ISDIR(mode)) {
 166                        inode->i_nlink++;
 167                        /* Some things misbehave if size == 0 on a directory */
 168                        inode->i_size = 2 * DIRENT_SIZE;
 169                        inode->i_op = &mqueue_dir_inode_operations;
 170                        inode->i_fop = &simple_dir_operations;
 171                }
 172        }
 173        return inode;
 174out_inode:
 175        make_bad_inode(inode);
 176        iput(inode);
 177        return NULL;
 178}
 179
 180static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
 181{
 182        struct inode *inode;
 183
 184        sb->s_blocksize = PAGE_CACHE_SIZE;
 185        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 186        sb->s_magic = MQUEUE_MAGIC;
 187        sb->s_op = &mqueue_super_ops;
 188
 189        inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
 190        if (!inode)
 191                return -ENOMEM;
 192
 193        sb->s_root = d_alloc_root(inode);
 194        if (!sb->s_root) {
 195                iput(inode);
 196                return -ENOMEM;
 197        }
 198
 199        return 0;
 200}
 201
 202static struct super_block *mqueue_get_sb(struct file_system_type *fs_type,
 203                                         int flags, const char *dev_name,
 204                                         void *data)
 205{
 206        return get_sb_single(fs_type, flags, data, mqueue_fill_super);
 207}
 208
 209static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 210{
 211        struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
 212
 213        if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
 214                SLAB_CTOR_CONSTRUCTOR)
 215                inode_init_once(&p->vfs_inode);
 216}
 217
 218static struct inode *mqueue_alloc_inode(struct super_block *sb)
 219{
 220        struct mqueue_inode_info *ei;
 221
 222        ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
 223        if (!ei)
 224                return NULL;
 225        return &ei->vfs_inode;
 226}
 227
 228static void mqueue_destroy_inode(struct inode *inode)
 229{
 230        kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
 231}
 232
 233static void mqueue_delete_inode(struct inode *inode)
 234{
 235        struct mqueue_inode_info *info;
 236        struct user_struct *user;
 237        unsigned long mq_bytes;
 238        int i;
 239
 240        if (S_ISDIR(inode->i_mode)) {
 241                clear_inode(inode);
 242                return;
 243        }
 244        info = MQUEUE_I(inode);
 245        spin_lock(&info->lock);
 246        for (i = 0; i < info->attr.mq_curmsgs; i++)
 247                free_msg(info->messages[i]);
 248        kfree(info->messages);
 249        spin_unlock(&info->lock);
 250
 251        clear_inode(inode);
 252
 253        mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
 254                   (info->attr.mq_maxmsg * info->attr.mq_msgsize));
 255        user = info->user;
 256        if (user) {
 257                spin_lock(&mq_lock);
 258                user->mq_bytes -= mq_bytes;
 259                queues_count--;
 260                spin_unlock(&mq_lock);
 261                free_uid(user);
 262        }
 263}
 264
 265static int mqueue_create(struct inode *dir, struct dentry *dentry,
 266                                int mode, struct nameidata *nd)
 267{
 268        struct inode *inode;
 269        struct mq_attr *attr = dentry->d_fsdata;
 270        int error;
 271
 272        spin_lock(&mq_lock);
 273        if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
 274                error = -ENOSPC;
 275                goto out_lock;
 276        }
 277        queues_count++;
 278        spin_unlock(&mq_lock);
 279
 280        inode = mqueue_get_inode(dir->i_sb, mode, attr);
 281        if (!inode) {
 282                error = -ENOMEM;
 283                spin_lock(&mq_lock);
 284                queues_count--;
 285                goto out_lock;
 286        }
 287
 288        dir->i_size += DIRENT_SIZE;
 289        dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
 290
 291        d_instantiate(dentry, inode);
 292        dget(dentry);
 293        return 0;
 294out_lock:
 295        spin_unlock(&mq_lock);
 296        return error;
 297}
 298
 299static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 300{
 301        struct inode *inode = dentry->d_inode;
 302
 303        dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
 304        dir->i_size -= DIRENT_SIZE;
 305        inode->i_nlink--;
 306        dput(dentry);
 307        return 0;
 308}
 309
 310/*
 311*       This is routine for system read from queue file.
 312*       To avoid mess with doing here some sort of mq_receive we allow
 313*       to read only queue size & notification info (the only values
 314*       that are interesting from user point of view and aren't accessible
 315*       through std routines)
 316*/
 317static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 318                                size_t count, loff_t * off)
 319{
 320        struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 321        char buffer[FILENT_SIZE];
 322        size_t slen;
 323        loff_t o;
 324
 325        if (!count)
 326                return 0;
 327
 328        spin_lock(&info->lock);
 329        snprintf(buffer, sizeof(buffer),
 330                        "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
 331                        info->qsize,
 332                        info->notify_owner ? info->notify.sigev_notify : 0,
 333                        (info->notify_owner &&
 334                         info->notify.sigev_notify == SIGEV_SIGNAL) ?
 335                                info->notify.sigev_signo : 0,
 336                        info->notify_owner);
 337        spin_unlock(&info->lock);
 338        buffer[sizeof(buffer)-1] = '\0';
 339        slen = strlen(buffer)+1;
 340
 341        o = *off;
 342        if (o > slen)
 343                return 0;
 344
 345        if (o + count > slen)
 346                count = slen - o;
 347
 348        if (copy_to_user(u_data, buffer + o, count))
 349                return -EFAULT;
 350
 351        *off = o + count;
 352        filp->f_dentry->d_inode->i_atime = filp->f_dentry->d_inode->i_ctime = CURRENT_TIME;
 353        return count;
 354}
 355
 356static int mqueue_flush_file(struct file *filp)
 357{
 358        struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 359
 360        spin_lock(&info->lock);
 361        if (current->tgid == info->notify_owner)
 362                remove_notification(info);
 363
 364        spin_unlock(&info->lock);
 365        return 0;
 366}
 367
 368static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
 369{
 370        struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 371        int retval = 0;
 372
 373        poll_wait(filp, &info->wait_q, poll_tab);
 374
 375        spin_lock(&info->lock);
 376        if (info->attr.mq_curmsgs)
 377                retval = POLLIN | POLLRDNORM;
 378
 379        if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
 380                retval |= POLLOUT | POLLWRNORM;
 381        spin_unlock(&info->lock);
 382
 383        return retval;
 384}
 385
 386/* Adds current to info->e_wait_q[sr] before element with smaller prio */
 387static void wq_add(struct mqueue_inode_info *info, int sr,
 388                        struct ext_wait_queue *ewp)
 389{
 390        struct ext_wait_queue *walk;
 391
 392        ewp->task = current;
 393
 394        list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
 395                if (walk->task->static_prio <= current->static_prio) {
 396                        list_add_tail(&ewp->list, &walk->list);
 397                        return;
 398                }
 399        }
 400        list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
 401}
 402
 403/*
 404 * Puts current task to sleep. Caller must hold queue lock. After return
 405 * lock isn't held.
 406 * sr: SEND or RECV
 407 */
 408static int wq_sleep(struct mqueue_inode_info *info, int sr,
 409                        long timeout, struct ext_wait_queue *ewp)
 410{
 411        int retval;
 412        signed long time;
 413
 414        wq_add(info, sr, ewp);
 415
 416        for (;;) {
 417                set_current_state(TASK_INTERRUPTIBLE);
 418
 419                spin_unlock(&info->lock);
 420                time = schedule_timeout(timeout);
 421
 422                while (ewp->state == STATE_PENDING)
 423                        cpu_relax();
 424
 425                if (ewp->state == STATE_READY) {
 426                        retval = 0;
 427                        goto out;
 428                }
 429                spin_lock(&info->lock);
 430                if (ewp->state == STATE_READY) {
 431                        retval = 0;
 432                        goto out_unlock;
 433                }
 434                if (signal_pending(current)) {
 435                        retval = -ERESTARTSYS;
 436                        break;
 437                }
 438                if (time == 0) {
 439                        retval = -ETIMEDOUT;
 440                        break;
 441                }
 442        }
 443        list_del(&ewp->list);
 444out_unlock:
 445        spin_unlock(&info->lock);
 446out:
 447        return retval;
 448}
 449
 450/*
 451 * Returns waiting task that should be serviced first or NULL if none exists
 452 */
 453static struct ext_wait_queue *wq_get_first_waiter(
 454                struct mqueue_inode_info *info, int sr)
 455{
 456        struct list_head *ptr;
 457
 458        ptr = info->e_wait_q[sr].list.prev;
 459        if (ptr == &info->e_wait_q[sr].list)
 460                return NULL;
 461        return list_entry(ptr, struct ext_wait_queue, list);
 462}
 463
 464/* Auxiliary functions to manipulate messages' list */
 465static void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info)
 466{
 467        int k;
 468
 469        k = info->attr.mq_curmsgs - 1;
 470        while (k >= 0 && info->messages[k]->m_type >= ptr->m_type) {
 471                info->messages[k + 1] = info->messages[k];
 472                k--;
 473        }
 474        info->attr.mq_curmsgs++;
 475        info->qsize += ptr->m_ts;
 476        info->messages[k + 1] = ptr;
 477}
 478
 479static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
 480{
 481        info->qsize -= info->messages[--info->attr.mq_curmsgs]->m_ts;
 482        return info->messages[info->attr.mq_curmsgs];
 483}
 484
 485static inline void set_cookie(struct sk_buff *skb, char code)
 486{
 487        ((char*)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
 488}
 489
 490/*
 491 * The next function is only to split too long sys_mq_timedsend
 492 */
 493static void __do_notify(struct mqueue_inode_info *info)
 494{
 495        /* notification
 496         * invoked when there is registered process and there isn't process
 497         * waiting synchronously for message AND state of queue changed from
 498         * empty to not empty. Here we are sure that no one is waiting
 499         * synchronously. */
 500        if (info->notify_owner &&
 501            info->attr.mq_curmsgs == 1) {
 502                struct siginfo sig_i;
 503                switch (info->notify.sigev_notify) {
 504                case SIGEV_NONE:
 505                        break;
 506                case SIGEV_SIGNAL:
 507                        /* sends signal */
 508
 509                        sig_i.si_signo = info->notify.sigev_signo;
 510                        sig_i.si_errno = 0;
 511                        sig_i.si_code = SI_MESGQ;
 512                        sig_i.si_value = info->notify.sigev_value;
 513                        sig_i.si_pid = current->tgid;
 514                        sig_i.si_uid = current->uid;
 515
 516                        kill_proc_info(info->notify.sigev_signo,
 517                                       &sig_i, info->notify_owner);
 518                        break;
 519                case SIGEV_THREAD:
 520                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
 521                        netlink_sendskb(info->notify_sock,
 522                                        info->notify_cookie, 0);
 523                        break;
 524                }
 525                /* after notification unregisters process */
 526                info->notify_owner = 0;
 527        }
 528        wake_up(&info->wait_q);
 529}
 530
 531static long prepare_timeout(const struct timespec __user *u_arg)
 532{
 533        struct timespec ts, nowts;
 534        long timeout;
 535
 536        if (u_arg) {
 537                if (unlikely(copy_from_user(&ts, u_arg,
 538                                        sizeof(struct timespec))))
 539                        return -EFAULT;
 540
 541                if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0
 542                        || ts.tv_nsec >= NSEC_PER_SEC))
 543                        return -EINVAL;
 544                nowts = CURRENT_TIME;
 545                /* first subtract as jiffies can't be too big */
 546                ts.tv_sec -= nowts.tv_sec;
 547                if (ts.tv_nsec < nowts.tv_nsec) {
 548                        ts.tv_nsec += NSEC_PER_SEC;
 549                        ts.tv_sec--;
 550                }
 551                ts.tv_nsec -= nowts.tv_nsec;
 552                if (ts.tv_sec < 0)
 553                        return 0;
 554
 555                timeout = timespec_to_jiffies(&ts) + 1;
 556        } else
 557                return MAX_SCHEDULE_TIMEOUT;
 558
 559        return timeout;
 560}
 561
 562static void remove_notification(struct mqueue_inode_info *info)
 563{
 564        if (info->notify_owner != 0 &&
 565            info->notify.sigev_notify == SIGEV_THREAD) {
 566                set_cookie(info->notify_cookie, NOTIFY_REMOVED);
 567                netlink_sendskb(info->notify_sock, info->notify_cookie, 0);
 568        }
 569        info->notify_owner = 0;
 570}
 571
 572static int mq_attr_ok(struct mq_attr *attr)
 573{
 574        if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
 575                return 0;
 576        if (capable(CAP_SYS_RESOURCE)) {
 577                if (attr->mq_maxmsg > HARD_MSGMAX)
 578                        return 0;
 579        } else {
 580                if (attr->mq_maxmsg > msg_max ||
 581                                attr->mq_msgsize > msgsize_max)
 582                        return 0;
 583        }
 584        /* check for overflow */
 585        if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
 586                return 0;
 587        if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
 588            (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
 589            (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
 590                return 0;
 591        return 1;
 592}
 593
 594/*
 595 * Invoked when creating a new queue via sys_mq_open
 596 */
 597static struct file *do_create(struct dentry *dir, struct dentry *dentry,
 598                        int oflag, mode_t mode, struct mq_attr __user *u_attr)
 599{
 600        struct file *filp;
 601        struct mq_attr attr;
 602        int ret;
 603
 604        if (u_attr != NULL) {
 605                if (copy_from_user(&attr, u_attr, sizeof(attr)))
 606                        return ERR_PTR(-EFAULT);
 607                if (!mq_attr_ok(&attr))
 608                        return ERR_PTR(-EINVAL);
 609                /* store for use during create */
 610                dentry->d_fsdata = &attr;
 611        }
 612
 613        ret = vfs_create(dir->d_inode, dentry, mode, NULL);
 614        dentry->d_fsdata = NULL;
 615        if (ret)
 616                return ERR_PTR(ret);
 617
 618        filp = dentry_open(dentry, mqueue_mnt, oflag);
 619        if (!IS_ERR(filp))
 620                dget(dentry);
 621
 622        return filp;
 623}
 624
 625/* Opens existing queue */
 626static struct file *do_open(struct dentry *dentry, int oflag)
 627{
 628static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 629                                        MAY_READ | MAY_WRITE };
 630        struct file *filp;
 631
 632        if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
 633                return ERR_PTR(-EINVAL);
 634
 635        if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
 636                return ERR_PTR(-EACCES);
 637
 638        filp = dentry_open(dentry, mqueue_mnt, oflag);
 639
 640        if (!IS_ERR(filp))
 641                dget(dentry);
 642
 643        return filp;
 644}
 645
 646asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 647                                struct mq_attr __user *u_attr)
 648{
 649        struct dentry *dentry;
 650        struct file *filp;
 651        char *name;
 652        int fd, error;
 653
 654        if (IS_ERR(name = getname(u_name)))
 655                return PTR_ERR(name);
 656
 657        fd = get_unused_fd();
 658        if (fd < 0)
 659                goto out_putname;
 660
 661        down(&mqueue_mnt->mnt_root->d_inode->i_sem);
 662        dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
 663        if (IS_ERR(dentry)) {
 664                error = PTR_ERR(dentry);
 665                goto out_err;
 666        }
 667        mntget(mqueue_mnt);
 668
 669        if (oflag & O_CREAT) {
 670                if (dentry->d_inode) {  /* entry already exists */
 671                        filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
 672                                        do_open(dentry, oflag);
 673                } else {
 674                        filp = do_create(mqueue_mnt->mnt_root, dentry,
 675                                                oflag, mode, u_attr);
 676                }
 677        } else
 678                filp = (dentry->d_inode) ? do_open(dentry, oflag) :
 679                                        ERR_PTR(-ENOENT);
 680
 681        dput(dentry);
 682
 683        if (IS_ERR(filp)) {
 684                error = PTR_ERR(filp);
 685                goto out_putfd;
 686        }
 687
 688        set_close_on_exec(fd, 1);
 689        fd_install(fd, filp);
 690        goto out_upsem;
 691
 692out_putfd:
 693        mntput(mqueue_mnt);
 694        put_unused_fd(fd);
 695out_err:
 696        fd = error;
 697out_upsem:
 698        up(&mqueue_mnt->mnt_root->d_inode->i_sem);
 699out_putname:
 700        putname(name);
 701        return fd;
 702}
 703
 704asmlinkage long sys_mq_unlink(const char __user *u_name)
 705{
 706        int err;
 707        char *name;
 708        struct dentry *dentry;
 709        struct inode *inode = NULL;
 710
 711        name = getname(u_name);
 712        if (IS_ERR(name))
 713                return PTR_ERR(name);
 714
 715        down(&mqueue_mnt->mnt_root->d_inode->i_sem);
 716        dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
 717        if (IS_ERR(dentry)) {
 718                err = PTR_ERR(dentry);
 719                goto out_unlock;
 720        }
 721
 722        if (!dentry->d_inode) {
 723                err = -ENOENT;
 724                goto out_err;
 725        }
 726
 727        inode = dentry->d_inode;
 728        if (inode)
 729                atomic_inc(&inode->i_count);
 730
 731        err = vfs_unlink(dentry->d_parent->d_inode, dentry);
 732out_err:
 733        dput(dentry);
 734
 735out_unlock:
 736        up(&mqueue_mnt->mnt_root->d_inode->i_sem);
 737        putname(name);
 738        if (inode)
 739                iput(inode);
 740
 741        return err;
 742}
 743
 744/* Pipelined send and receive functions.
 745 *
 746 * If a receiver finds no waiting message, then it registers itself in the
 747 * list of waiting receivers. A sender checks that list before adding the new
 748 * message into the message array. If there is a waiting receiver, then it
 749 * bypasses the message array and directly hands the message over to the
 750 * receiver.
 751 * The receiver accepts the message and returns without grabbing the queue
 752 * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
 753 * are necessary. The same algorithm is used for sysv semaphores, see
 754 * ipc/sem.c for more details.
 755 *
 756 * The same algorithm is used for senders.
 757 */
 758
 759/* pipelined_send() - send a message directly to the task waiting in
 760 * sys_mq_timedreceive() (without inserting message into a queue).
 761 */
 762static inline void pipelined_send(struct mqueue_inode_info *info,
 763                                  struct msg_msg *message,
 764                                  struct ext_wait_queue *receiver)
 765{
 766        receiver->msg = message;
 767        list_del(&receiver->list);
 768        receiver->state = STATE_PENDING;
 769        wake_up_process(receiver->task);
 770        wmb();
 771        receiver->state = STATE_READY;
 772}
 773
 774/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
 775 * gets its message and put to the queue (we have one free place for sure). */
 776static inline void pipelined_receive(struct mqueue_inode_info *info)
 777{
 778        struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
 779
 780        if (!sender) {
 781                /* for poll */
 782                wake_up_interruptible(&info->wait_q);
 783                return;
 784        }
 785        msg_insert(sender->msg, info);
 786        list_del(&sender->list);
 787        sender->state = STATE_PENDING;
 788        wake_up_process(sender->task);
 789        wmb();
 790        sender->state = STATE_READY;
 791}
 792
 793asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
 794        size_t msg_len, unsigned int msg_prio,
 795        const struct timespec __user *u_abs_timeout)
 796{
 797        struct file *filp;
 798        struct inode *inode;
 799        struct ext_wait_queue wait;
 800        struct ext_wait_queue *receiver;
 801        struct msg_msg *msg_ptr;
 802        struct mqueue_inode_info *info;
 803        long timeout;
 804        int ret;
 805
 806        if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
 807                return -EINVAL;
 808
 809        timeout = prepare_timeout(u_abs_timeout);
 810
 811        ret = -EBADF;
 812        filp = fget(mqdes);
 813        if (unlikely(!filp))
 814                goto out;
 815
 816        inode = filp->f_dentry->d_inode;
 817        if (unlikely(filp->f_op != &mqueue_file_operations))
 818                goto out_fput;
 819        info = MQUEUE_I(inode);
 820
 821        if (unlikely(!(filp->f_mode & FMODE_WRITE)))
 822                goto out_fput;
 823
 824        if (unlikely(msg_len > info->attr.mq_msgsize)) {
 825                ret = -EMSGSIZE;
 826                goto out_fput;
 827        }
 828
 829        /* First try to allocate memory, before doing anything with
 830         * existing queues. */
 831        msg_ptr = load_msg(u_msg_ptr, msg_len);
 832        if (IS_ERR(msg_ptr)) {
 833                ret = PTR_ERR(msg_ptr);
 834                goto out_fput;
 835        }
 836        msg_ptr->m_ts = msg_len;
 837        msg_ptr->m_type = msg_prio;
 838
 839        spin_lock(&info->lock);
 840
 841        if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
 842                if (filp->f_flags & O_NONBLOCK) {
 843                        spin_unlock(&info->lock);
 844                        ret = -EAGAIN;
 845                } else if (unlikely(timeout < 0)) {
 846                        spin_unlock(&info->lock);
 847                        ret = timeout;
 848                } else {
 849                        wait.task = current;
 850                        wait.msg = (void *) msg_ptr;
 851                        wait.state = STATE_NONE;
 852                        ret = wq_sleep(info, SEND, timeout, &wait);
 853                }
 854                if (ret < 0)
 855                        free_msg(msg_ptr);
 856        } else {
 857                receiver = wq_get_first_waiter(info, RECV);
 858                if (receiver) {
 859                        pipelined_send(info, msg_ptr, receiver);
 860                } else {
 861                        /* adds message to the queue */
 862                        msg_insert(msg_ptr, info);
 863                        __do_notify(info);
 864                }
 865                inode->i_atime = inode->i_mtime = inode->i_ctime =
 866                                CURRENT_TIME;
 867                spin_unlock(&info->lock);
 868                ret = 0;
 869        }
 870out_fput:
 871        fput(filp);
 872out:
 873        return ret;
 874}
 875
 876asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 877        size_t msg_len, unsigned int __user *u_msg_prio,
 878        const struct timespec __user *u_abs_timeout)
 879{
 880        long timeout;
 881        ssize_t ret;
 882        struct msg_msg *msg_ptr;
 883        struct file *filp;
 884        struct inode *inode;
 885        struct mqueue_inode_info *info;
 886        struct ext_wait_queue wait;
 887
 888        timeout = prepare_timeout(u_abs_timeout);
 889
 890        ret = -EBADF;
 891        filp = fget(mqdes);
 892        if (unlikely(!filp))
 893                goto out;
 894
 895        inode = filp->f_dentry->d_inode;
 896        if (unlikely(filp->f_op != &mqueue_file_operations))
 897                goto out_fput;
 898        info = MQUEUE_I(inode);
 899
 900        if (unlikely(!(filp->f_mode & FMODE_READ)))
 901                goto out_fput;
 902
 903        /* checks if buffer is big enough */
 904        if (unlikely(msg_len < info->attr.mq_msgsize)) {
 905                ret = -EMSGSIZE;
 906                goto out_fput;
 907        }
 908
 909        spin_lock(&info->lock);
 910        if (info->attr.mq_curmsgs == 0) {
 911                if (filp->f_flags & O_NONBLOCK) {
 912                        spin_unlock(&info->lock);
 913                        ret = -EAGAIN;
 914                        msg_ptr = NULL;
 915                } else if (unlikely(timeout < 0)) {
 916                        spin_unlock(&info->lock);
 917                        ret = timeout;
 918                        msg_ptr = NULL;
 919                } else {
 920                        wait.task = current;
 921                        wait.state = STATE_NONE;
 922                        ret = wq_sleep(info, RECV, timeout, &wait);
 923                        msg_ptr = wait.msg;
 924                }
 925        } else {
 926                msg_ptr = msg_get(info);
 927
 928                inode->i_atime = inode->i_mtime = inode->i_ctime =
 929                                CURRENT_TIME;
 930
 931                /* There is now free space in queue. */
 932                pipelined_receive(info);
 933                spin_unlock(&info->lock);
 934                ret = 0;
 935        }
 936        if (ret == 0) {
 937                ret = msg_ptr->m_ts;
 938
 939                if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
 940                        store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
 941                        ret = -EFAULT;
 942                }
 943                free_msg(msg_ptr);
 944        }
 945out_fput:
 946        fput(filp);
 947out:
 948        return ret;
 949}
 950
 951/*
 952 * Notes: the case when user wants us to deregister (with NULL as pointer)
 953 * and he isn't currently owner of notification, will be silently discarded.
 954 * It isn't explicitly defined in the POSIX.
 955 */
 956asmlinkage long sys_mq_notify(mqd_t mqdes,
 957                                const struct sigevent __user *u_notification)
 958{
 959        int ret;
 960        struct file *filp;
 961        struct sock *sock;
 962        struct inode *inode;
 963        struct sigevent notification;
 964        struct mqueue_inode_info *info;
 965        struct sk_buff *nc;
 966
 967        nc = NULL;
 968        sock = NULL;
 969        if (u_notification != NULL) {
 970                if (copy_from_user(&notification, u_notification,
 971                                        sizeof(struct sigevent)))
 972                        return -EFAULT;
 973
 974                if (unlikely(notification.sigev_notify != SIGEV_NONE &&
 975                             notification.sigev_notify != SIGEV_SIGNAL &&
 976                             notification.sigev_notify != SIGEV_THREAD))
 977                        return -EINVAL;
 978                if (notification.sigev_notify == SIGEV_SIGNAL &&
 979                        (notification.sigev_signo < 0 ||
 980                         notification.sigev_signo > _NSIG)) {
 981                        return -EINVAL;
 982                }
 983                if (notification.sigev_notify == SIGEV_THREAD) {
 984                        /* create the notify skb */
 985                        nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
 986                        ret = -ENOMEM;
 987                        if (!nc)
 988                                goto out;
 989                        ret = -EFAULT;
 990                        if (copy_from_user(nc->data,
 991                                        notification.sigev_value.sival_ptr,
 992                                        NOTIFY_COOKIE_LEN)) {
 993                                goto out;
 994                        }
 995
 996                        /* TODO: add a header? */
 997                        skb_put(nc, NOTIFY_COOKIE_LEN);
 998                        /* and attach it to the socket */
 999retry:
1000                        filp = fget(notification.sigev_signo);
1001                        ret = -EBADF;
1002                        if (!filp)
1003                                goto out;
1004                        sock = netlink_getsockbyfilp(filp);
1005                        fput(filp);
1006                        if (IS_ERR(sock)) {
1007                                ret = PTR_ERR(sock);
1008                                sock = NULL;
1009                                goto out;
1010                        }
1011
1012                        ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
1013                        if (ret == 1)
1014                                goto retry;
1015                        if (ret) {
1016                                sock = NULL;
1017                                nc = NULL;
1018                                goto out;
1019                        }
1020                }
1021        }
1022
1023        ret = -EBADF;
1024        filp = fget(mqdes);
1025        if (!filp)
1026                goto out;
1027
1028        inode = filp->f_dentry->d_inode;
1029        if (unlikely(filp->f_op != &mqueue_file_operations))
1030                goto out_fput;
1031        info = MQUEUE_I(inode);
1032
1033        ret = 0;
1034        spin_lock(&info->lock);
1035        if (u_notification == NULL) {
1036                if (info->notify_owner == current->tgid) {
1037                        remove_notification(info);
1038                        inode->i_atime = inode->i_ctime = CURRENT_TIME;
1039                }
1040        } else if (info->notify_owner != 0) {
1041                ret = -EBUSY;
1042        } else {
1043                switch (notification.sigev_notify) {
1044                case SIGEV_NONE:
1045                        info->notify.sigev_notify = SIGEV_NONE;
1046                        break;
1047                case SIGEV_THREAD:
1048                        info->notify_sock = sock;
1049                        info->notify_cookie = nc;
1050                        sock = NULL;
1051                        nc = NULL;
1052                        info->notify.sigev_notify = SIGEV_THREAD;
1053                        break;
1054                case SIGEV_SIGNAL:
1055                        info->notify.sigev_signo = notification.sigev_signo;
1056                        info->notify.sigev_value = notification.sigev_value;
1057                        info->notify.sigev_notify = SIGEV_SIGNAL;
1058                        break;
1059                }
1060                info->notify_owner = current->tgid;
1061                inode->i_atime = inode->i_ctime = CURRENT_TIME;
1062        }
1063        spin_unlock(&info->lock);
1064out_fput:
1065        fput(filp);
1066out:
1067        if (sock) {
1068                netlink_detachskb(sock, nc);
1069        } else if (nc) {
1070                dev_kfree_skb(nc);
1071        }
1072        return ret;
1073}
1074
1075asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
1076                        const struct mq_attr __user *u_mqstat,
1077                        struct mq_attr __user *u_omqstat)
1078{
1079        int ret;
1080        struct mq_attr mqstat, omqstat;
1081        struct file *filp;
1082        struct inode *inode;
1083        struct mqueue_inode_info *info;
1084
1085        if (u_mqstat != NULL) {
1086                if (copy_from_user(&mqstat, u_mqstat, sizeof(struct mq_attr)))
1087                        return -EFAULT;
1088                if (mqstat.mq_flags & (~O_NONBLOCK))
1089                        return -EINVAL;
1090        }
1091
1092        ret = -EBADF;
1093        filp = fget(mqdes);
1094        if (!filp)
1095                goto out;
1096
1097        inode = filp->f_dentry->d_inode;
1098        if (unlikely(filp->f_op != &mqueue_file_operations))
1099                goto out_fput;
1100        info = MQUEUE_I(inode);
1101
1102        spin_lock(&info->lock);
1103
1104        omqstat = info->attr;
1105        omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
1106        if (u_mqstat) {
1107                if (mqstat.mq_flags & O_NONBLOCK)
1108                        filp->f_flags |= O_NONBLOCK;
1109                else
1110                        filp->f_flags &= ~O_NONBLOCK;
1111
1112                inode->i_atime = inode->i_ctime = CURRENT_TIME;
1113        }
1114
1115        spin_unlock(&info->lock);
1116
1117        ret = 0;
1118        if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat,
1119                                                sizeof(struct mq_attr)))
1120                ret = -EFAULT;
1121
1122out_fput:
1123        fput(filp);
1124out:
1125        return ret;
1126}
1127
1128static struct inode_operations mqueue_dir_inode_operations = {
1129        .lookup = simple_lookup,
1130        .create = mqueue_create,
1131        .unlink = mqueue_unlink,
1132};
1133
1134static struct file_operations mqueue_file_operations = {
1135        .flush = mqueue_flush_file,
1136        .poll = mqueue_poll_file,
1137        .read = mqueue_read_file,
1138};
1139
1140static struct super_operations mqueue_super_ops = {
1141        .alloc_inode = mqueue_alloc_inode,
1142        .destroy_inode = mqueue_destroy_inode,
1143        .statfs = simple_statfs,
1144        .delete_inode = mqueue_delete_inode,
1145        .drop_inode = generic_delete_inode,
1146};
1147
1148static struct file_system_type mqueue_fs_type = {
1149        .name = "mqueue",
1150        .get_sb = mqueue_get_sb,
1151        .kill_sb = kill_litter_super,
1152};
1153
1154static int msg_max_limit_min = DFLT_MSGMAX;
1155static int msg_max_limit_max = HARD_MSGMAX;
1156
1157static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
1158static int msg_maxsize_limit_max = INT_MAX;
1159
1160static ctl_table mq_sysctls[] = {
1161        {
1162                .ctl_name       = CTL_QUEUESMAX,
1163                .procname       = "queues_max",
1164                .data           = &queues_max,
1165                .maxlen         = sizeof(int),
1166                .mode           = 0644,
1167                .proc_handler   = &proc_dointvec,
1168        },
1169        {
1170                .ctl_name       = CTL_MSGMAX,
1171                .procname       = "msg_max",
1172                .data           = &msg_max,
1173                .maxlen         = sizeof(int),
1174                .mode           = 0644,
1175                .proc_handler   = &proc_dointvec_minmax,
1176                .extra1         = &msg_max_limit_min,
1177                .extra2         = &msg_max_limit_max,
1178        },
1179        {
1180                .ctl_name       = CTL_MSGSIZEMAX,
1181                .procname       = "msgsize_max",
1182                .data           = &msgsize_max,
1183                .maxlen         = sizeof(int),
1184                .mode           = 0644,
1185                .proc_handler   = &proc_dointvec_minmax,
1186                .extra1         = &msg_maxsize_limit_min,
1187                .extra2         = &msg_maxsize_limit_max,
1188        },
1189        { .ctl_name = 0 }
1190};
1191
1192static ctl_table mq_sysctl_dir[] = {
1193        {
1194                .ctl_name       = FS_MQUEUE,
1195                .procname       = "mqueue",
1196                .mode           = 0555,
1197                .child          = mq_sysctls,
1198        },
1199        { .ctl_name = 0 }
1200};
1201
1202static ctl_table mq_sysctl_root[] = {
1203        {
1204                .ctl_name       = CTL_FS,
1205                .procname       = "fs",
1206                .mode           = 0555,
1207                .child          = mq_sysctl_dir,
1208        },
1209        { .ctl_name = 0 }
1210};
1211
1212static int __init init_mqueue_fs(void)
1213{
1214        int error;
1215
1216        mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
1217                                sizeof(struct mqueue_inode_info), 0,
1218                                SLAB_HWCACHE_ALIGN, init_once, NULL);
1219        if (mqueue_inode_cachep == NULL)
1220                return -ENOMEM;
1221
1222        /* ignore failues - they are not fatal */
1223        mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
1224
1225        error = register_filesystem(&mqueue_fs_type);
1226        if (error)
1227                goto out_sysctl;
1228
1229        if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
1230                error = PTR_ERR(mqueue_mnt);
1231                goto out_filesystem;
1232        }
1233
1234        /* internal initialization - not common for vfs */
1235        queues_count = 0;
1236        spin_lock_init(&mq_lock);
1237
1238        return 0;
1239
1240out_filesystem:
1241        unregister_filesystem(&mqueue_fs_type);
1242out_sysctl:
1243        if (mq_sysctl_table)
1244                unregister_sysctl_table(mq_sysctl_table);
1245        if (kmem_cache_destroy(mqueue_inode_cachep)) {
1246                printk(KERN_INFO
1247                        "mqueue_inode_cache: not all structures were freed\n");
1248        }
1249        return error;
1250}
1251
1252__initcall(init_mqueue_fs);
1253
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.